Skip to content

Commit

Permalink
Update image handling and add image processing options (#706)
Browse files Browse the repository at this point in the history
* Update image handling for buffers and blobs and add jimp dependency for image processing

* Add jimp dependency and mark it as external in compile scripts

* Refactor image options and clean up imports for image processing functions

* Refactor image processing and resolution functions for clarity and efficiency

* Add detail, scaling, and auto-cropping options to images documentation
  • Loading branch information
pelikhan authored Sep 11, 2024
1 parent d2df04c commit f452c3f
Show file tree
Hide file tree
Showing 23 changed files with 723 additions and 35 deletions.
12 changes: 12 additions & 0 deletions docs/genaisrc/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 27 additions & 1 deletion docs/src/content/docs/reference/scripts/images.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,37 @@ Local files are loaded and encoded as a data uri.
The `defImages` function also supports [Buffer](https://nodejs.org/api/buffer.html)
and [Blob](https://developer.mozilla.org/en-US/docs/Web/API/Blob).


This example takes a screenshot of bing.com and adds it to the images.

```js
const page = await host.browse("https://bing.com")
const screenshot = await page.screenshot() // returns a node.js Buffer
defImages(screenshot)
```

## Detail

OpenAI supports a `detail` field that can be set to "low" or "high".

```js 'detail: "low"'
defImages(img, { detail: "low" })
```

## Scaling

You can specify a maximum width, a maximum height, or both. GenAIScript will resize
the image to fit within these constraints.

```js "maxWidth: 800" "maxHeight: 800"
defImages(img, { maxWidth: 800 })
// and / or
defImages(img, { maxHeight: 800 })
```

## Auto cropping

You can automatically crop away uniformly colored borders around the edges of the image.

```js "autoCrop"
defImages(img, { autoCrop: true })
```
12 changes: 12 additions & 0 deletions genaisrc/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"@lvce-editor/ripgrep": "^1.2.0",
"dockerode": "^4.0.2",
"gpt-tokenizer": "^2.2.1",
"jimp": "^1.6.0",
"mammoth": "^1.8.0",
"mathjs": "^13.1.1",
"pdfjs-dist": "4.6.82",
Expand Down Expand Up @@ -77,8 +78,8 @@
"zx": "^8.1.5"
},
"scripts": {
"compile": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts && node ../../scripts/patch-cli.mjs",
"compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts",
"compile": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp && node ../../scripts/patch-cli.mjs",
"compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp",
"postcompile": "node built/genaiscript.cjs info help > ../../docs/src/content/docs/reference/cli/commands.md",
"vis:treemap": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.treemap.html",
"vis:network": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.network.html --template network",
Expand Down
1 change: 1 addition & 0 deletions packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"ignore": "^5.3.2",
"inflection": "^3.0.0",
"ini": "^5.0.0",
"jimp": "^1.6.0",
"json5": "^2.2.3",
"jsonrepair": "^3.8.0",
"magic-string": "^0.30.11",
Expand Down
8 changes: 4 additions & 4 deletions packages/core/src/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,17 +110,17 @@ ${CSVToMarkdown(tidyData(rows, options))}
}

export async function resolveFileDataUri(
file: WorkspaceFile,
filename: string,
options?: TraceOptions
) {
let bytes: Uint8Array
if (/^https?:\/\//i.test(file.filename)) {
if (/^https?:\/\//i.test(filename)) {
const fetch = await createFetch(options)
const resp = await fetch(file.filename)
const resp = await fetch(filename)
const buffer = await resp.arrayBuffer()
bytes = new Uint8Array(buffer)
} else {
bytes = new Uint8Array(await host.readFile(file.filename))
bytes = new Uint8Array(await host.readFile(filename))
}
const mime = (await fileTypeFromBuffer(bytes))?.mime
if (!mime) return undefined
Expand Down
12 changes: 12 additions & 0 deletions packages/core/src/genaisrc/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 36 additions & 0 deletions packages/core/src/image.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { resolveFileDataUri } from "./file"
import { TraceOptions } from "./trace"

/**
 * Encodes an image so it can be attached to an LLM request.
 *
 * A string input is returned unchanged when no processing option
 * (`autoCrop`, `maxWidth`, `maxHeight`) is set. Otherwise the image is
 * loaded with jimp, optionally auto-cropped and resized, and re-encoded as
 * a base64 data URI.
 *
 * @param url image source: a string handed to `resolveFileDataUri`, or the
 * raw image bytes as a Buffer or Blob
 * @param options image processing options, plus the trace forwarded to
 * `resolveFileDataUri`
 * @returns the original string (pass-through case) or a
 * `data:<mime>;base64,<payload>` URI of the processed image
 * @throws Error when a string source cannot be resolved to a data URI
 */
export async function imageEncodeForLLM(
    url: string | Buffer | Blob,
    options: DefImagesOptions & TraceOptions
) {
    const { autoCrop, maxHeight, maxWidth } = options
    // If the image is already a string and we don't need to do any processing, return it
    if (
        typeof url === "string" &&
        !autoCrop &&
        maxHeight === undefined &&
        maxWidth === undefined
    )
        return url

    // Load jimp lazily, only once we know the image has to be decoded
    const { Jimp, HorizontalAlign, VerticalAlign } = await import("jimp")

    let source: string | Buffer
    if (typeof url === "string") {
        // Forward the trace so that remote fetches are reported like other requests
        const resolved = await resolveFileDataUri(url, {
            trace: options.trace,
        })
        // resolveFileDataUri yields undefined when the mime type cannot be detected
        if (!resolved) throw new Error(`unable to resolve image ${url}`)
        source = resolved
    } else if (url instanceof Blob) {
        source = Buffer.from(await url.arrayBuffer())
    } else {
        source = url
    }

    const img = await Jimp.read(source)
    if (autoCrop) await img.autocrop()
    if (maxWidth ?? maxHeight) {
        // Never upscale: each bound is capped by the current image size.
        // NOTE(review): `contain` appears to pad the canvas to exactly w x h;
        // confirm letterboxing is intended when only one bound is given.
        await img.contain({
            w: img.width > maxWidth ? maxWidth : img.width,
            h: img.height > maxHeight ? maxHeight : img.height,
            align: HorizontalAlign.CENTER | VerticalAlign.MIDDLE,
        })
    }
    // Keep the source mime type when jimp knows it, otherwise fall back to JPEG
    const outputMime = img.mime ?? ("image/jpeg" as any)
    const buf = await img.getBuffer(outputMime)
    const b64 = buf.toString("base64")
    return `data:${outputMime};base64,${b64}`
}
41 changes: 15 additions & 26 deletions packages/core/src/runpromptcontext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ import { resolveFileDataUri } from "./file"
import { isGlobMatch } from "./glob"
import { logVerbose } from "./util"
import { renderShellOutput } from "./chatrender"
import { fileTypeFromBuffer } from "file-type"
import { jinjaRender } from "./jinja"
import { mustacheRender } from "./mustache"
import { imageEncodeForLLM } from "./image"

export function createChatTurnGenerationContext(
options: GenerationOptions,
Expand Down Expand Up @@ -245,34 +245,20 @@ export function createChatGenerationContext(
const { detail } = defOptions || {}
if (Array.isArray(files))
files.forEach((file) => defImages(file, defOptions))
else if (typeof files === "string")
appendChild(node, createImageNode({ url: files, detail }))
else if (files instanceof Buffer) {
const buffer: Buffer = files
else if (
typeof files === "string" ||
files instanceof Blob ||
files instanceof Buffer
) {
const img = files
appendChild(
node,
createImageNode(
(async () => {
const mime = await fileTypeFromBuffer(buffer)
const b64 = await buffer.toString("base64")
const url = `data:${mime.mime};base64,${b64}`
return {
url,
detail,
}
})()
)
)
} else if (files instanceof Blob) {
const blob: Blob = files
appendChild(
node,
createImageNode(
(async () => {
const buffer = Buffer.from(await blob.arrayBuffer())
const mime = await fileTypeFromBuffer(buffer)
const b64 = await buffer.toString("base64")
const url = `data:${mime.mime};base64,${b64}`
const url = await imageEncodeForLLM(img, {
...defOptions,
trace,
})
return {
url,
detail,
Expand All @@ -286,7 +272,10 @@ export function createChatGenerationContext(
node,
createImageNode(
(async () => {
const url = await resolveFileDataUri(file, { trace })
const url = await imageEncodeForLLM(file.filename, {
...defOptions,
trace,
})
return {
url,
filename: file.filename,
Expand Down
12 changes: 12 additions & 0 deletions packages/core/src/types/prompt_template.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,18 @@ interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {

/**
 * Options accepted by `defImages` to control how images are prepared
 * before being added to the prompt.
 */
interface DefImagesOptions {
/**
 * Image detail level ("low" / "high" field supported by OpenAI)
 */
detail?: "high" | "low"
/**
 * Maximum width of the image; larger images are resized to fit
 * (presumably in pixels, confirm against the image library)
 */
maxWidth?: number
/**
 * Maximum height of the image; larger images are resized to fit
 * (presumably in pixels, confirm against the image library)
 */
maxHeight?: number
/**
 * Automatically remove uniform color on the edges of the image
 */
autoCrop?: boolean
}

interface ChatTaskOptions {
Expand Down
2 changes: 1 addition & 1 deletion packages/sample/genaisrc/describe-image.genai.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ script({
})

$`Return the list of objects in the images.`
defImages(env.files, { detail: "low" })
defImages(env.files, { detail: "low", maxWidth: 400, autoCrop: true })
12 changes: 12 additions & 0 deletions packages/sample/genaisrc/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions packages/sample/genaisrc/node/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions packages/sample/genaisrc/python/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions packages/sample/genaisrc/style/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions packages/sample/src/aici/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions packages/sample/src/errors/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f452c3f

Please sign in to comment.