-
Notifications
You must be signed in to change notification settings - Fork 229
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gguf: better type usage #655
Merged
Merged
Changes from 5 commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
e061017
refactor typing + usage
ngxson 9a9e771
correct optional fields
ngxson 6d704bc
add non-strict & strict typing mode
ngxson 8767726
lint & format
ngxson c2afbdc
cast between strict & nonStrict
ngxson 5f547dd
style nits
ngxson 31bac8b
refactor options
ngxson 74e8cfd
Merge branch 'main' into xsn/gguf_type_usage
ngxson 8c1bce0
format
ngxson 2e62e41
fix CI
ngxson a2250d3
fix CI (2)
ngxson File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import { describe, it } from "vitest"; | ||
import type { gguf } from "./gguf"; | ||
import type { GGUFMetadata, GGUFParseOutput, GGUFType } from "./types"; | ||
|
||
describe("gguf-types", () => { | ||
it("gguf() type can be casted (at compile time)", async () => { | ||
// eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
const result: Awaited<ReturnType<typeof gguf>> = null as any; | ||
const strictType = result as GGUFParseOutput<GGUFType.strict>; | ||
// @ts-expect-error because the key "abc" does not exist | ||
strictType.metadata.abc = 123; | ||
const nonStrictType = result as GGUFParseOutput<GGUFType.nonStrict>; | ||
nonStrictType.metadata.abc = 123; // PASS, because it can be anything | ||
// @ts-expect-error because ArrayBuffer is not a MetadataValue | ||
nonStrictType.metadata.fff = ArrayBuffer; | ||
}); | ||
|
||
it("GGUFType.nonStrict should be correct (at compile time)", async () => { | ||
// eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
const model: GGUFMetadata<GGUFType.nonStrict> = null as any; | ||
model.kv_count = 123n; | ||
model.abc = 456; // PASS, because it can be anything | ||
}); | ||
|
||
it("GGUFType.strict should be correct (at compile time)", async () => { | ||
// eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
const model: GGUFMetadata<GGUFType.strict> = null as any; | ||
|
||
if (model["general.architecture"] === "whisper") { | ||
model["encoder.whisper.block_count"] = 0; | ||
// @ts-expect-error because it must be a number | ||
model["encoder.whisper.block_count"] = "abc"; | ||
} | ||
|
||
if (model["tokenizer.ggml.model"] === undefined) { | ||
// @ts-expect-error because it's undefined | ||
model["tokenizer.ggml.eos_token_id"] = 1; | ||
} | ||
if (model["tokenizer.ggml.model"] === "gpt2") { | ||
// @ts-expect-error because it must be a number | ||
model["tokenizer.ggml.eos_token_id"] = undefined; | ||
model["tokenizer.ggml.eos_token_id"] = 1; | ||
} | ||
|
||
if (model["general.architecture"] === "mamba") { | ||
model["mamba.ssm.conv_kernel"] = 0; | ||
// @ts-expect-error because it must be a number | ||
model["mamba.ssm.conv_kernel"] = "abc"; | ||
} | ||
if (model["general.architecture"] === "llama") { | ||
// @ts-expect-error llama does not have ssm.* keys | ||
model["mamba.ssm.conv_kernel"] = 0; | ||
} | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,21 +50,32 @@ export enum GGUFValueType { | |
const ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"] as const; | ||
export type Architecture = (typeof ARCHITECTURES)[number]; | ||
|
||
interface General { | ||
"general.architecture": Architecture; | ||
"general.name": string; | ||
"general.file_type": number; | ||
"general.quantization_version": number; | ||
export interface GGUFGeneralInfo<TArchitecture extends Architecture> { | ||
"general.architecture": TArchitecture; | ||
"general.name"?: string; | ||
"general.file_type"?: number; | ||
"general.quantization_version"?: number; | ||
} | ||
|
||
type ModelMetadata = Whisper | RWKV | TransformerLLM; | ||
interface NoModelMetadata { | ||
"general.architecture"?: undefined; | ||
} | ||
|
||
export type ModelBase< | ||
TArchitecture extends | ||
| Architecture | ||
| `encoder.${Extract<Architecture, "whisper">}` | ||
| `decoder.${Extract<Architecture, "whisper">}`, | ||
> = { [K in `${TArchitecture}.layer_count`]: number } & { [K in `${TArchitecture}.feed_forward_length`]: number } & { | ||
[K in `${TArchitecture}.context_length`]: number; | ||
} & { [K in `${TArchitecture}.embedding_length`]: number } & { [K in `${TArchitecture}.block_count`]: number }; | ||
> = Record< | ||
| `${TArchitecture}.context_length` | ||
| `${TArchitecture}.block_count` | ||
| `${TArchitecture}.embedding_length` | ||
| `${TArchitecture}.feed_forward_length`, | ||
number | ||
>; | ||
|
||
/// Tokenizer | ||
|
||
type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert"; | ||
interface Tokenizer { | ||
|
@@ -75,21 +86,43 @@ interface Tokenizer { | |
"tokenizer.ggml.bos_token_id": number; | ||
"tokenizer.ggml.eos_token_id": number; | ||
"tokenizer.ggml.add_bos_token": boolean; | ||
"tokenizer.chat_template": string; | ||
"tokenizer.chat_template"?: string; | ||
} | ||
interface NoTokenizer { | ||
"tokenizer.ggml.model"?: undefined; | ||
} | ||
|
||
/// Models outside of llama.cpp: "rwkv" and "whisper" | ||
|
||
export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number }; | ||
export type LLM = TransformerLLM | RWKV; | ||
export type Whisper = ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">; | ||
export type Model = (LLM | Whisper) & Partial<Tokenizer>; | ||
export type RWKV = GGUFGeneralInfo<"rwkv"> & | ||
ModelBase<"rwkv"> & { | ||
"rwkv.architecture_version": number; | ||
}; | ||
|
||
export type GGUFMetadata = { | ||
// TODO: whisper.cpp doesn't yet support gguf. This maybe changed in the future. | ||
export type Whisper = GGUFGeneralInfo<"whisper"> & | ||
ModelBase<"encoder.whisper"> & | ||
ModelBase<"decoder.whisper"> & { | ||
"whisper.encoder.mels_count": number; | ||
"whisper.encoder.attention.head_count": number; | ||
"whisper.decoder.attention.head_count": number; | ||
}; | ||
|
||
/// Types for parse output | ||
|
||
export enum GGUFType { | ||
strict, | ||
nonStrict, | ||
mishig25 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
export type GGUFMetadata<TGGUFType extends GGUFType = GGUFType.strict> = { | ||
version: Version; | ||
tensor_count: bigint; | ||
kv_count: bigint; | ||
} & Partial<General> & | ||
Partial<Model> & | ||
Record<string, MetadataValue>; | ||
} & GGUFModelKV & | ||
(TGGUFType extends GGUFType.strict ? unknown : Record<string, MetadataValue>); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. FYI, the |
||
|
||
export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer); | ||
|
||
export interface GGUFTensorInfo { | ||
name: string; | ||
|
@@ -99,7 +132,7 @@ export interface GGUFTensorInfo { | |
offset: bigint; | ||
} | ||
|
||
export interface GGUFParseOutput { | ||
metadata: GGUFMetadata; | ||
export interface GGUFParseOutput<TGGUFType extends GGUFType = GGUFType.strict> { | ||
metadata: GGUFMetadata<TGGUFType>; | ||
tensorInfos: GGUFTensorInfo[]; | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's use `// @ts-expect-error` instead of `// @ts-ignore` in general (no need for eslint-disable this way).
Here I think you can change `const metadata: GGUFMetadata` to `const metadata: GGUFMetadata<GGUFType.NON_STRICT>` to remove the error (not sure if it's the best fix).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in 31bac8b