fix(ggml/llmb): use IndexMap for GGUF

rustformers · Oct 23, 2023 · e4db5b9
1 parent 43ebc3d
commit e4db5b9
Show file tree

Hide file tree

Showing 7 changed files with 35 additions and 26 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -6,7 +6,7 @@ members = [
     "crates/llm",
     "crates/llm-base",
     "crates/models/*",
-    "binaries/*"
+    "binaries/*",
 ]
 resolver = "2"
 default-members = ["binaries/llm-cli", "crates/llm"]
@@ -33,6 +33,7 @@ memmap2 = "0.5.10"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 tracing = { version = "0.1", features = ["log"] }
 llm-samplers = "=0.0.6"
+indexmap = "2.0.2"
 
 # Config for 'cargo dist'
 [workspace.metadata.dist]
@@ -45,7 +46,12 @@ ci = ["github"]
 # The installers to generate for each app
 installers = ["shell", "powershell"]
 # Target platforms to build apps for (Rust target-triple syntax)
-targets = ["x86_64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-pc-windows-msvc", "aarch64-apple-darwin"]
+targets = [
+    "x86_64-unknown-linux-gnu",
+    "x86_64-apple-darwin",
+    "x86_64-pc-windows-msvc",
+    "aarch64-apple-darwin",
+]
 
 # The profile that 'cargo dist' will build with
 [profile.dist]

diff --git a/crates/ggml/Cargo.toml b/crates/ggml/Cargo.toml
@@ -11,6 +11,7 @@ ggml-sys = { path = "sys", version = "0.2.0-dev" }
 
 thiserror = { workspace = true }
 memmap2 = { workspace = true }
+indexmap = { workspace = true }
 
 [dev-dependencies]
 rand = { workspace = true }

diff --git a/crates/ggml/src/format/gguf/metadata.rs b/crates/ggml/src/format/gguf/metadata.rs
@@ -1,13 +1,14 @@
-use std::{collections::HashMap, io::BufRead};
+use std::io::BufRead;
 
+use indexmap::IndexMap;
 use thiserror::Error;
 
 use crate::util;
 
 use super::{GgufContext, GgufLoadError};
 
 #[derive(Debug, Clone, PartialEq)]
-pub struct Metadata(pub HashMap<String, MetadataValue>);
+pub struct Metadata(pub IndexMap<String, MetadataValue>);
 impl Metadata {
     pub fn iter(&self) -> impl Iterator<Item = (&String, &MetadataValue)> {
         self.0.iter()

diff --git a/crates/ggml/src/format/gguf/mod.rs b/crates/ggml/src/format/gguf/mod.rs
@@ -1,13 +1,11 @@
 #![allow(missing_docs)]
 
-use std::{
-    collections::HashMap,
-    io::{BufRead, Seek},
-};
+use std::io::{BufRead, Seek};
 
 use super::{data_size, header_size, ContainerType, ContainerTypeReadError};
 use crate::{util, ElementType};
 
+use indexmap::IndexMap;
 use thiserror::Error;
 
 mod metadata;
@@ -49,7 +47,7 @@ pub enum GgufSaveError {
     // TODO!
 }
 
-pub type TensorInfos = HashMap<String, TensorInfo>;
+pub type TensorInfos = IndexMap<String, TensorInfo>;
 
 #[derive(Debug, Clone, PartialEq)]
 pub struct Gguf {
@@ -74,7 +72,7 @@ impl Gguf {
         let tensor_count = util::read_length(reader, ctx.use_64_bit_length)?;
         let metadata_kv_count = util::read_length(reader, ctx.use_64_bit_length)?;
 
-        let mut metadata = HashMap::with_capacity(metadata_kv_count);
+        let mut metadata = IndexMap::with_capacity(metadata_kv_count);
         for _ in 0..metadata_kv_count {
             let (key, value) = MetadataValue::read_key_value(&ctx, reader)?;
             metadata.insert(key, value);
@@ -86,7 +84,7 @@ impl Gguf {
             .and_then(|v| v.as_uint32())
             .unwrap_or(DEFAULT_ALIGNMENT) as u64;
 
-        let mut tensor_infos = HashMap::with_capacity(tensor_count);
+        let mut tensor_infos = IndexMap::with_capacity(tensor_count);
         for _ in 0..tensor_count {
             let (key, value) = TensorInfo::read_name_value(&ctx, reader)?;
             tensor_infos.insert(key, value);

diff --git a/crates/llm-base/Cargo.toml b/crates/llm-base/Cargo.toml
@@ -17,16 +17,19 @@ bytemuck = { workspace = true }
 rand = { workspace = true }
 serde = { workspace = true }
 thiserror = { workspace = true }
+indexmap = { workspace = true }
+memmap2 = { workspace = true }
+tracing = { workspace = true }
+llm-samplers = { workspace = true }
 
 partial_sort = "0.2.0"
 serde_bytes = "0.11"
-memmap2 = { workspace = true }
 half = "2"
-tokenizers = {version="0.13.4", default-features=false, features=["onig"]}
+tokenizers = { version = "0.13.4", default-features = false, features = [
+    "onig",
+] }
 regex = "1.8"
-tracing = { workspace = true }
 
-llm-samplers = { workspace = true }
 
 [features]
 tokenizers-remote = ["tokenizers/http"]

diff --git a/crates/llm-base/src/lora.rs b/crates/llm-base/src/lora.rs
@@ -8,10 +8,8 @@ use ggml::{
     format::gguf::{Gguf, Metadata, TensorInfo},
     GraphExecutionPlan,
 };
-use std::{
-    collections::{HashMap, HashSet},
-    path::PathBuf,
-};
+use indexmap::IndexMap;
+use std::{collections::HashSet, path::PathBuf};
 
 #[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
 /// Parameters for a [LoRA](https://arxiv.org/abs/2106.09685) adapter.
@@ -50,7 +48,7 @@ pub struct LoraAdapter {
     /// Scaling to apply to the LoRA weights.
     pub scaling: f32,
     /// The tensors of the LoRA.
-    pub tensors: HashMap<String, TensorInfo>,
+    pub tensors: IndexMap<String, TensorInfo>,
     /// Names of the tensors that should be patched.
     pub tensors_to_patch: HashSet<String>,
     /// Source containing the LoRA weights.