diff --git a/native/tiktoken/Cargo.lock b/native/tiktoken/Cargo.lock index 701bd7b..9e935ce 100644 --- a/native/tiktoken/Cargo.lock +++ b/native/tiktoken/Cargo.lock @@ -274,9 +274,9 @@ dependencies = [ [[package]] name = "tiktoken-rs" -version = "0.2.2" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bdbfb7cc1905920c7b0487c47c3ea858a979b85ed41b74dfe411ca6e3f6fcdf" +checksum = "a1e520ded49607c6b80a4ab517f564c05e2b34f2c549dbd7b6a528caa2009dda" dependencies = [ "anyhow", "base64", diff --git a/native/tiktoken/Cargo.toml b/native/tiktoken/Cargo.toml index 37446c4..5154920 100644 --- a/native/tiktoken/Cargo.toml +++ b/native/tiktoken/Cargo.toml @@ -11,4 +11,4 @@ crate-type = ["cdylib"] [dependencies] rustler = "0.30.0" -tiktoken-rs = "0.2.2" +tiktoken-rs = "0.5.6" diff --git a/native/tiktoken/src/lib.rs b/native/tiktoken/src/lib.rs index 5980e46..103eb63 100644 --- a/native/tiktoken/src/lib.rs +++ b/native/tiktoken/src/lib.rs @@ -3,7 +3,13 @@ use std::vec::Vec; #[rustler::nif] fn encoding_for_model(model: &str) -> Option<&str> { - tiktoken_rs::encoding_for_model(model) + match tiktoken_rs::tokenizer::get_tokenizer(model) { + Some(tiktoken_rs::tokenizer::Tokenizer::Cl100kBase) => Some("cl100k_base"), + Some(tiktoken_rs::tokenizer::Tokenizer::P50kBase) => Some("p50k_base"), + Some(tiktoken_rs::tokenizer::Tokenizer::R50kBase) => Some("r50k_base"), + Some(tiktoken_rs::tokenizer::Tokenizer::P50kEdit) => Some("p50k_edit"), + _ => None, + } } // p50k