From 0bf0193096aec10276fc111c5e248521ce61b563 Mon Sep 17 00:00:00 2001 From: yoziru Date: Sun, 28 Jul 2024 13:15:54 +0200 Subject: [PATCH] use llama3-tokenizer-js --- package.json | 2 +- src/components/chat/chat-bottombar.tsx | 4 ++-- src/lib/mistral-tokenizer-js.d.ts | 3 --- src/lib/token-counter.ts | 6 +++--- yarn.lock | 10 +++++----- 5 files changed, 11 insertions(+), 14 deletions(-) delete mode 100644 src/lib/mistral-tokenizer-js.d.ts diff --git a/package.json b/package.json index 992fa4d..4119f14 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,7 @@ "ai": "^3.2.0", "class-variance-authority": "^0.7.0", "clsx": "^2.1.0", - "mistral-tokenizer-js": "^1.0.0", + "llama3-tokenizer-js": "^1.1.3", "next": "14.1.4", "next-themes": "^0.3.0", "react": "^18", diff --git a/src/components/chat/chat-bottombar.tsx b/src/components/chat/chat-bottombar.tsx index dc843e8..33bc1a9 100644 --- a/src/components/chat/chat-bottombar.tsx +++ b/src/components/chat/chat-bottombar.tsx @@ -4,7 +4,7 @@ import React from "react"; import { PaperPlaneIcon, StopIcon } from "@radix-ui/react-icons"; import { ChatRequestOptions } from "ai"; -import mistralTokenizer from "mistral-tokenizer-js"; +import llama3Tokenizer from "llama3-tokenizer-js"; import TextareaAutosize from "react-textarea-autosize"; import { basePath, useHasMounted } from "@/lib/utils"; @@ -41,7 +41,7 @@ export default function ChatBottombar({ handleSubmit(e as unknown as React.FormEvent); } }; - const tokenCount = input ? mistralTokenizer.encode(input).length - 1 : 0; + const tokenCount = input ? llama3Tokenizer.encode(input).length - 1 : 0; const [tokenLimit, setTokenLimit] = React.useState(4096); React.useEffect(() => { diff --git a/src/lib/mistral-tokenizer-js.d.ts b/src/lib/mistral-tokenizer-js.d.ts deleted file mode 100644 index 239ba1a..0000000 --- a/src/lib/mistral-tokenizer-js.d.ts +++ /dev/null @@ -1,3 +0,0 @@ -declare module "mistral-tokenizer-js" { - export function encode(input: string): string[]; -} diff --git a/src/lib/token-counter.ts b/src/lib/token-counter.ts index 596ce79..5e17fd8 100644 --- a/src/lib/token-counter.ts +++ b/src/lib/token-counter.ts @@ -1,5 +1,5 @@ import { CoreMessage, Message } from "ai"; -import mistralTokenizer from "mistral-tokenizer-js"; +import llama3Tokenizer from "llama3-tokenizer-js"; export const getTokenLimit = async (basePath: string) => { const res = await fetch(basePath + "/api/settings"); @@ -18,9 +18,9 @@ export const encodeChat = (messages: Message[] | CoreMessage[]): number => { let numTokens = 0; for (const message of messages) { numTokens += tokensPerMessage; - numTokens += mistralTokenizer.encode(message.role).length; + numTokens += llama3Tokenizer.encode(message.role).length; if (typeof message.content === "string") { - numTokens += mistralTokenizer.encode(message.content).length; + numTokens += llama3Tokenizer.encode(message.content).length; } } numTokens += 3; diff --git a/yarn.lock b/yarn.lock index 7039104..6aab0ef 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2603,6 +2603,11 @@ lines-and-columns@^1.1.6: resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632" integrity sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg== +llama3-tokenizer-js@^1.1.3: + version "1.1.3" + resolved "https://registry.yarnpkg.com/llama3-tokenizer-js/-/llama3-tokenizer-js-1.1.3.tgz#db5d905879180c847917c50e7386e75a0b2530c7" + integrity sha512-ST7tpVSPw4oO/ibZxwPlNyEOpsr4StZwJQh1zIVvmRmtbeGVeOssSEcNhd9Ul61sZEkx1zQ4iSBo9rDUX0gWlA== + locate-path@^6.0.0: version "6.0.0" resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-6.0.0.tgz#55321eb309febbc59c4801d931a72452a681d286" @@ -3149,11 +3154,6 @@ minimist@^1.2.0, minimist@^1.2.6: resolved "https://registry.yarnpkg.com/minipass/-/minipass-7.1.2.tgz#93a9626ce5e5e66bd4db86849e7515e92340a707" integrity sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw== -mistral-tokenizer-js@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/mistral-tokenizer-js/-/mistral-tokenizer-js-1.0.0.tgz#d501a3ecbe5ab4ea3cd2061e303510b864052e32" - integrity sha512-9+M/2Ul5M5oRFwX+QmwUIxpi7iptLgkIs87f3DEwVqmt/hQ7j4RGkawOBjs3LsmJzEbpSQcv8GGOMG1lwoAKyw== - ms@2.1.2: version "2.1.2" resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"