Skip to content

Commit

Permalink
fix: use block_in_place in llama tokenizer (#235)
Browse files Browse the repository at this point in the history
This commit changes the `LLamaTokenizer` to use
`tokio::task::block_in_place` in an attempt to avoid the following error:

```console
$ env RUST_BACKTRACE=1 \
 LLM_CHAIN_MODEL=models/llama-2-7b-chat.ggmlv3.q4_0.bin \
 cargo r --example few_shot

thread 'main' panicked at
'Cannot block the current thread from within a runtime. This happens
because a function attempted to block the current thread while the
thread is being used to drive asynchronous tasks.',
crates/llm-chain-llama/src/executor.rs:290:36
stack backtrace:
   0: rust_begin_unwind
             at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs:593:5
   1: core::panicking::panic_fmt
             at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/panicking.rs:67:14
   2: core::panicking::panic_display
             at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/panicking.rs:150:5
   3: core::panicking::panic_str
             at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/panicking.rs:134:5
   4: core::option::expect_failed
             at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/option.rs:1932:5
   5: core::option::Option<T>::expect
             at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/option.rs:898:21
   6: tokio::future::block_on::block_on
             at /home/danielbevenius/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.33.0/src/future/block_on.rs:6:21
   7: tokio::sync::mutex::Mutex<T>::blocking_lock
             at /home/danielbevenius/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.33.0/src/sync/mutex.rs:510:9
   8: <llm_chain_llama::executor::LLamaTokenizer as llm_chain::tokens::Tokenizer>::tokenize_str
             at ./llm-chain-llama/src/executor.rs:290:23
   9: <llm_chain_llama::executor::Executor as llm_chain::traits::Executor>::tokens_used
             at ./llm-chain-llama/src/executor.rs:233:31
```

Fixes: #211

Signed-off-by: Daniel Bevenius <[email protected]>
  • Loading branch information
danbev authored Nov 14, 2023
1 parent 45970eb commit d5afd29
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 6 deletions.
4 changes: 2 additions & 2 deletions crates/llm-chain-llama/examples/few_shot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ use llm_chain::{chains::conversation::Chain, executor, parameters, prompt, step:
///
/// This example can be seen as a "chain of thought"
///
/// Usage: cargo run --example few-shot
/// Usage: cargo run --example few_shot
///
/// Make sure to have the env var 'LLM_CHAIN_MODEL' set
#[tokio::main(flavor = "current_thread")]
#[tokio::main(flavor = "multi_thread", worker_threads = 1)]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let opts = options!(
NThreads: 4_usize,
Expand Down
13 changes: 9 additions & 4 deletions crates/llm-chain-llama/src/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,14 +287,19 @@ impl<'a> LLamaTokenizer<'a> {

impl Tokenizer for LLamaTokenizer<'_> {
fn tokenize_str(&self, doc: &str) -> Result<TokenCollection, TokenizerError> {
let context = self.context.blocking_lock();
let tokenized = tokenize(&context, doc, true);
let tokenized = tokio::task::block_in_place(|| {
let context = self.context.blocking_lock();
tokenize(&context, doc, true)
});
Ok(tokenized.into())
}

fn to_string(&self, tokens: TokenCollection) -> Result<String, TokenizerError> {
let context = self.context.blocking_lock();
let output = embedding_to_output(&context, &tokens.as_i32()?);
let tokens = &tokens.as_i32()?;
let output = tokio::task::block_in_place(|| {
let context = self.context.blocking_lock();
embedding_to_output(&context, tokens)
});
Ok(output.to_string())
}
}
Expand Down

0 comments on commit d5afd29

Please sign in to comment.