diff --git a/llgtrt/src/startup.rs b/llgtrt/src/startup.rs index 3d46f94..a0ccbdc 100644 --- a/llgtrt/src/startup.rs +++ b/llgtrt/src/startup.rs @@ -158,12 +158,27 @@ pub async fn run_server(mut cli_config: CliConfig) -> anyhow::Result<()> { AsyncExecutor::set_global(executor); + let trie = tok_env.tok_trie(); + + let state = AppState { + tok_bos: trie.info().tok_bos, + tok_eos_chat: Some(trie.info().tok_eos), + tok_eos_completions: Some(trie.info().tok_eos), + json_start_token_name: config.tokenizer.json_start_token.clone(), + tok_env, + next_client_req_id: std::sync::atomic::AtomicUsize::new(1000), + chat_builder, + constraint_mgr, + }; + // warmup request log::info!("Warming up executor"); - let mut resp = tok_env.tokenize("The ultimate answer to life, the universe and everything is"); + let mut warmup_tokens = + state.tokenize_with_bos("The ultimate answer to life, the universe and everything is"); + log::debug!("Warmup tokens: {:?}", warmup_tokens); let (_, mut rx) = AsyncExecutor::lock().add_request( RequestInit { - tokens: resp.clone(), + tokens: warmup_tokens.clone(), params: RequestParams { max_new_tokens: 10, ..Default::default() @@ -174,22 +189,12 @@ pub async fn run_server(mut cli_config: CliConfig) -> anyhow::Result<()> { vec![], )?; while let Some(r) = rx.recv().await { - resp.extend_from_slice(&r.response.tokens); + warmup_tokens.extend_from_slice(&r.response.tokens); } - log::info!("Warmup: {}", tok_env.tok_trie().tokens_dbg(&resp)); - - let trie = tok_env.tok_trie(); - - let state = AppState { - tok_bos: trie.info().tok_bos, - tok_eos_chat: Some(trie.info().tok_eos), - tok_eos_completions: Some(trie.info().tok_eos), - json_start_token_name: config.tokenizer.json_start_token.clone(), - tok_env, - next_client_req_id: std::sync::atomic::AtomicUsize::new(1000), - chat_builder, - constraint_mgr, - }; + log::info!( + "Warmup: {}", + state.tok_env.tok_trie().tokens_dbg(&warmup_tokens) + ); let api_key = cli_config.api_key.clone();