From f670cfb5aacd532dcb72cb77db32b968f3ff9ce8 Mon Sep 17 00:00:00 2001 From: Jeffrey Tang <810895+jeffreyftang@users.noreply.github.com> Date: Tue, 19 Mar 2024 23:20:29 -0500 Subject: [PATCH] enh: Add ignore_eos_token param to completions and chat completions endpoints (#344) --- router/src/lib.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/router/src/lib.rs b/router/src/lib.rs index 3f7ba1650..b218b3df9 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -480,6 +480,7 @@ struct ChatCompletionRequest { response_format: Option, repetition_penalty: Option, top_k: Option, + ignore_eos_token: Option<bool>, } #[derive(Clone, Debug, Deserialize, ToSchema)] @@ -505,6 +506,7 @@ struct CompletionRequest { // TODO(travis): add other LoRAX params here repetition_penalty: Option, top_k: Option, + ignore_eos_token: Option<bool>, } #[derive(Serialize, ToSchema)] @@ -623,7 +625,7 @@ impl From<CompletionRequest> for CompatGenerateRequest { .max_tokens .map(|x| x as u32) .unwrap_or(default_max_new_tokens()), - ignore_eos_token: false, + ignore_eos_token: req.ignore_eos_token.unwrap_or(false), return_full_text: req.echo, stop: req.stop, truncate: None, @@ -660,7 +662,7 @@ impl From<ChatCompletionRequest> for CompatGenerateRequest { .max_tokens .map(|x| x as u32) .unwrap_or(default_max_new_tokens()), - ignore_eos_token: false, + ignore_eos_token: req.ignore_eos_token.unwrap_or(false), return_full_text: None, stop: req.stop, truncate: None,