From dc3fe71b183e852a29b24711ecf4f05057d6432c Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Tue, 28 May 2024 08:06:36 +0700
Subject: [PATCH] feat: change n_batch default to 2048 (#63)

Co-authored-by: vansangpfiev
---
 src/llama_engine.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llama_engine.cc b/src/llama_engine.cc
index b66a86a4..0f29024f 100644
--- a/src/llama_engine.cc
+++ b/src/llama_engine.cc
@@ -327,9 +327,9 @@ bool LlamaEngine::LoadModelImpl(std::shared_ptr jsonBody) {
   params.n_ctx = jsonBody->get("ctx_len", 2048).asInt();
   params.embedding = jsonBody->get("embedding", true).asBool();
   model_type = jsonBody->get("model_type", "llm").asString();
-  // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
-  params.n_batch = jsonBody->get("n_batch", 512).asInt();
+  params.n_batch = jsonBody->get("n_batch", 2048).asInt();
   params.n_ubatch = jsonBody->get("n_ubatch", params.n_batch).asInt();
+  // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
   params.n_parallel = jsonBody->get("n_parallel", 1).asInt();
   params.n_threads =
       jsonBody->get("cpu_threads", std::thread::hardware_concurrency())