Skip to content

Commit

Permalink
Merge pull request #79 from GaiaNet-AI/feat-batch-size
Browse files Browse the repository at this point in the history
feat: add `chat_batch_size` and `embedding_batch_size` fields in `config.json`
  • Loading branch information
apepkuss authored Jul 4, 2024
2 parents 756370b + bfa1878 commit ea5f443
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
2 changes: 2 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
"prompt_template": "phi-3-chat",
"reverse_prompt": "",
"chat_ctx_size": "2048",
"chat_batch_size": "512",
"embedding": "https://huggingface.co/gaianet/Nomic-embed-text-v1.5-Embedding-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf",
"embedding_name": "Nomic-embed-text-v1.5",
"embedding_ctx_size": "512",
"embedding_batch_size": "512",
"snapshot": "https://huggingface.co/datasets/gaianet/paris/resolve/main/paris_768_nomic-embed-text-v1.5-f16.snapshot",
"system_prompt": "You are a tour guide in Paris, France. Please answer the question from a Paris visitor accurately.",
"embedding_collection_name": "default",
Expand Down
24 changes: 22 additions & 2 deletions gaianet
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,11 @@ start() {

# parse context size for chat model
chat_ctx_size=$(awk -F'"' '/"chat_ctx_size":/ {print $4}' config.json)
# parse batch size for chat model (falls back to chat_ctx_size when config.json yields no "chat_batch_size" value)
chat_batch_size=$(awk -F'"' '/"chat_batch_size":/ {print $4}' config.json)
if [ -z "$chat_batch_size" ]; then
chat_batch_size=$chat_ctx_size
fi
# parse prompt type for chat model
prompt_type=$(awk -F'"' '/"prompt_template":/ {print $4}' config.json)
# parse system prompt for chat model
Expand Down Expand Up @@ -671,6 +676,11 @@ start() {
fi
# parse context size for embedding model
embedding_ctx_size=$(awk -F'"' '/"embedding_ctx_size":/ {print $4}' config.json)
# parse batch size for embedding model (falls back to embedding_ctx_size when config.json yields no "embedding_batch_size" value)
embedding_batch_size=$(awk -F'"' '/"embedding_batch_size":/ {print $4}' config.json)
if [ -z "$embedding_batch_size" ]; then
embedding_batch_size=$embedding_ctx_size
fi
# parse port for LlamaEdge API Server
llamaedge_port=$(awk -F'"' '/"llamaedge_port":/ {print $4}' config.json)
# parse qdrant limit
Expand Down Expand Up @@ -721,7 +731,7 @@ start() {
rag-api-server.wasm \
--model-name $chat_model_stem,$embedding_model_stem \
--ctx-size $chat_ctx_size,$embedding_ctx_size \
--batch-size $chat_ctx_size,$embedding_ctx_size \
--batch-size $chat_batch_size,$embedding_batch_size \
--prompt-template $prompt_type,embedding \
--rag-policy $rag_policy \
--qdrant-collection-name $embedding_collection_name \
Expand Down Expand Up @@ -900,6 +910,11 @@ start() {

# parse context size for chat model
chat_ctx_size=$(awk -F'"' '/"chat_ctx_size":/ {print $4}' config.json)
# parse batch size for chat model (falls back to chat_ctx_size when config.json yields no "chat_batch_size" value)
chat_batch_size=$(awk -F'"' '/"chat_batch_size":/ {print $4}' config.json)
if [ -z "$chat_batch_size" ]; then
chat_batch_size=$chat_ctx_size
fi
# parse prompt type for chat model
prompt_type=$(awk -F'"' '/"prompt_template":/ {print $4}' config.json)
# parse reverse prompt for chat model
Expand All @@ -923,6 +938,11 @@ start() {
fi
# parse context size for embedding model
embedding_ctx_size=$(awk -F'"' '/"embedding_ctx_size":/ {print $4}' config.json)
# parse batch size for embedding model (falls back to embedding_ctx_size when config.json yields no "embedding_batch_size" value)
embedding_batch_size=$(awk -F'"' '/"embedding_batch_size":/ {print $4}' config.json)
if [ -z "$embedding_batch_size" ]; then
embedding_batch_size=$embedding_ctx_size
fi
# parse port for LlamaEdge API Server
llamaedge_port=$(awk -F'"' '/"llamaedge_port":/ {print $4}' config.json)
# check port
Expand Down Expand Up @@ -955,7 +975,7 @@ start() {
llama-api-server.wasm \
--model-name $chat_model_stem,$embedding_model_stem \
--ctx-size $chat_ctx_size,$embedding_ctx_size \
--batch-size $chat_ctx_size,$embedding_ctx_size \
--batch-size $chat_batch_size,$embedding_batch_size \
--prompt-template $prompt_type,embedding \
--web-ui ./ \
--socket-addr 0.0.0.0:$llamaedge_port)
Expand Down

0 comments on commit ea5f443

Please sign in to comment.