chore: revert llama.cpp to b3029

janhq · Jun 25, 2024 · 0f645e8
1 parent 5124a55
commit 0f645e8
Show file tree

Hide file tree

Showing 3 changed files with 3 additions and 6 deletions.
diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml
@@ -58,6 +58,7 @@ jobs:
     needs: [create-draft-release]
     timeout-minutes: 60
     strategy:
+      fail-fast: false
       matrix:
         include:
           - os: "linux"

diff --git a/llama.cpp b/llama.cpp
diff --git a/src/llama_server_context.cc b/src/llama_server_context.cc
@@ -264,11 +264,7 @@ json LlamaServerContext::GetModelProps() {
 }
 
 int LlamaServerContext::RequestCompletion(json data, bool infill,
-                                          bool embedding, int multitask_id) {
-  // From this commit: 'llama : allow pooled embeddings on any model (#7477)'
-  // we need to explicitly set embedding flad for each request
-  llama_set_embeddings(ctx, embedding);
-
+                                          bool embedding, int multitask_id) { 
   TaskServer task;
   task.id = id_gen++;
   task.target_id = 0;