[pull] master from mudler:master #68

Merged 14 commits on Apr 17, 2024
2 changes: 1 addition & 1 deletion .github/workflows/dependabot_auto.yml
@@ -14,7 +14,7 @@ jobs:
     steps:
       - name: Dependabot metadata
         id: metadata
-        uses: dependabot/fetch-metadata@v1.3.4
+        uses: dependabot/fetch-metadata@v2.0.0
         with:
           github-token: "${{ secrets.GITHUB_TOKEN }}"
           skip-commit-verification: true
6 changes: 3 additions & 3 deletions .github/workflows/release.yaml
@@ -92,7 +92,7 @@ jobs:
           name: LocalAI-linux-${{ matrix.build }}
           path: release/
       - name: Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@v2
         if: startsWith(github.ref, 'refs/tags/')
         with:
           files: |
@@ -164,7 +164,7 @@ jobs:
           name: LocalAI-MacOS-${{ matrix.build }}
           path: release/
       - name: Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@v2
         if: startsWith(github.ref, 'refs/tags/')
         with:
           files: |
@@ -211,7 +211,7 @@ jobs:
           name: LocalAI-MacOS-arm64-${{ matrix.build }}
           path: release/
       - name: Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@v2
         if: startsWith(github.ref, 'refs/tags/')
         with:
           files: |
2 changes: 1 addition & 1 deletion .github/workflows/secscan.yaml
@@ -24,7 +24,7 @@ jobs:
         args: '-no-fail -fmt sarif -out results.sarif ./...'
       - name: Upload SARIF file
         if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
         with:
           # Path to SARIF file relative to the root of the repository
          sarif_file: results.sarif
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
@@ -121,7 +121,7 @@ jobs:
           PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: dave-gray101/action-tmate@master
+        uses: mxschmitt/action-tmate@v3.18
         with:
           connect-timeout-seconds: 180

@@ -174,7 +174,7 @@ jobs:
           make run-e2e-aio
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: dave-gray101/action-tmate@master
+        uses: mxschmitt/action-tmate@v3.18
         with:
           connect-timeout-seconds: 180

@@ -209,6 +209,6 @@ jobs:
           BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: dave-gray101/action-tmate@master
+        uses: mxschmitt/action-tmate@v3.18
         with:
           connect-timeout-seconds: 180
4 changes: 2 additions & 2 deletions Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=1958f7e06ca2d2e3ab5698cc67513ba359144d8e
+CPPLLAMA_VERSION?=7593639ce335e8d7f89aa9a54d616951f273af60

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
-WHISPER_CPP_VERSION?=8f253ef3af1c62c04316ba4afa7145fc4d701a8c
+WHISPER_CPP_VERSION?=a750868428868abd437e228ae5cab763ef3dc387

# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
2 changes: 2 additions & 0 deletions backend/backend.proto
@@ -114,6 +114,8 @@ message PredictOptions {
 // The response message containing the result
 message Reply {
   bytes message = 1;
+  int32 tokens = 2;
+  int32 prompt_tokens = 3;
 }

 message ModelOptions {
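Because the two new fields are appended with fresh tag numbers, older clients that only know bytes message = 1 simply ignore them on the wire. A minimal Go sketch of the resulting message shape (the Reply struct below is a hand-written stand-in for illustration, not the actual protoc-gen-go output):

package main

import "fmt"

// Reply is an illustrative stand-in mirroring the proto message above;
// the real type is generated from backend/backend.proto.
type Reply struct {
    Message      []byte
    Tokens       int32 // maps to tokens_predicted in the llama.cpp result
    PromptTokens int32 // maps to tokens_evaluated in the llama.cpp result
}

func main() {
    r := Reply{Message: []byte("hello"), Tokens: 42, PromptTokens: 7}
    fmt.Printf("prompt=%d completion=%d total=%d\n",
        r.PromptTokens, r.Tokens, r.PromptTokens+r.Tokens)
}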
8 changes: 8 additions & 0 deletions backend/cpp/llama/grpc-server.cpp
@@ -2332,6 +2332,10 @@ class BackendServiceImpl final : public backend::Backend::Service {
         std::string completion_text = result.result_json.value("content", "");

         reply.set_message(completion_text);
+        int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
+        reply.set_tokens(tokens_predicted);
+        int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
+        reply.set_prompt_tokens(tokens_evaluated);

         // Send the reply
         writer->Write(reply);
@@ -2357,6 +2361,10 @@
         task_result result = llama.queue_results.recv(task_id);
         if (!result.error && result.stop) {
             completion_text = result.result_json.value("content", "");
+            int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
+            int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
+            reply->set_prompt_tokens(tokens_evaluated);
+            reply->set_tokens(tokens_predicted);
             reply->set_message(completion_text);
         }
         else
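On the Go side the same two result fields can be decoded straight from the llama.cpp server JSON; a self-contained sketch (the payload values are invented, only the tokens_predicted and tokens_evaluated keys come from the code above):

package main

import (
    "encoding/json"
    "fmt"
)

func main() {
    // Example payload in the shape consumed above (values invented).
    raw := []byte(`{"content": "hi", "tokens_predicted": 42, "tokens_evaluated": 7}`)

    // Keys missing from the payload decode to zero, matching the
    // result_json.value(..., 0) defaults in the C++ server.
    var res struct {
        TokensPredicted int32 `json:"tokens_predicted"`
        TokensEvaluated int32 `json:"tokens_evaluated"`
    }
    if err := json.Unmarshal(raw, &res); err != nil {
        panic(err)
    }
    fmt.Println(res.TokensPredicted, res.TokensEvaluated) // 42 7
}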
6 changes: 6 additions & 0 deletions core/backend/llm.go
@@ -189,6 +189,12 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest,
     } else {
         go func() {
             reply, err := inferenceModel.Predict(ctx, grpcPredOpts)
+            if tokenUsage.Prompt == 0 {
+                tokenUsage.Prompt = int(reply.PromptTokens)
+            }
+            if tokenUsage.Completion == 0 {
+                tokenUsage.Completion = int(reply.Tokens)
+            }
             if err != nil {
                 rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err}
                 close(rawResultChannel)
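The new block is a fallback rule: token counts already accumulated (for example while streaming) take precedence, and the counts reported in the backend's final reply only fill in when nothing was counted yet. A standalone sketch of that rule (the types and the mergeUsage helper are illustrative, not the project's own API):

package main

import "fmt"

// TokenUsage mirrors the prompt/completion pair tracked in llm.go.
type TokenUsage struct {
    Prompt     int
    Completion int
}

// mergeUsage is an illustrative helper: counts already accumulated win;
// otherwise the backend-reported values from the reply fill in.
func mergeUsage(u TokenUsage, replyPromptTokens, replyTokens int32) TokenUsage {
    if u.Prompt == 0 {
        u.Prompt = int(replyPromptTokens)
    }
    if u.Completion == 0 {
        u.Completion = int(replyTokens)
    }
    return u
}

func main() {
    fmt.Println(mergeUsage(TokenUsage{}, 7, 42))          // {7 42}: reply values used
    fmt.Println(mergeUsage(TokenUsage{Prompt: 9}, 7, 42)) // {9 42}: accumulated prompt count kept
}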
2 changes: 1 addition & 1 deletion core/cli/run.go
@@ -60,7 +60,7 @@ func (r *RunCMD) Run(ctx *Context) error {
         config.WithYAMLConfigPreload(r.PreloadModelsConfig),
         config.WithModelPath(r.ModelsPath),
         config.WithContextSize(r.ContextSize),
-        config.WithDebug(ctx.Debug),
+        config.WithDebug(*ctx.LogLevel == "debug"),
         config.WithImageDir(r.ImagePath),
         config.WithAudioDir(r.AudioPath),
         config.WithUploadDir(r.UploadPath),
151 changes: 151 additions & 0 deletions core/clients/store.go
@@ -0,0 +1,151 @@
package clients

import (
    "bytes"
    "encoding/json"
    "fmt"
    "io"
    "net/http"
)

// Define a struct to hold the store API client
type StoreClient struct {
    BaseURL string
    Client  *http.Client
}

type SetRequest struct {
    Keys   [][]float32 `json:"keys"`
    Values []string    `json:"values"`
}

type GetRequest struct {
    Keys [][]float32 `json:"keys"`
}

type GetResponse struct {
    Keys   [][]float32 `json:"keys"`
    Values []string    `json:"values"`
}

type DeleteRequest struct {
    Keys [][]float32 `json:"keys"`
}

type FindRequest struct {
    TopK int       `json:"topk"`
    Key  []float32 `json:"key"`
}

type FindResponse struct {
    Keys         [][]float32 `json:"keys"`
    Values       []string    `json:"values"`
    Similarities []float32   `json:"similarities"`
}

// Constructor for StoreClient
func NewStoreClient(baseUrl string) *StoreClient {
    return &StoreClient{
        BaseURL: baseUrl,
        Client:  &http.Client{},
    }
}

// Implement Set method
func (c *StoreClient) Set(req SetRequest) error {
    return c.doRequest("stores/set", req)
}

// Implement Get method
func (c *StoreClient) Get(req GetRequest) (*GetResponse, error) {
    body, err := c.doRequestWithResponse("stores/get", req)
    if err != nil {
        return nil, err
    }

    var resp GetResponse
    err = json.Unmarshal(body, &resp)
    if err != nil {
        return nil, err
    }

    return &resp, nil
}

// Implement Delete method
func (c *StoreClient) Delete(req DeleteRequest) error {
    return c.doRequest("stores/delete", req)
}

// Implement Find method
func (c *StoreClient) Find(req FindRequest) (*FindResponse, error) {
    body, err := c.doRequestWithResponse("stores/find", req)
    if err != nil {
        return nil, err
    }

    var resp FindResponse
    err = json.Unmarshal(body, &resp)
    if err != nil {
        return nil, err
    }

    return &resp, nil
}

// Helper function to perform a request without expecting a response body
func (c *StoreClient) doRequest(path string, data interface{}) error {
    jsonData, err := json.Marshal(data)
    if err != nil {
        return err
    }

    req, err := http.NewRequest("POST", c.BaseURL+"/"+path, bytes.NewBuffer(jsonData))
    if err != nil {
        return err
    }
    req.Header.Set("Content-Type", "application/json")

    resp, err := c.Client.Do(req)
    if err != nil {
        return err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return fmt.Errorf("API request to %s failed with status code %d", path, resp.StatusCode)
    }

    return nil
}

// Helper function to perform a request and parse the response body
func (c *StoreClient) doRequestWithResponse(path string, data interface{}) ([]byte, error) {
    jsonData, err := json.Marshal(data)
    if err != nil {
        return nil, err
    }

    req, err := http.NewRequest("POST", c.BaseURL+"/"+path, bytes.NewBuffer(jsonData))
    if err != nil {
        return nil, err
    }
    req.Header.Set("Content-Type", "application/json")

    resp, err := c.Client.Do(req)
    if err != nil {
        return nil, err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("API request to %s failed with status code %d", path, resp.StatusCode)
    }

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        return nil, err
    }

    return body, nil
}
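A usage sketch for the new client. The stores/set, stores/get, stores/delete, and stores/find routes are hard-coded in the file above; the base URL, import path, and vector values here are assumptions for illustration:

package main

import (
    "fmt"
    "log"

    // Import path assumed from the file location core/clients; adjust
    // to the repository's actual module path.
    "github.com/mudler/LocalAI/core/clients"
)

func main() {
    // Base URL assumed: a locally running LocalAI instance.
    c := clients.NewStoreClient("http://localhost:8080")

    // Store two embeddings alongside their payloads.
    if err := c.Set(clients.SetRequest{
        Keys:   [][]float32{{0.1, 0.2}, {0.9, 0.8}},
        Values: []string{"first", "second"},
    }); err != nil {
        log.Fatal(err)
    }

    // Look up the nearest neighbour of a query vector.
    res, err := c.Find(clients.FindRequest{TopK: 1, Key: []float32{0.1, 0.2}})
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println(res.Values, res.Similarities)
}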
18 changes: 0 additions & 18 deletions core/http/api.go
@@ -108,24 +108,6 @@ func App(application *core.Application) (*fiber.App, error) {
         return c.Next()
     }

-    // // Check for api_keys.json file
-    // fileContent, err := os.ReadFile("api_keys.json")
-    // if err == nil {
-    //     // Parse JSON content from the file
-    //     var fileKeys []string
-    //     err := json.Unmarshal(fileContent, &fileKeys)
-    //     if err != nil {
-    //         return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
-    //     }
-
-    //     // Add file keys to options.ApiKeys
-    //     application.ApplicationConfig.ApiKeys = append(application.ApplicationConfig.ApiKeys, fileKeys...)
-    // }
-
-    // if len(application.ApplicationConfig.ApiKeys) == 0 {
-    //     return c.Next()
-    // }
-
     authHeader := readAuthHeader(c)
     if authHeader == "" {
         return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
17 changes: 10 additions & 7 deletions core/services/openai.go
@@ -160,7 +160,7 @@ func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest

     bc, request, err := oais.getConfig(request)
     if err != nil {
-        log.Error().Msgf("[oais::GenerateTextFromRequest] error getting configuration: %q", err)
+        log.Error().Err(err).Msgf("[oais::GenerateTextFromRequest] error getting configuration")
         return
     }

@@ -259,7 +259,7 @@
     // If any of the setup goroutines experienced an error, quit early here.
     if setupError != nil {
         go func() {
-            log.Error().Msgf("[OAIS GenerateTextFromRequest] caught an error during setup: %q", setupError)
+            log.Error().Err(setupError).Msgf("[OAIS GenerateTextFromRequest] caught an error during setup")
             rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: setupError}
             close(rawFinalResultChannel)
         }()
@@ -603,7 +603,7 @@ func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *schema.OpenAIRequest
         Usage: schema.OpenAIUsage{
             PromptTokens:     rawResult.Value.Usage.Prompt,
             CompletionTokens: rawResult.Value.Usage.Completion,
-            TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Prompt,
+            TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
         },
     }

@@ -644,7 +644,7 @@
         Usage: schema.OpenAIUsage{
             PromptTokens:     rawResult.Value.Usage.Prompt,
             CompletionTokens: rawResult.Value.Usage.Completion,
-            TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Prompt,
+            TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
         },
     }

@@ -778,13 +778,16 @@ func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults
     // As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
     ss := map[string]interface{}{}
     // This prevents newlines from breaking JSON parsing for clients
-    // s := utils.EscapeNewLines(llmresult)
-    json.Unmarshal([]byte(llmresult), &ss)
+    s := utils.EscapeNewLines(llmresult)
+    if err := json.Unmarshal([]byte(s), &ss); err != nil {
+        log.Error().Msgf("error unmarshalling JSON: %s", err.Error())
+        return results
+    }

     // The grammar defines the function name as "function", while OpenAI returns "name"
     func_name, ok := ss["function"]
     if !ok {
-        log.Debug().Msg("ss[function] is not OK!")
+        log.Debug().Msgf("ss[function] is not OK!, llm result: %q", llmresult)
         return results
     }
     // Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
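For intuition on why enabling the EscapeNewLines call matters: raw control characters are illegal inside JSON string literals, so a model that emits a multi-line argument produces unparseable output until the newlines are escaped. A self-contained sketch (escapeNewLines is an illustrative stand-in; the project's utils.EscapeNewLines may differ in detail):

package main

import (
    "encoding/json"
    "fmt"
    "strings"
)

// escapeNewLines is an illustrative stand-in for utils.EscapeNewLines:
// it turns raw newlines into the two-character JSON escape sequence \n.
func escapeNewLines(s string) string {
    return strings.ReplaceAll(s, "\n", "\\n")
}

func main() {
    // A raw newline inside the "arguments" string literal.
    llmresult := "{\"function\": \"greet\", \"arguments\": \"line one\nline two\"}"

    var ss map[string]interface{}
    fmt.Println(json.Unmarshal([]byte(llmresult), &ss) != nil)          // true: parse fails on the raw newline
    fmt.Println(json.Unmarshal([]byte(escapeNewLines(llmresult)), &ss)) // <nil>: parses after escaping
}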