[pull] master from mudler:master #68

Merged 14 commits on Apr 17, 2024
2 changes: 1 addition & 1 deletion .github/workflows/dependabot_auto.yml
@@ -14,7 +14,7 @@ jobs:
     steps:
       - name: Dependabot metadata
         id: metadata
-        uses: dependabot/fetch-metadata@v1.3.4
+        uses: dependabot/fetch-metadata@v2.0.0
         with:
           github-token: "${{ secrets.GITHUB_TOKEN }}"
           skip-commit-verification: true
6 changes: 3 additions & 3 deletions .github/workflows/release.yaml
@@ -92,7 +92,7 @@ jobs:
           name: LocalAI-linux-${{ matrix.build }}
           path: release/
       - name: Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@v2
         if: startsWith(github.ref, 'refs/tags/')
         with:
           files: |
@@ -164,7 +164,7 @@ jobs:
           name: LocalAI-MacOS-${{ matrix.build }}
           path: release/
       - name: Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@v2
         if: startsWith(github.ref, 'refs/tags/')
         with:
           files: |
@@ -211,7 +211,7 @@ jobs:
           name: LocalAI-MacOS-arm64-${{ matrix.build }}
           path: release/
       - name: Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@v2
         if: startsWith(github.ref, 'refs/tags/')
         with:
           files: |
2 changes: 1 addition & 1 deletion .github/workflows/secscan.yaml
@@ -24,7 +24,7 @@ jobs:
         args: '-no-fail -fmt sarif -out results.sarif ./...'
       - name: Upload SARIF file
         if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
         with:
           # Path to SARIF file relative to the root of the repository
          sarif_file: results.sarif
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
@@ -121,7 +121,7 @@ jobs:
           PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: dave-gray101/action-tmate@master
+        uses: mxschmitt/action-tmate@v3.18
         with:
           connect-timeout-seconds: 180

@@ -174,7 +174,7 @@ jobs:
           make run-e2e-aio
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: dave-gray101/action-tmate@master
+        uses: mxschmitt/action-tmate@v3.18
         with:
           connect-timeout-seconds: 180

@@ -209,6 +209,6 @@ jobs:
           BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: dave-gray101/action-tmate@master
+        uses: mxschmitt/action-tmate@v3.18
         with:
           connect-timeout-seconds: 180
4 changes: 2 additions & 2 deletions Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=1958f7e06ca2d2e3ab5698cc67513ba359144d8e
+CPPLLAMA_VERSION?=7593639ce335e8d7f89aa9a54d616951f273af60

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
-WHISPER_CPP_VERSION?=8f253ef3af1c62c04316ba4afa7145fc4d701a8c
+WHISPER_CPP_VERSION?=a750868428868abd437e228ae5cab763ef3dc387

# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
2 changes: 2 additions & 0 deletions backend/backend.proto
@@ -114,6 +114,8 @@ message PredictOptions {
 // The response message containing the result
 message Reply {
   bytes message = 1;
+  int32 tokens = 2;
+  int32 prompt_tokens = 3;
 }

 message ModelOptions {
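Because the two new fields are appended with fresh tag numbers, older clients that only know bytes message = 1 simply ignore them on the wire. A minimal Go sketch of the resulting message shape (the Reply struct below is a hand-written stand-in for illustration, not the actual protoc-gen-go output):

package main

import "fmt"

// Reply is an illustrative stand-in mirroring the proto message above;
// the real type is generated from backend/backend.proto.
type Reply struct {
    Message      []byte
    Tokens       int32 // maps to tokens_predicted in the llama.cpp result
    PromptTokens int32 // maps to tokens_evaluated in the llama.cpp result
}

func main() {
    r := Reply{Message: []byte("hello"), Tokens: 42, PromptTokens: 7}
    fmt.Printf("prompt=%d completion=%d total=%d\n",
        r.PromptTokens, r.Tokens, r.PromptTokens+r.Tokens)
}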
8 changes: 8 additions & 0 deletions backend/cpp/llama/grpc-server.cpp
@@ -2332,6 +2332,10 @@ class BackendServiceImpl final : public backend::Backend::Service {
         std::string completion_text = result.result_json.value("content", "");

         reply.set_message(completion_text);
+        int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
+        reply.set_tokens(tokens_predicted);
+        int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
+        reply.set_prompt_tokens(tokens_evaluated);

         // Send the reply
         writer->Write(reply);
@@ -2357,6 +2361,10 @@
         task_result result = llama.queue_results.recv(task_id);
         if (!result.error && result.stop) {
             completion_text = result.result_json.value("content", "");
+            int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
+            int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
+            reply->set_prompt_tokens(tokens_evaluated);
+            reply->set_tokens(tokens_predicted);
             reply->set_message(completion_text);
         }
         else
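On the Go side the same two result fields can be decoded straight from the llama.cpp server JSON; a self-contained sketch (the payload values are invented, only the tokens_predicted and tokens_evaluated keys come from the code above):

package main

import (
    "encoding/json"
    "fmt"
)

func main() {
    // Example payload in the shape consumed above (values invented).
    raw := []byte(`{"content": "hi", "tokens_predicted": 42, "tokens_evaluated": 7}`)

    // Keys missing from the payload decode to zero, matching the
    // result_json.value(..., 0) defaults in the C++ server.
    var res struct {
        TokensPredicted int32 `json:"tokens_predicted"`
        TokensEvaluated int32 `json:"tokens_evaluated"`
    }
    if err := json.Unmarshal(raw, &res); err != nil {
        panic(err)
    }
    fmt.Println(res.TokensPredicted, res.TokensEvaluated) // 42 7
}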
6 changes: 6 additions & 0 deletions core/backend/llm.go
@@ -189,6 +189,12 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest,
     } else {
         go func() {
             reply, err := inferenceModel.Predict(ctx, grpcPredOpts)
+            if tokenUsage.Prompt == 0 {
+                tokenUsage.Prompt = int(reply.PromptTokens)
+            }
+            if tokenUsage.Completion == 0 {
+                tokenUsage.Completion = int(reply.Tokens)
+            }
             if err != nil {
                 rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err}
                 close(rawResultChannel)
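The new block is a fallback rule: token counts already accumulated (for example while streaming) take precedence, and the counts reported in the backend's final reply only fill in when nothing was counted yet. A standalone sketch of that rule (the types and the mergeUsage helper are illustrative, not the project's own API):

package main

import "fmt"

// TokenUsage mirrors the prompt/completion pair tracked in llm.go.
type TokenUsage struct {
    Prompt     int
    Completion int
}

// mergeUsage is an illustrative helper: counts already accumulated win;
// otherwise the backend-reported values from the reply fill in.
func mergeUsage(u TokenUsage, replyPromptTokens, replyTokens int32) TokenUsage {
    if u.Prompt == 0 {
        u.Prompt = int(replyPromptTokens)
    }
    if u.Completion == 0 {
        u.Completion = int(replyTokens)
    }
    return u
}

func main() {
    fmt.Println(mergeUsage(TokenUsage{}, 7, 42))          // {7 42}: reply values used
    fmt.Println(mergeUsage(TokenUsage{Prompt: 9}, 7, 42)) // {9 42}: accumulated prompt count kept
}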
2 changes: 1 addition & 1 deletion core/cli/run.go
@@ -60,7 +60,7 @@ func (r *RunCMD) Run(ctx *Context) error {
         config.WithYAMLConfigPreload(r.PreloadModelsConfig),
         config.WithModelPath(r.ModelsPath),
         config.WithContextSize(r.ContextSize),
-        config.WithDebug(ctx.Debug),
+        config.WithDebug(*ctx.LogLevel == "debug"),
         config.WithImageDir(r.ImagePath),
         config.WithAudioDir(r.AudioPath),
         config.WithUploadDir(r.UploadPath),
151 changes: 151 additions & 0 deletions core/clients/store.go
@@ -0,0 +1,151 @@
package clients

import (
    "bytes"
    "encoding/json"
    "fmt"
    "io"
    "net/http"
)

// Define a struct to hold the store API client
type StoreClient struct {
    BaseURL string
    Client  *http.Client
}

type SetRequest struct {
    Keys   [][]float32 `json:"keys"`
    Values []string    `json:"values"`
}

type GetRequest struct {
    Keys [][]float32 `json:"keys"`
}

type GetResponse struct {
    Keys   [][]float32 `json:"keys"`
    Values []string    `json:"values"`
}

type DeleteRequest struct {
    Keys [][]float32 `json:"keys"`
}

type FindRequest struct {
    TopK int       `json:"topk"`
    Key  []float32 `json:"key"`
}

type FindResponse struct {
    Keys         [][]float32 `json:"keys"`
    Values       []string    `json:"values"`
    Similarities []float32   `json:"similarities"`
}

// Constructor for StoreClient
func NewStoreClient(baseUrl string) *StoreClient {
    return &StoreClient{
        BaseURL: baseUrl,
        Client:  &http.Client{},
    }
}

// Implement Set method
func (c *StoreClient) Set(req SetRequest) error {
    return c.doRequest("stores/set", req)
}

// Implement Get method
func (c *StoreClient) Get(req GetRequest) (*GetResponse, error) {
    body, err := c.doRequestWithResponse("stores/get", req)
    if err != nil {
        return nil, err
    }

    var resp GetResponse
    err = json.Unmarshal(body, &resp)
    if err != nil {
        return nil, err
    }

    return &resp, nil
}

// Implement Delete method
func (c *StoreClient) Delete(req DeleteRequest) error {
    return c.doRequest("stores/delete", req)
}

// Implement Find method
func (c *StoreClient) Find(req FindRequest) (*FindResponse, error) {
    body, err := c.doRequestWithResponse("stores/find", req)
    if err != nil {
        return nil, err
    }

    var resp FindResponse
    err = json.Unmarshal(body, &resp)
    if err != nil {
        return nil, err
    }

    return &resp, nil
}

// Helper function to perform a request without expecting a response body
func (c *StoreClient) doRequest(path string, data interface{}) error {
    jsonData, err := json.Marshal(data)
    if err != nil {
        return err
    }

    req, err := http.NewRequest("POST", c.BaseURL+"/"+path, bytes.NewBuffer(jsonData))
    if err != nil {
        return err
    }
    req.Header.Set("Content-Type", "application/json")

    resp, err := c.Client.Do(req)
    if err != nil {
        return err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return fmt.Errorf("API request to %s failed with status code %d", path, resp.StatusCode)
    }

    return nil
}

// Helper function to perform a request and parse the response body
func (c *StoreClient) doRequestWithResponse(path string, data interface{}) ([]byte, error) {
    jsonData, err := json.Marshal(data)
    if err != nil {
        return nil, err
    }

    req, err := http.NewRequest("POST", c.BaseURL+"/"+path, bytes.NewBuffer(jsonData))
    if err != nil {
        return nil, err
    }
    req.Header.Set("Content-Type", "application/json")

    resp, err := c.Client.Do(req)
    if err != nil {
        return nil, err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("API request to %s failed with status code %d", path, resp.StatusCode)
    }

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        return nil, err
    }

    return body, nil
}
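A usage sketch for the new client. The stores/set, stores/get, stores/delete, and stores/find routes are hard-coded in the file above; the base URL, import path, and vector values here are assumptions for illustration:

package main

import (
    "fmt"
    "log"

    // Import path assumed from the file location core/clients; adjust
    // to the repository's actual module path.
    "github.com/mudler/LocalAI/core/clients"
)

func main() {
    // Base URL assumed: a locally running LocalAI instance.
    c := clients.NewStoreClient("http://localhost:8080")

    // Store two embeddings alongside their payloads.
    if err := c.Set(clients.SetRequest{
        Keys:   [][]float32{{0.1, 0.2}, {0.9, 0.8}},
        Values: []string{"first", "second"},
    }); err != nil {
        log.Fatal(err)
    }

    // Look up the nearest neighbour of a query vector.
    res, err := c.Find(clients.FindRequest{TopK: 1, Key: []float32{0.1, 0.2}})
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println(res.Values, res.Similarities)
}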
18 changes: 0 additions & 18 deletions core/http/api.go
@@ -108,24 +108,6 @@ func App(application *core.Application) (*fiber.App, error) {
         return c.Next()
     }

-    // // Check for api_keys.json file
-    // fileContent, err := os.ReadFile("api_keys.json")
-    // if err == nil {
-    //     // Parse JSON content from the file
-    //     var fileKeys []string
-    //     err := json.Unmarshal(fileContent, &fileKeys)
-    //     if err != nil {
-    //         return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
-    //     }
-
-    //     // Add file keys to options.ApiKeys
-    //     application.ApplicationConfig.ApiKeys = append(application.ApplicationConfig.ApiKeys, fileKeys...)
-    // }
-
-    // if len(application.ApplicationConfig.ApiKeys) == 0 {
-    //     return c.Next()
-    // }
-
     authHeader := readAuthHeader(c)
     if authHeader == "" {
         return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
17 changes: 10 additions & 7 deletions core/services/openai.go
@@ -160,7 +160,7 @@ func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest

     bc, request, err := oais.getConfig(request)
     if err != nil {
-        log.Error().Msgf("[oais::GenerateTextFromRequest] error getting configuration: %q", err)
+        log.Error().Err(err).Msgf("[oais::GenerateTextFromRequest] error getting configuration")
         return
     }

@@ -259,7 +259,7 @@
     // If any of the setup goroutines experienced an error, quit early here.
     if setupError != nil {
         go func() {
-            log.Error().Msgf("[OAIS GenerateTextFromRequest] caught an error during setup: %q", setupError)
+            log.Error().Err(setupError).Msgf("[OAIS GenerateTextFromRequest] caught an error during setup")
             rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: setupError}
             close(rawFinalResultChannel)
         }()
@@ -603,7 +603,7 @@ func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *schema.OpenAIRequest
         Usage: schema.OpenAIUsage{
             PromptTokens:     rawResult.Value.Usage.Prompt,
             CompletionTokens: rawResult.Value.Usage.Completion,
-            TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Prompt,
+            TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
         },
     }

@@ -644,7 +644,7 @@
         Usage: schema.OpenAIUsage{
             PromptTokens:     rawResult.Value.Usage.Prompt,
             CompletionTokens: rawResult.Value.Usage.Completion,
-            TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Prompt,
+            TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
         },
     }

@@ -778,13 +778,16 @@ func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults
     // As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
     ss := map[string]interface{}{}
     // This prevents newlines from breaking JSON parsing for clients
-    // s := utils.EscapeNewLines(llmresult)
-    json.Unmarshal([]byte(llmresult), &ss)
+    s := utils.EscapeNewLines(llmresult)
+    if err := json.Unmarshal([]byte(s), &ss); err != nil {
+        log.Error().Msgf("error unmarshalling JSON: %s", err.Error())
+        return results
+    }

     // The grammar defines the function name as "function", while OpenAI returns "name"
     func_name, ok := ss["function"]
     if !ok {
-        log.Debug().Msg("ss[function] is not OK!")
+        log.Debug().Msgf("ss[function] is not OK!, llm result: %q", llmresult)
         return results
     }
     // Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
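For intuition on why enabling the EscapeNewLines call matters: raw control characters are illegal inside JSON string literals, so a model that emits a multi-line argument produces unparseable output until the newlines are escaped. A self-contained sketch (escapeNewLines is an illustrative stand-in; the project's utils.EscapeNewLines may differ in detail):

package main

import (
    "encoding/json"
    "fmt"
    "strings"
)

// escapeNewLines is an illustrative stand-in for utils.EscapeNewLines:
// it turns raw newlines into the two-character JSON escape sequence \n.
func escapeNewLines(s string) string {
    return strings.ReplaceAll(s, "\n", "\\n")
}

func main() {
    // A raw newline inside the "arguments" string literal.
    llmresult := "{\"function\": \"greet\", \"arguments\": \"line one\nline two\"}"

    var ss map[string]interface{}
    fmt.Println(json.Unmarshal([]byte(llmresult), &ss) != nil)          // true: parse fails on the raw newline
    fmt.Println(json.Unmarshal([]byte(escapeNewLines(llmresult)), &ss)) // <nil>: parses after escaping
}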