diff --git a/Dockerfile.aio b/Dockerfile.aio new file mode 100644 index 000000000000..4097e6d57680 --- /dev/null +++ b/Dockerfile.aio @@ -0,0 +1,9 @@ +ARG BASE_IMAGE=ubuntu:22.04 + +FROM ${BASE_IMAGE} +ARG SIZE=cpu +ENV MODELS="/aio-models/embeddings.yaml,/aio-models/text-to-speech.yaml,/aio-models/image-gen.yaml,/aio-models/text-to-text.yaml,/aio-models/speech-to-text.yaml,/aio-models/vision.yaml" + +COPY aio/${SIZE} /aio-models + +ENTRYPOINT [ "/build/entrypoint.sh" ] \ No newline at end of file diff --git a/Makefile b/Makefile index 85d6c7c90e6f..c03091d0b701 100644 --- a/Makefile +++ b/Makefile @@ -535,6 +535,8 @@ backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper. grpcs: prepare $(GRPC_BACKENDS) DOCKER_IMAGE?=local-ai +DOCKER_AIO_IMAGE?=local-ai-aio +DOCKER_AIO_SIZE?=cpu IMAGE_TYPE?=core BASE_IMAGE?=ubuntu:22.04 @@ -545,6 +547,18 @@ docker: --build-arg GO_TAGS=$(GO_TAGS) \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \ -t $(DOCKER_IMAGE) . + +docker-aio: + @echo "Building AIO image with size $(DOCKER_AIO_SIZE)" + @echo "Building AIO image with base image $(BASE_IMAGE)" + docker build \ + --build-arg BASE_IMAGE=$(BASE_IMAGE) \ + --build-arg SIZE=$(DOCKER_AIO_SIZE) \ + -t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio . + +docker-aio-all: + $(MAKE) docker-aio DOCKER_AIO_SIZE=cpu + $(MAKE) docker-aio DOCKER_AIO_SIZE=cpu docker-image-intel: docker build \ diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml new file mode 100644 index 000000000000..512d63a41dfe --- /dev/null +++ b/aio/cpu/embeddings.yaml @@ -0,0 +1,13 @@ +name: all-minilm-l6-v2 +backend: sentencetransformers +embeddings: true +parameters: + model: all-MiniLM-L6-v2 + +usage: | + You can test this model with curl like this: + + curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ + "input": "Your text string goes here", + "model": "all-minilm-l6-v2" + }' \ No newline at end of file diff --git a/aio/cpu/image-gen.yaml b/aio/cpu/image-gen.yaml new file mode 100644 index 000000000000..3b9c2eec18b1 --- /dev/null +++ b/aio/cpu/image-gen.yaml @@ -0,0 +1,53 @@ +name: stablediffusion +backend: stablediffusion +parameters: + model: stablediffusion_assets + +license: "BSD-3" +urls: +- https://github.com/EdVince/Stable-Diffusion-NCNN +- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE + +description: | + Stable Diffusion in NCNN with c++, supported txt2img and img2img + +download_files: +- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" + sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" +- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" + sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" +- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param" + sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" +- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" + sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin" +- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin" + sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin" +- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" + sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin" +- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" + sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" +- filename: "stablediffusion_assets/log_sigmas.bin" + sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" +- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" + sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" +- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" + sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" +- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" + sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" +- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin" + sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin" +- filename: "stablediffusion_assets/vocab.txt" + sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" \ No newline at end of file diff --git a/aio/cpu/speech-to-text.yaml b/aio/cpu/speech-to-text.yaml new file mode 100644 index 000000000000..f7ebd2176f3f --- /dev/null +++ b/aio/cpu/speech-to-text.yaml @@ -0,0 +1,18 @@ +name: whisper +backend: whisper +parameters: + model: ggml-whisper-base.bin + +usage: | + ## example audio file + wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg + + ## Send the example audio file to the transcriptions endpoint + curl http://localhost:8080/v1/audio/transcriptions \ + -H "Content-Type: multipart/form-data" \ + -F file="@$PWD/gb1.ogg" -F model="whisper" + +download_files: +- filename: "ggml-whisper-base.bin" + sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" + uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file diff --git a/aio/cpu/text-to-speech.yaml b/aio/cpu/text-to-speech.yaml new file mode 100644 index 000000000000..93c1140375fb --- /dev/null +++ b/aio/cpu/text-to-speech.yaml @@ -0,0 +1,15 @@ +name: voice-en-us-amy-low +download_files: + - filename: voice-en-us-amy-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz + +parameters: + model: en-us-amy-low.onnx + +usage: | + To test if this model works as expected, you can use the following curl command: + + curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ + "model":"voice-en-us-amy-low", + "input": "Hi, this is a test." + }' \ No newline at end of file diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml new file mode 100644 index 000000000000..7558ba9ffd07 --- /dev/null +++ b/aio/cpu/text-to-text.yaml @@ -0,0 +1,22 @@ +name: gpt-3.5-turbo +context_size: 2048 +f16: true +gpu_layers: 90 +mmap: true +trimsuffix: +- "\n" +parameters: + model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf + +template: + chat: &template |- + Instruct: {{.Input}} + Output: + completion: *template + +usage: | + To use this model, interact with the API (in another terminal) with curl for instance: + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "phi-2", + "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] + }' diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml new file mode 100644 index 000000000000..3d240681f175 --- /dev/null +++ b/aio/cpu/vision.yaml @@ -0,0 +1,40 @@ +backend: llama-cpp +context_size: 4096 +f16: true + +gpu_layers: 90 +mmap: true +name: llava + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: bakllava-mmproj.gguf +parameters: + model: bakllava.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 + seed: -1 +mirostat: 2 +mirostat_eta: 1.0 +mirostat_tau: 1.0 + +template: + chat: | + A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. + {{.Input}} + ASSISTANT: + +download_files: +- filename: bakllava.gguf + uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf +- filename: bakllava-mmproj.gguf + uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava", + "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/aio/gpu-8g/embeddings.yaml b/aio/gpu-8g/embeddings.yaml new file mode 100644 index 000000000000..512d63a41dfe --- /dev/null +++ b/aio/gpu-8g/embeddings.yaml @@ -0,0 +1,13 @@ +name: all-minilm-l6-v2 +backend: sentencetransformers +embeddings: true +parameters: + model: all-MiniLM-L6-v2 + +usage: | + You can test this model with curl like this: + + curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ + "input": "Your text string goes here", + "model": "all-minilm-l6-v2" + }' \ No newline at end of file diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml new file mode 100644 index 000000000000..3857cd6bb890 --- /dev/null +++ b/aio/gpu-8g/image-gen.yaml @@ -0,0 +1,22 @@ +name: dreamshaper +parameters: + model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors +backend: diffusers +step: 25 +f16: true +cuda: true +diffusers: + pipeline_type: StableDiffusionPipeline + cuda: true + enable_parameters: "negative_prompt,num_inference_steps" + scheduler_type: "k_dpmpp_2m" + +usage: | + curl http://localhost:8080/v1/images/generations \ + -H "Content-Type: application/json" \ + -d '{ + "prompt": "|", + "model": "dreamshaper", + "step": 25, + "size": "512x512" + }' \ No newline at end of file diff --git a/aio/gpu-8g/speech-to-text.yaml b/aio/gpu-8g/speech-to-text.yaml new file mode 100644 index 000000000000..f7ebd2176f3f --- /dev/null +++ b/aio/gpu-8g/speech-to-text.yaml @@ -0,0 +1,18 @@ +name: whisper +backend: whisper +parameters: + model: ggml-whisper-base.bin + +usage: | + ## example audio file + wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg + + ## Send the example audio file to the transcriptions endpoint + curl http://localhost:8080/v1/audio/transcriptions \ + -H "Content-Type: multipart/form-data" \ + -F file="@$PWD/gb1.ogg" -F model="whisper" + +download_files: +- filename: "ggml-whisper-base.bin" + sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" + uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file diff --git a/aio/gpu-8g/text-to-speech.yaml b/aio/gpu-8g/text-to-speech.yaml new file mode 100644 index 000000000000..93c1140375fb --- /dev/null +++ b/aio/gpu-8g/text-to-speech.yaml @@ -0,0 +1,15 @@ +name: voice-en-us-amy-low +download_files: + - filename: voice-en-us-amy-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz + +parameters: + model: en-us-amy-low.onnx + +usage: | + To test if this model works as expected, you can use the following curl command: + + curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ + "model":"voice-en-us-amy-low", + "input": "Hi, this is a test." + }' \ No newline at end of file diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml new file mode 100644 index 000000000000..d91e057c69fb --- /dev/null +++ b/aio/gpu-8g/text-to-text.yaml @@ -0,0 +1,51 @@ +name: gpt-3.5-turbo +mmap: true +parameters: + model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf + +roles: + assistant_function_call: assistant + function: tool +template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}} + {{ if eq .RoleName "assistant_function_call" }}{{end}} + {{ if eq .RoleName "function" }}{{end}} + {{if .Content}}{{.Content}}{{end}} + {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} + {{ if eq .RoleName "assistant_function_call" }}{{end}} + {{ if eq .RoleName "function" }}{{end}} + <|im_end|> + # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling + function: | + <|im_start|>system + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} + For each function call return a json object with function name and arguments within XML tags as follows: + + {'arguments': , 'name': } + <|im_end|> + {{.Input}} + <|im_start|>assistant + + chat: | + {{.Input}} + <|im_start|>assistant + completion: | + {{.Input}} +context_size: 4096 +f16: true +stopwords: +- <|im_end|> +- +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "hermes-2-pro-mistral", + "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] + }' diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml new file mode 100644 index 000000000000..3d240681f175 --- /dev/null +++ b/aio/gpu-8g/vision.yaml @@ -0,0 +1,40 @@ +backend: llama-cpp +context_size: 4096 +f16: true + +gpu_layers: 90 +mmap: true +name: llava + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: bakllava-mmproj.gguf +parameters: + model: bakllava.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 + seed: -1 +mirostat: 2 +mirostat_eta: 1.0 +mirostat_tau: 1.0 + +template: + chat: | + A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. + {{.Input}} + ASSISTANT: + +download_files: +- filename: bakllava.gguf + uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf +- filename: bakllava-mmproj.gguf + uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava", + "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 3add0972149d..383a2b773f2a 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -248,7 +248,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup Role: r, RoleName: role, Content: i.StringContent, + FunctionCall: i.FunctionCall, FunctionName: i.Name, + LastMessage: messageIndex == (len(input.Messages) - 1), + Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), MessageIndex: messageIndex, } templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml new file mode 100644 index 000000000000..84510d2abe76 --- /dev/null +++ b/embedded/models/hermes-2-pro-mistral.yaml @@ -0,0 +1,51 @@ +name: hermes-2-pro-mistral +mmap: true +parameters: + model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf + +roles: + assistant_function_call: assistant + function: tool +template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}} + {{ if eq .RoleName "assistant_function_call" }}{{end}} + {{ if eq .RoleName "function" }}{{end}} + {{if .Content}}{{.Content}}{{end}} + {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} + {{ if eq .RoleName "assistant_function_call" }}{{end}} + {{ if eq .RoleName "function" }}{{end}} + <|im_end|> + # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling + function: | + <|im_start|>system + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} + For each function call return a json object with function name and arguments within XML tags as follows: + + {'arguments': , 'name': } + <|im_end|> + {{.Input}} + <|im_start|>assistant + + chat: | + {{.Input}} + <|im_start|>assistant + completion: | + {{.Input}} +context_size: 4096 +f16: true +stopwords: +- <|im_end|> +- +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "hermes-2-pro-mistral", + "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] + }' diff --git a/go.mod b/go.mod index b218ca41e628..f2c53e84f9da 100644 --- a/go.mod +++ b/go.mod @@ -53,6 +53,9 @@ require ( ) require ( + github.com/Masterminds/goutils v1.1.1 // indirect + github.com/Masterminds/semver/v3 v3.2.0 // indirect + github.com/Masterminds/sprig/v3 v3.2.3 // indirect github.com/alecthomas/chroma v0.10.0 // indirect github.com/aymanbagabas/go-osc52 v1.0.3 // indirect github.com/aymerick/douceur v0.2.0 // indirect @@ -66,11 +69,14 @@ require ( github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.2 // indirect github.com/gorilla/css v1.0.0 // indirect + github.com/huandu/xstrings v1.3.3 // indirect github.com/klauspost/pgzip v1.2.5 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/microcosm-cc/bluemonday v1.0.26 // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect + github.com/mitchellh/copystructure v1.0.0 // indirect + github.com/mitchellh/reflectwalk v1.0.0 // indirect github.com/muesli/reflow v0.3.0 // indirect github.com/muesli/termenv v0.13.0 // indirect github.com/nwaples/rardecode v1.1.0 // indirect @@ -81,12 +87,15 @@ require ( github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect + github.com/shopspring/decimal v1.2.0 // indirect + github.com/spf13/cast v1.3.1 // indirect github.com/ulikunitz/xz v0.5.9 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect github.com/yuin/goldmark v1.5.2 // indirect github.com/yuin/goldmark-emoji v1.0.1 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect + golang.org/x/crypto v0.14.0 // indirect golang.org/x/term v0.13.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect diff --git a/go.sum b/go.sum index a3ecade2dd92..7238ceba3f8f 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,11 @@ github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI= github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0= +github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= +github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= +github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g= +github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA= +github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM= github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= @@ -85,6 +91,7 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= @@ -95,7 +102,10 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4= +github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= @@ -136,6 +146,10 @@ github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3r github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= +github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ= +github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= +github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY= +github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI= @@ -210,9 +224,14 @@ github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFt github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= +github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= +github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng= +github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -245,6 +264,7 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMx github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark v1.5.2 h1:ALmeCk/px5FSm1MAcFBAsVKZjDuMVj8Tm7FFIlMJnqU= github.com/yuin/goldmark v1.5.2/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark-emoji v1.0.1 h1:ctuWEyzGBwiucEqxzwe0SOYDXPAucOrE9NQC18Wa1os= @@ -266,7 +286,12 @@ go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmY golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= +golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= +golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -274,14 +299,18 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= +golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -297,6 +326,8 @@ golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -311,6 +342,7 @@ golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= @@ -318,11 +350,13 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss= golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/pkg/model/loader.go b/pkg/model/loader.go index bea32fb72a4c..c2c9df0ef077 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -10,6 +10,7 @@ import ( "sync" "text/template" + "github.com/Masterminds/sprig/v3" grammar "github.com/go-skynet/LocalAI/pkg/grammar" "github.com/go-skynet/LocalAI/pkg/grpc" process "github.com/mudler/go-processmanager" @@ -36,6 +37,9 @@ type ChatMessageTemplateData struct { FunctionName string Content string MessageIndex int + Function bool + FunctionCall interface{} + LastMessage bool } // Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go? @@ -261,7 +265,7 @@ func (ml *ModelLoader) loadTemplateIfExists(templateType TemplateType, templateN } // Parse the template - tmpl, err := template.New("prompt").Parse(dat) + tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat) if err != nil { return err } diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index cc514334e424..979b4d835a06 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -60,7 +60,23 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model } } default: - log.Warn().Msgf("[startup] failed resolving model '%s'", url) + if _, err := os.Stat(url); err == nil { + log.Debug().Msgf("[startup] resolved local model: %s", url) + // copy to modelPath + md5Name := utils.MD5(url) + + modelYAML, err := os.ReadFile(url) + if err != nil { + log.Error().Msgf("error loading model: %s", err.Error()) + continue + } + + if err := os.WriteFile(filepath.Join(modelPath, md5Name)+".yaml", modelYAML, os.ModePerm); err != nil { + log.Error().Msgf("error loading model: %s", err.Error()) + } + } else { + log.Warn().Msgf("[startup] failed resolving model '%s'", url) + } } } }