Skip to content

Commit

Permalink
feat(functions/aio): all-in-one images, function template enhancements (
Browse files Browse the repository at this point in the history
mudler#1862)

* feat(startup): allow to specify models from local files

* feat(aio): add Dockerfile, make targets, aio profiles

* feat(template): add Function and LastMessage

* add hermes2-pro-mistral

* update hermes2 definition

* feat(template): add sprig

* feat(template): expose FunctionCall

* feat(aio): switch llm for text
  • Loading branch information
mudler authored Mar 21, 2024
1 parent eeaf8c7 commit e533dcf
Show file tree
Hide file tree
Showing 20 changed files with 462 additions and 2 deletions.
9 changes: 9 additions & 0 deletions Dockerfile.aio
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
ARG BASE_IMAGE=ubuntu:22.04

FROM ${BASE_IMAGE}
ARG SIZE=cpu
ENV MODELS="/aio-models/embeddings.yaml,/aio-models/text-to-speech.yaml,/aio-models/image-gen.yaml,/aio-models/text-to-text.yaml,/aio-models/speech-to-text.yaml,/aio-models/vision.yaml"

COPY aio/${SIZE} /aio-models

ENTRYPOINT [ "/build/entrypoint.sh" ]
14 changes: 14 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,8 @@ backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.
grpcs: prepare $(GRPC_BACKENDS)

DOCKER_IMAGE?=local-ai
DOCKER_AIO_IMAGE?=local-ai-aio
DOCKER_AIO_SIZE?=cpu
IMAGE_TYPE?=core
BASE_IMAGE?=ubuntu:22.04

Expand All @@ -545,6 +547,18 @@ docker:
--build-arg GO_TAGS=$(GO_TAGS) \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE) .

docker-aio:
@echo "Building AIO image with size $(DOCKER_AIO_SIZE)"
@echo "Building AIO image with base image $(BASE_IMAGE)"
docker build \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg SIZE=$(DOCKER_AIO_SIZE) \
-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .

docker-aio-all:
$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu

docker-image-intel:
docker build \
Expand Down
13 changes: 13 additions & 0 deletions aio/cpu/embeddings.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name: all-minilm-l6-v2
backend: sentencetransformers
embeddings: true
parameters:
model: all-MiniLM-L6-v2

usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "all-minilm-l6-v2"
}'
53 changes: 53 additions & 0 deletions aio/cpu/image-gen.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
name: stablediffusion
backend: stablediffusion
parameters:
model: stablediffusion_assets

license: "BSD-3"
urls:
- https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE

description: |
Stable Diffusion in NCNN with c++, supported txt2img and img2img
download_files:
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
- filename: "stablediffusion_assets/log_sigmas.bin"
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
- filename: "stablediffusion_assets/vocab.txt"
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
18 changes: 18 additions & 0 deletions aio/cpu/speech-to-text.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: whisper
backend: whisper
parameters:
model: ggml-whisper-base.bin

usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
15 changes: 15 additions & 0 deletions aio/cpu/text-to-speech.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: voice-en-us-amy-low
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz

parameters:
model: en-us-amy-low.onnx

usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"voice-en-us-amy-low",
"input": "Hi, this is a test."
}'
22 changes: 22 additions & 0 deletions aio/cpu/text-to-text.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: gpt-3.5-turbo
context_size: 2048
f16: true
gpu_layers: 90
mmap: true
trimsuffix:
- "\n"
parameters:
model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf

template:
chat: &template |-
Instruct: {{.Input}}
Output:
completion: *template

usage: |
To use this model, interact with the API (in another terminal) with curl for instance:
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "phi-2",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
40 changes: 40 additions & 0 deletions aio/cpu/vision.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
backend: llama-cpp
context_size: 4096
f16: true

gpu_layers: 90
mmap: true
name: llava

roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"

mmproj: bakllava-mmproj.gguf
parameters:
model: bakllava.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0

template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:
download_files:
- filename: bakllava.gguf
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf

usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llava",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
13 changes: 13 additions & 0 deletions aio/gpu-8g/embeddings.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name: all-minilm-l6-v2
backend: sentencetransformers
embeddings: true
parameters:
model: all-MiniLM-L6-v2

usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "all-minilm-l6-v2"
}'
22 changes: 22 additions & 0 deletions aio/gpu-8g/image-gen.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: dreamshaper
parameters:
model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
backend: diffusers
step: 25
f16: true
cuda: true
diffusers:
pipeline_type: StableDiffusionPipeline
cuda: true
enable_parameters: "negative_prompt,num_inference_steps"
scheduler_type: "k_dpmpp_2m"

usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"model": "dreamshaper",
"step": 25,
"size": "512x512"
}'
18 changes: 18 additions & 0 deletions aio/gpu-8g/speech-to-text.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: whisper
backend: whisper
parameters:
model: ggml-whisper-base.bin

usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
15 changes: 15 additions & 0 deletions aio/gpu-8g/text-to-speech.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: voice-en-us-amy-low
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz

parameters:
model: en-us-amy-low.onnx

usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"voice-en-us-amy-low",
"input": "Hi, this is a test."
}'
51 changes: 51 additions & 0 deletions aio/gpu-8g/text-to-text.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
name: gpt-3.5-turbo
mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf

roles:
assistant_function_call: assistant
function: tool
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
{{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
{{ if eq .RoleName "function" }}<tool_result>{{end}}
{{if .Content}}{{.Content}}{{end}}
{{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
{{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
{{ if eq .RoleName "function" }}</tool_result>{{end}}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call><|im_end|>
{{.Input}}
<|im_start|>assistant
<tool_call>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "hermes-2-pro-mistral",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
40 changes: 40 additions & 0 deletions aio/gpu-8g/vision.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
backend: llama-cpp
context_size: 4096
f16: true

gpu_layers: 90
mmap: true
name: llava

roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"

mmproj: bakllava-mmproj.gguf
parameters:
model: bakllava.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0

template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:
download_files:
- filename: bakllava.gguf
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf

usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llava",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
3 changes: 3 additions & 0 deletions core/http/endpoints/openai/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
Role: r,
RoleName: role,
Content: i.StringContent,
FunctionCall: i.FunctionCall,
FunctionName: i.Name,
LastMessage: messageIndex == (len(input.Messages) - 1),
Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
MessageIndex: messageIndex,
}
templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
Expand Down
Loading

0 comments on commit e533dcf

Please sign in to comment.