From b2772509b44f2a19bb5d61a19c261b2ea02dc180 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 19 Apr 2024 18:23:44 +0200
Subject: [PATCH] models(llama3): add llama3 to embedded models (#2074)

Signed-off-by: Ettore Di Giacinto
---
 aio/cpu/text-to-text.yaml                 | 20 +++--
 aio/gpu-8g/text-to-text.yaml              | 20 +++--
 aio/intel/text-to-text.yaml               | 20 +++--
 embedded/models/hermes-2-pro-mistral.yaml | 20 +++--
 embedded/models/llama3-instruct.yaml      | 48 +++++++++++
 pkg/model/loader_test.go                  | 99 ++++++++++++++++++++++-
 6 files changed, 199 insertions(+), 28 deletions(-)
 create mode 100644 embedded/models/llama3-instruct.yaml

diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index 6c4ec9e68ce0..cf18f659ae8d 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -6,14 +6,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
     {{- if .Content}}
-    {{.Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}{{end }}
-    {{- if eq .RoleName "tool" }}{{end }}
     <|im_end|>
 # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
 function: |
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index 8d5c84f772f0..0407bb2292dc 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -6,14 +6,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
     {{- if .Content}}
-    {{.Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}{{end }}
-    {{- if eq .RoleName "tool" }}{{end }}
     <|im_end|>
 # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
 function: |
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
index a7cb5b4daf71..f5f93c14ff05 100644
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -7,14 +7,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
     {{- if .Content}}
-    {{.Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}{{end }}
-    {{- if eq .RoleName "tool" }}{{end }}
     <|im_end|>
 # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
 function: |
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
index 7bfa94180548..dd18ce6f862a 100644
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -6,14 +6,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
     {{- if .Content}}
-    {{.Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}{{end }}
-    {{- if eq .RoleName "tool" }}{{end }}
     <|im_end|>
 # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
 function: |
diff --git a/embedded/models/llama3-instruct.yaml b/embedded/models/llama3-instruct.yaml
new file mode 100644
index 000000000000..d483d2b2a16e
--- /dev/null
+++ b/embedded/models/llama3-instruct.yaml
@@ -0,0 +1,48 @@
+name: llama3-8b-instruct
+mmap: true
+parameters:
+  model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+
+template:
+  chat_message: |
+    <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+    {{ if .FunctionCall -}}
+    Function call:
+    {{ else if eq .RoleName "tool" -}}
+    Function response:
+    {{ end -}}
+    {{ if .Content -}}
+    {{.Content -}}
+    {{ else if .FunctionCall -}}
+    {{ toJson .FunctionCall -}}
+    {{ end -}}
+    <|eot_id|>
+  function: |
+    <|start_header_id|>system<|end_header_id|>
+
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    <tools>
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    </tools>
+    Use the following pydantic model json schema for each tool call you will make:
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+    Function call:
+  chat: |
+    <|begin_of_text|>{{.Input }}
+    <|start_header_id|>assistant<|end_header_id|>
+  completion: |
+    {{.Input}}
+context_size: 8192
+f16: true
+stopwords:
+- <|im_end|>
+- <dummy32000>
+- "<|eot_id|>"
+usage: |
+      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+          "model": "llama3-8b-instruct",
+          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+      }'
diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go
index e4207b35ca76..d3956b63ee98 100644
--- a/pkg/model/loader_test.go
+++ b/pkg/model/loader_test.go
@@ -27,7 +27,84 @@ const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq
 {{- end }}
 <|im_end|>`
 
-var testMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+{{ if .FunctionCall -}}
+Function call:
+{{ else if eq .RoleName "tool" -}}
+Function response:
+{{ end -}}
+{{ if .Content -}}
+{{.Content -}}
+{{ else if .FunctionCall -}}
+{{ toJson .FunctionCall -}}
+{{ end -}}
+<|eot_id|>`
+
+var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+    "user": {
+        "template": llama3,
+        "expected": "<|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+        "data": model.ChatMessageTemplateData{
+            SystemPrompt: "",
+            Role:         "user",
+            RoleName:     "user",
+            Content:      "A long time ago in a galaxy far, far away...",
+            FunctionCall: nil,
+            FunctionName: "",
+            LastMessage:  false,
+            Function:     false,
+            MessageIndex: 0,
+        },
+    },
+    "assistant": {
+        "template": llama3,
+        "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+        "data": model.ChatMessageTemplateData{
+            SystemPrompt: "",
+            Role:         "assistant",
+            RoleName:     "assistant",
+            Content:      "A long time ago in a galaxy far, far away...",
+            FunctionCall: nil,
+            FunctionName: "",
+            LastMessage:  false,
+            Function:     false,
+            MessageIndex: 0,
+        },
+    },
+    "function_call": {
+        "template": llama3,
+        "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nFunction call:\n{\"function\":\"test\"}<|eot_id|>",
+        "data": model.ChatMessageTemplateData{
+            SystemPrompt: "",
+            Role:         "assistant",
+            RoleName:     "assistant",
+            Content:      "",
+            FunctionCall: map[string]string{"function": "test"},
+            FunctionName: "",
+            LastMessage:  false,
+            Function:     false,
+            MessageIndex: 0,
+        },
+    },
+    "function_response": {
+        "template": llama3,
+        "expected": "<|start_header_id|>tool<|end_header_id|>\n\nFunction response:\nResponse from tool<|eot_id|>",
+        "data": model.ChatMessageTemplateData{
+            SystemPrompt: "",
+            Role:         "tool",
+            RoleName:     "tool",
+            Content:      "Response from tool",
+            FunctionCall: nil,
+            FunctionName: "",
+            LastMessage:  false,
+            Function:     false,
+            MessageIndex: 0,
}, +} + +var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ "user": { "template": chatML, "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>", @@ -91,13 +168,27 @@ var testMatch map[string]map[string]interface{} = map[string]map[string]interfac } var _ = Describe("Templates", func() { - Context("chat message", func() { + Context("chat message ChatML", func() { + var modelLoader *ModelLoader + BeforeEach(func() { + modelLoader = NewModelLoader("") + }) + for key := range chatMLTestMatch { + foo := chatMLTestMatch[key] + It("renders correctly `"+key+"`", func() { + templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) + Expect(err).ToNot(HaveOccurred()) + Expect(templated).To(Equal(foo["expected"]), templated) + }) + } + }) + Context("chat message llama3", func() { var modelLoader *ModelLoader BeforeEach(func() { modelLoader = NewModelLoader("") }) - for key := range testMatch { - foo := testMatch[key] + for key := range llama3TestMatch { + foo := llama3TestMatch[key] It("renders correctly `"+key+"`", func() { templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) Expect(err).ToNot(HaveOccurred())