From b2772509b44f2a19bb5d61a19c261b2ea02dc180 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 19 Apr 2024 18:23:44 +0200
Subject: [PATCH] models(llama3): add llama3 to embedded models (#2074)

Signed-off-by: Ettore Di Giacinto
---
 aio/cpu/text-to-text.yaml                 | 20 +++--
 aio/gpu-8g/text-to-text.yaml              | 20 +++--
 aio/intel/text-to-text.yaml               | 20 +++--
 embedded/models/hermes-2-pro-mistral.yaml | 20 +++--
 embedded/models/llama3-instruct.yaml      | 48 +++++++++++
 pkg/model/loader_test.go                  | 99 ++++++++++++++++++++++-
 6 files changed, 199 insertions(+), 28 deletions(-)
 create mode 100644 embedded/models/llama3-instruct.yaml

diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index 6c4ec9e68ce0..cf18f659ae8d 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -6,14 +6,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
     {{- if .Content}}
-    {{.Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}{{end }}
-    {{- if eq .RoleName "tool" }}{{end }}
     <|im_end|>
 # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
 function: |
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index 8d5c84f772f0..0407bb2292dc 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -6,14 +6,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
     {{- if .Content}}
-    {{.Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}{{end }}
-    {{- if eq .RoleName "tool" }}{{end }}
     <|im_end|>
 # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
 function: |
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
index a7cb5b4daf71..f5f93c14ff05 100644
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -7,14 +7,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
     {{- if .Content}}
-    {{.Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}{{end }}
-    {{- if eq .RoleName "tool" }}{{end }}
     <|im_end|>
 # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
 function: |
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
index 7bfa94180548..dd18ce6f862a 100644
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -6,14 +6,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
     {{- if .Content}}
-    {{.Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}{{end }}
-    {{- if eq .RoleName "tool" }}{{end }}
     <|im_end|>
 # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
 function: |
diff --git a/embedded/models/llama3-instruct.yaml b/embedded/models/llama3-instruct.yaml
new file mode 100644
index 000000000000..d483d2b2a16e
--- /dev/null
+++ b/embedded/models/llama3-instruct.yaml
@@ -0,0 +1,48 @@
+name: llama3-8b-instruct
+mmap: true
+parameters:
+  model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+
+template:
+  chat_message: |
+    <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+    {{ if .FunctionCall -}}
+    Function call:
+    {{ else if eq .RoleName "tool" -}}
+    Function response:
+    {{ end -}}
+    {{ if .Content -}}
+    {{.Content -}}
+    {{ else if .FunctionCall -}}
+    {{ toJson .FunctionCall -}}
+    {{ end -}}
+    <|eot_id|>
+  function: |
+    <|start_header_id|>system<|end_header_id|>
+
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    <tools>
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    </tools>
+    Use the following pydantic model json schema for each tool call you will make:
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+    Function call:
+  chat: |
+    <|begin_of_text|>{{.Input }}
+    <|start_header_id|>assistant<|end_header_id|>
+  completion: |
+    {{.Input}}
+context_size: 8192
+f16: true
+stopwords:
+- <|im_end|>
+- <dummy32000>
+- "<|eot_id|>"
+usage: |
+      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+          "model": "llama3-8b-instruct",
+          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+      }'
diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go
index e4207b35ca76..d3956b63ee98 100644
--- a/pkg/model/loader_test.go
+++ b/pkg/model/loader_test.go
@@ -27,7 +27,84 @@ const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq
 {{- end }}
 <|im_end|>`
 
-var testMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+{{ if .FunctionCall -}}
+Function call:
+{{ else if eq .RoleName "tool" -}}
+Function response:
+{{ end -}}
+{{ if .Content -}}
+{{.Content -}}
+{{ else if .FunctionCall -}}
+{{ toJson .FunctionCall -}}
+{{ end -}}
+<|eot_id|>`
+
+var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+    "user": {
+        "template": llama3,
+        "expected": "<|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+        "data": model.ChatMessageTemplateData{
+            SystemPrompt: "",
+            Role:         "user",
+            RoleName:     "user",
+            Content:      "A long time ago in a galaxy far, far away...",
+            FunctionCall: nil,
+            FunctionName: "",
+            LastMessage:  false,
+            Function:     false,
+            MessageIndex: 0,
+        },
+    },
+    "assistant": {
+        "template": llama3,
+        "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+        "data": model.ChatMessageTemplateData{
+            SystemPrompt: "",
+            Role:         "assistant",
+            RoleName:     "assistant",
+            Content:      "A long time ago in a galaxy far, far away...",
+            FunctionCall: nil,
+            FunctionName: "",
+            LastMessage:  false,
+            Function:     false,
+            MessageIndex: 0,
+        },
+    },
+    "function_call": {
+        "template": llama3,
+        "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nFunction call:\n{\"function\":\"test\"}<|eot_id|>",
+        "data": model.ChatMessageTemplateData{
+            SystemPrompt: "",
+            Role:         "assistant",
+            RoleName:     "assistant",
+            Content:      "",
+            FunctionCall: map[string]string{"function": "test"},
+            FunctionName: "",
+            LastMessage:  false,
+            Function:     false,
+            MessageIndex: 0,
+        },
+    },
+    "function_response": {
+        "template": llama3,
+        "expected": "<|start_header_id|>tool<|end_header_id|>\n\nFunction response:\nResponse from tool<|eot_id|>",
+        "data": model.ChatMessageTemplateData{
+            SystemPrompt: "",
+            Role:         "tool",
+            RoleName:     "tool",
+            Content:      "Response from tool",
+            FunctionCall: nil,
+            FunctionName: "",
+            LastMessage:  false,
+            Function:     false,
+            MessageIndex: 0,
}, +} + +var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ "user": { "template": chatML, "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>", @@ -91,13 +168,27 @@ var testMatch map[string]map[string]interface{} = map[string]map[string]interfac } var _ = Describe("Templates", func() { - Context("chat message", func() { + Context("chat message ChatML", func() { + var modelLoader *ModelLoader + BeforeEach(func() { + modelLoader = NewModelLoader("") + }) + for key := range chatMLTestMatch { + foo := chatMLTestMatch[key] + It("renders correctly `"+key+"`", func() { + templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) + Expect(err).ToNot(HaveOccurred()) + Expect(templated).To(Equal(foo["expected"]), templated) + }) + } + }) + Context("chat message llama3", func() { var modelLoader *ModelLoader BeforeEach(func() { modelLoader = NewModelLoader("") }) - for key := range testMatch { - foo := testMatch[key] + for key := range llama3TestMatch { + foo := llama3TestMatch[key] It("renders correctly `"+key+"`", func() { templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) Expect(err).ToNot(HaveOccurred())