Add the Gemini model format

Kiln-AI · Jan 14, 2025 · 3840687
1 parent 3f656ff
commit 3840687
Show file tree

Hide file tree

Showing 2 changed files with 45 additions and 0 deletions.
diff --git a/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/+page.svelte b/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/+page.svelte
@@ -130,6 +130,10 @@
       "download_huggingface_chat_template_toolcall",
       "Download: HuggingFace chat template with tool calls (JSONL)",
     ])
+    available_model_select.push([
+      "download_vertex_gemini_1_5",
+      "Download: Google Vertex-AI Gemini 1.5 format (JSONL)",
+    ])
   }
 
   const download_model_select_options: Record<string, string> = {
@@ -138,6 +142,7 @@
     download_huggingface_chat_template: "huggingface_chat_template_jsonl",
     download_huggingface_chat_template_toolcall:
       "huggingface_chat_template_toolcall_jsonl",
+    download_vertex_gemini_1_5: "vertex_gemini_1_5",
   }
 
   let datasets: DatasetSplit[] | null = null
@@ -751,6 +756,7 @@
             [disabled_header, "Select a split strategy"],
             ["train_test", "Train/Test -- 80/20"],
             ["train_test_val", "Train/Test/Val -- 60/20/20"],
+            ["train_test_val_80", "Train/Test/Val -- 80/10/10"],
             ["all", "Entire Dataset -- 100"],
           ]}
           bind:value={new_dataset_split}

diff --git a/libs/core/kiln_ai/adapters/fine_tune/dataset_formatter.py b/libs/core/kiln_ai/adapters/fine_tune/dataset_formatter.py
@@ -25,6 +25,9 @@ class DatasetFormat(str, Enum):
         "huggingface_chat_template_toolcall_jsonl"
     )
 
+    """Vertex Gemini 1.5 format (flash and pro)"""
+    VERTEX_GEMINI_1_5 = "vertex_gemini_1_5"
+
 
 class FormatGenerator(Protocol):
     """Protocol for format generators"""
@@ -121,11 +124,47 @@ def generate_huggingface_chat_template_toolcall(
     }
 
 
+def generate_vertex_gemini_1_5(
+    task_run: TaskRun, system_message: str
+) -> Dict[str, Any]:
+    """Generate Vertex Gemini 1.5 format (flash and pro)
+
+    Builds one training example as a dictionary with two top-level keys:
+    "systemInstruction", which carries the system message as a single text
+    part, and "contents", which holds a "user" turn followed by a "model"
+    turn.
+
+    Args:
+        task_run: The run to convert. Its `input` becomes the user turn text
+            and its `output.output` becomes the model turn text.
+        system_message: Text placed in the "systemInstruction" entry.
+
+    Returns:
+        A dictionary representing a single example in this format.
+    """
+    # See https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-supervised-tuning-prepare
+    return {
+        # System prompt, expressed as a one-element list of text parts.
+        "systemInstruction": {
+            "role": "system",
+            "parts": [
+                {
+                    "text": system_message,
+                }
+            ],
+        },
+        # Conversation turns: the task input, then the recorded output.
+        "contents": [
+            {
+                "role": "user",
+                "parts": [
+                    {
+                        "text": task_run.input,
+                    }
+                ],
+            },
+            {
+                "role": "model",
+                "parts": [
+                    {
+                        "text": task_run.output.output,
+                    }
+                ],
+            },
+        ],
+    }
+
+
+# Lookup table from each DatasetFormat member to the generator function registered for it.
 FORMAT_GENERATORS: Dict[DatasetFormat, FormatGenerator] = {
     DatasetFormat.OPENAI_CHAT_JSONL: generate_chat_message_response,
     DatasetFormat.OPENAI_CHAT_TOOLCALL_JSONL: generate_chat_message_toolcall,
     DatasetFormat.HUGGINGFACE_CHAT_TEMPLATE_JSONL: generate_huggingface_chat_template,
     DatasetFormat.HUGGINGFACE_CHAT_TEMPLATE_TOOLCALL_JSONL: generate_huggingface_chat_template_toolcall,
+    DatasetFormat.VERTEX_GEMINI_1_5: generate_vertex_gemini_1_5,
 }