From 38406873f6ddc8d30ce7a17115070ffb0f519958 Mon Sep 17 00:00:00 2001 From: scosman Date: Mon, 13 Jan 2025 19:53:26 -0500 Subject: [PATCH] Add the Gemini model format --- .../[task_id]/create_finetune/+page.svelte | 6 +++ .../adapters/fine_tune/dataset_formatter.py | 39 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/+page.svelte b/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/+page.svelte index c8d64b9..2737086 100644 --- a/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/+page.svelte +++ b/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/+page.svelte @@ -130,6 +130,10 @@ "download_huggingface_chat_template_toolcall", "Download: HuggingFace chat template with tool calls (JSONL)", ]) + available_model_select.push([ + "download_vertex_gemini_1_5", + "Download: Google Vertex-AI Gemini 1.5 format (JSONL)", + ]) } const download_model_select_options: Record = { @@ -138,6 +142,7 @@ download_huggingface_chat_template: "huggingface_chat_template_jsonl", download_huggingface_chat_template_toolcall: "huggingface_chat_template_toolcall_jsonl", + download_vertex_gemini_1_5: "vertex_gemini_1_5", } let datasets: DatasetSplit[] | null = null @@ -751,6 +756,7 @@ [disabled_header, "Select a split strategy"], ["train_test", "Train/Test -- 80/20"], ["train_test_val", "Train/Test/Val -- 60/20/20"], + ["train_test_val_80", "Train/Test/Val -- 80/10/10"], ["all", "Entire Dataset -- 100"], ]} bind:value={new_dataset_split} diff --git a/libs/core/kiln_ai/adapters/fine_tune/dataset_formatter.py b/libs/core/kiln_ai/adapters/fine_tune/dataset_formatter.py index b290319..ebbb397 100644 --- a/libs/core/kiln_ai/adapters/fine_tune/dataset_formatter.py +++ b/libs/core/kiln_ai/adapters/fine_tune/dataset_formatter.py @@ -25,6 +25,9 @@ class DatasetFormat(str, Enum): "huggingface_chat_template_toolcall_jsonl" ) + """Vertex Gemini 1.5 format (flash and pro)""" + VERTEX_GEMINI_1_5 = "vertex_gemini_1_5" + class FormatGenerator(Protocol): """Protocol for format generators""" @@ -121,11 +124,47 @@ def generate_huggingface_chat_template_toolcall( } +def generate_vertex_gemini_1_5( + task_run: TaskRun, system_message: str +) -> Dict[str, Any]: + """Generate Vertex Gemini 1.5 format (flash and pro)""" + # See https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-supervised-tuning-prepare + return { + "systemInstruction": { + "role": "system", + "parts": [ + { + "text": system_message, + } + ], + }, + "contents": [ + { + "role": "user", + "parts": [ + { + "text": task_run.input, + } + ], + }, + { + "role": "model", + "parts": [ + { + "text": task_run.output.output, + } + ], + }, + ], + } + + FORMAT_GENERATORS: Dict[DatasetFormat, FormatGenerator] = { DatasetFormat.OPENAI_CHAT_JSONL: generate_chat_message_response, DatasetFormat.OPENAI_CHAT_TOOLCALL_JSONL: generate_chat_message_toolcall, DatasetFormat.HUGGINGFACE_CHAT_TEMPLATE_JSONL: generate_huggingface_chat_template, DatasetFormat.HUGGINGFACE_CHAT_TEMPLATE_TOOLCALL_JSONL: generate_huggingface_chat_template_toolcall, + DatasetFormat.VERTEX_GEMINI_1_5: generate_vertex_gemini_1_5, }