Skip to content

Commit

Permalink
Add the Gemini model format
Browse files (browse the repository at this point in the history)
  • Loading branch information
scosman committed Jan 14, 2025
1 parent 3f656ff commit 3840687
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@
"download_huggingface_chat_template_toolcall",
"Download: HuggingFace chat template with tool calls (JSONL)",
])
available_model_select.push([
"download_vertex_gemini_1_5",
"Download: Google Vertex-AI Gemini 1.5 format (JSONL)",
])
}
const download_model_select_options: Record<string, string> = {
Expand All @@ -138,6 +142,7 @@
download_huggingface_chat_template: "huggingface_chat_template_jsonl",
download_huggingface_chat_template_toolcall:
"huggingface_chat_template_toolcall_jsonl",
download_vertex_gemini_1_5: "vertex_gemini_1_5",
}
let datasets: DatasetSplit[] | null = null
Expand Down Expand Up @@ -751,6 +756,7 @@
[disabled_header, "Select a split strategy"],
["train_test", "Train/Test -- 80/20"],
["train_test_val", "Train/Test/Val -- 60/20/20"],
["train_test_val_80", "Train/Test/Val -- 80/10/10"],
["all", "Entire Dataset -- 100"],
]}
bind:value={new_dataset_split}
Expand Down
39 changes: 39 additions & 0 deletions libs/core/kiln_ai/adapters/fine_tune/dataset_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ class DatasetFormat(str, Enum):
"huggingface_chat_template_toolcall_jsonl"
)

"""Vertex Gemini 1.5 format (flash and pro)"""
VERTEX_GEMINI_1_5 = "vertex_gemini_1_5"


class FormatGenerator(Protocol):
"""Protocol for format generators"""
Expand Down Expand Up @@ -121,11 +124,47 @@ def generate_huggingface_chat_template_toolcall(
}


def generate_vertex_gemini_1_5(
    task_run: TaskRun, system_message: str
) -> Dict[str, Any]:
    """Build one training record in the Vertex Gemini 1.5 format (flash and pro).

    The returned dict has two top-level keys:

    * ``systemInstruction`` - ``system_message`` wrapped as a single text part
      under the ``system`` role.
    * ``contents`` - a two-entry conversation: ``task_run.input`` as the
      ``user`` entry followed by ``task_run.output.output`` as the ``model``
      entry.
    """
    # Format reference:
    # https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-supervised-tuning-prepare

    def _single_text_entry(role: str, text: Any) -> Dict[str, Any]:
        # Every entry in this format shares one shape: a role and one text part.
        return {"role": role, "parts": [{"text": text}]}

    system_entry = _single_text_entry("system", system_message)
    user_entry = _single_text_entry("user", task_run.input)
    model_entry = _single_text_entry("model", task_run.output.output)

    return {
        "systemInstruction": system_entry,
        "contents": [user_entry, model_entry],
    }


# Dispatch table: pairs each DatasetFormat member with the generator function
# registered for that format.
FORMAT_GENERATORS: Dict[DatasetFormat, FormatGenerator] = {
    DatasetFormat.OPENAI_CHAT_JSONL: generate_chat_message_response,
    DatasetFormat.OPENAI_CHAT_TOOLCALL_JSONL: generate_chat_message_toolcall,
    DatasetFormat.HUGGINGFACE_CHAT_TEMPLATE_JSONL: (
        generate_huggingface_chat_template
    ),
    DatasetFormat.HUGGINGFACE_CHAT_TEMPLATE_TOOLCALL_JSONL: (
        generate_huggingface_chat_template_toolcall
    ),
    DatasetFormat.VERTEX_GEMINI_1_5: generate_vertex_gemini_1_5,
}


Expand Down

0 comments on commit 3840687

Please sign in to comment.