From a8d881d5fbcb324bbfb6bb63badec0eeb69467b9 Mon Sep 17 00:00:00 2001 From: lmeribal Date: Tue, 6 Aug 2024 14:21:52 +0300 Subject: [PATCH 01/10] Multimodal dataset example --- docs/dataset_example.md | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/docs/dataset_example.md b/docs/dataset_example.md index 7ca2f81..6ea91c5 100644 --- a/docs/dataset_example.md +++ b/docs/dataset_example.md @@ -11,7 +11,7 @@ - [Pair Preferences Dataset](#-pair-preferences-dataset) - [KTO Dataset](#-kto-dataset) - [Sampling Dataset](#-sampling-dataset) -- [Multimodal Dataset ](#-multimodal-dataset) (⌛️ Work in progress...) +- [Multimodal Dataset](#-multimodal-dataset) - [Classification Dataset](#-classification-dataset) - [DPPO Dataset](#-ddpo-dataset) (⌛️ Work in progress...) @@ -118,9 +118,45 @@ Example: ## Multimodal Dataset -⌛️ in progress.. +- `messages`: `list[MultimodalChatMessage]` — This is a sequence of messages that make up the chat history. Each `MultimodalChatMessage` includes: + - `role` - The participant's role in the conversation (e.g., `user` or `bot`). + - `type` - The type of modality (e.g., `text` or `image`). + - `content` - If the `type` is `text`, it's the textual content of the message. If it's `image`, it's the file path. +Example: +```json +{ + "id": "0", + "messages": [ + { + "role": "system", + "type": "text", + "content": "You are a Multimodal AI assistant." + }, + { + "role": "user", + "type": "image", + "content": "/path/to/dog.jpg" + }, + { + "role": "user", + "type": "image", + "content": "/path/to/cat.jpg" + }, + { + "role": "user", + "type": "text", + "content": "What's the difference between these two images?" + }, + { + "role": "bot", + "type": "text", + "content": "The two images in question both feature animals, albeit of different species. The first image depicts a dog, which is generally perceived as an animal that elicits positive emotional responses. 
The second image features a cat, which is also regarded as an animal that evokes a positive emotional response." + } + ] +} +``` From a03e995c13472e7c16bf1093619d6eb5f98e84ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A0=D1=8B=D0=BA=D0=BE=D0=B2=20=D0=95=D0=BB=D0=B8=D1=81?= =?UTF-8?q?=D0=B5=D0=B9=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5=D0=B5=D0=B2=D0=B8?= =?UTF-8?q?=D1=87?= Date: Mon, 19 Aug 2024 11:34:07 +0000 Subject: [PATCH 02/10] in progress --- tutorials/multimodal/multimodal.json | 195 +++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 tutorials/multimodal/multimodal.json diff --git a/tutorials/multimodal/multimodal.json b/tutorials/multimodal/multimodal.json new file mode 100644 index 0000000..1b5040a --- /dev/null +++ b/tutorials/multimodal/multimodal.json @@ -0,0 +1,195 @@ +{ + "train_dataset_settings": { + "sources": [ + { + "name": "train", + "records_path": "tests/fixtures/datasets/multimodal/image_chat.jsonl", + "sample_rate": 1 + } + ], + "prompt_template": { + "role_tag_mapping": { + "bot": "", + "user": "", + "system": "" + }, + "prefix_template": "{role}", + "suffix_template": "" + }, + "modality_token_mapping": { + "image": "", + "audio": "