From 15d013aa5f9acebdba4dd50806d185f1b1935f89 Mon Sep 17 00:00:00 2001 From: Elisei Rykov Date: Sat, 28 Sep 2024 15:44:15 +0300 Subject: [PATCH] in progress --- .../multimodal/attention_based_selector.json | 205 ++++++++++++++++++ .../train/multimodal/attention_pooling.json | 205 ++++++++++++++++++ configs/exp/train/multimodal/llava.json | 205 ++++++++++++++++++ .../train/multimodal/llava_non_pickled.json | 205 ++++++++++++++++++ .../accelerate_llava_next_data.json | 14 ++ configs/utils/preprocess/coco2014_clip.json | 13 +- configs/utils/preprocess/llava_next_data.json | 14 ++ .../common/data/multimodal/common.py | 3 +- .../dataset/multimodal/collators.py | 2 +- 9 files changed, 859 insertions(+), 7 deletions(-) create mode 100644 configs/exp/train/multimodal/attention_based_selector.json create mode 100644 configs/exp/train/multimodal/attention_pooling.json create mode 100644 configs/exp/train/multimodal/llava.json create mode 100644 configs/exp/train/multimodal/llava_non_pickled.json create mode 100644 configs/utils/preprocess/accelerate_llava_next_data.json create mode 100644 configs/utils/preprocess/llava_next_data.json diff --git a/configs/exp/train/multimodal/attention_based_selector.json b/configs/exp/train/multimodal/attention_based_selector.json new file mode 100644 index 0000000..b22f3c3 --- /dev/null +++ b/configs/exp/train/multimodal/attention_based_selector.json @@ -0,0 +1,205 @@ +{ + "train_dataset_settings": { + "sources": [ + { + "name": "train", + "records_path": "/from_s3/dataset/llava_next_data_dialogs/train_chat.jsonl", + "sample_rate": 1.0 + } + ], + "prompt_template": { + "role_tag_mapping": { + "bot": "assistant", + "user": "user", + "system": "system" + }, + "prefix_template": "<|start_header_id|>{role}<|end_header_id|>\n\n", + "suffix_template": "<|eot_id|>" + }, + "modality_token_mapping": { + "image": "", + "audio": "