diff --git a/configs/exp/train/multimodal/attention_based_selector.json b/configs/exp/train/multimodal/attention_based_selector.json
index b22f3c3..a90dc4c 100644
--- a/configs/exp/train/multimodal/attention_based_selector.json
+++ b/configs/exp/train/multimodal/attention_based_selector.json
@@ -127,9 +127,9 @@
         "save_total_limit": 5,
         "load_best_model_at_end": false,
         "deepspeed": "configs/exp/deepspeed/stage2.json",
-        "dispatch_batches": false,
         "dataloader_num_workers": 16,
-        "dataloader_pin_memory": false
+        "dataloader_pin_memory": false,
+        "dispatch_batches": false
     },
     "wandb_settings": {
         "project_name": "rykov",
diff --git a/configs/exp/train/multimodal/attention_pooling.json b/configs/exp/train/multimodal/attention_pooling.json
index 0b48b50..ba51284 100644
--- a/configs/exp/train/multimodal/attention_pooling.json
+++ b/configs/exp/train/multimodal/attention_pooling.json
@@ -63,11 +63,7 @@
                 },
                 "audio": null
             },
-<<<<<<< HEAD
         "n_modality_embeddings": 64,
-=======
-        "n_modality_embeddings": 128,
->>>>>>> 1167a3408adc49898aa4b00c6235adbfceabf9d1
         "start_modality_token": "<MS>",
         "end_modality_token": "</MS>",
         "dataset_type": "multimodal",
@@ -186,11 +182,7 @@
             },
             "dataset_type": "multimodal",
             "max_tokens_count": 2000,
-<<<<<<< HEAD
             "n_modality_embeddings": 64,
-=======
-            "n_modality_embeddings": 128,
->>>>>>> 1167a3408adc49898aa4b00c6235adbfceabf9d1
             "start_modality_token": "<MS>",
             "end_modality_token": "</MS>",
             "only_answer_loss": true,
diff --git a/tests/fixtures/configs/train/multimodal/llama_llava_base_clip.json b/tests/fixtures/configs/train/multimodal/llama_llava_base_clip.json
index d95ee19..68a105f 100644
--- a/tests/fixtures/configs/train/multimodal/llama_llava_base_clip.json
+++ b/tests/fixtures/configs/train/multimodal/llama_llava_base_clip.json
@@ -22,12 +22,12 @@
       },
       "modality_reader_settings_mapping": {
         "image": {
-          "reader_type": "clip",
+          "reader_type": "pickle",
           "reader_path": "tests/fixtures/models/clip_tiny"
         },
         "audio": null
       },
-      "n_modality_embeddings": 225,
+      "n_modality_embeddings": 32,
       "start_modality_token": "<MS>",
       "end_modality_token": "</MS>",
       "dataset_type": "multimodal",
@@ -58,12 +58,12 @@
       },
       "modality_reader_settings_mapping": {
         "image": {
-          "reader_type": "clip",
+          "reader_type": "pickle",
           "reader_path": "tests/fixtures/models/clip_tiny"
         },
         "audio": null
       },
-      "n_modality_embeddings": 225,
+      "n_modality_embeddings": 32,
       "start_modality_token": "<MS>",
       "end_modality_token": "</MS>",
       "dataset_type": "multimodal",
@@ -111,35 +111,40 @@
     "trainer_settings": {
       "evaluation_strategy": "epoch",
       "save_strategy": "epoch",
-      "per_device_train_batch_size": 1,
-      "per_device_eval_batch_size": 1,
-      "gradient_accumulation_steps": 1,
+      "per_device_train_batch_size": 2,
+      "per_device_eval_batch_size": 2,
+      "gradient_accumulation_steps": 2,
       "logging_steps": 1,
       "learning_rate": 0.00002,
       "num_train_epochs": 1,
       "lr_scheduler_type": "cosine",
       "warmup_steps": 0,
       "fp16": false,
-      "bf16": false,
+      "bf16": true,
       "optim": "adamw_torch",
       "save_total_limit": 1,
-      "no_cuda": true
+      "no_cuda": false,
+      "dispatch_batches": false,
+      "load_best_model_at_end": false,
+      "deepspeed": null,
+      "dataloader_num_workers": 16,
+      "dataloader_pin_memory": false
     },
     "logging_settings": {
-      "project_name": "alignment",
-      "run_name": "multimodal",
-      "entity": "turbo-alignment"
+      "project_name": "rykov",
+      "run_name": "multimodal",
+      "entity": "rykov"
     },
     "modality_encoder_settings_mapping": {
       "image": {
         "modality_encoder_type": "clip",
-        "is_pickle": false,
+        "is_pickle": true,
         "encoder_path": "tests/fixtures/models/clip_tiny"
       },
       "audio": null
     },
     "modality_projector_mapping": {
-      "image": "llava",
+      "image": "top_k_attention_pooling_with_n_heads",
       "audio": null
     },
     "modality_projector_initialization_mapping": {
@@ -176,7 +181,7 @@
         },
         "dataset_type": "multimodal",
         "max_tokens_count": 2000,
-        "n_modality_embeddings": 225,
+        "n_modality_embeddings": 32,
         "start_modality_token": "<MS>",
         "end_modality_token": "</MS>",
         "only_answer_loss": true,
@@ -186,7 +191,7 @@
         },
         "modality_reader_settings_mapping": {
           "image": {
-            "reader_type": "clip",
+            "reader_type": "pickle",
             "reader_path": "tests/fixtures/models/clip_tiny"
           },
           "audio": null
diff --git a/tests/fixtures/datasets/multimodal/image_chat.jsonl b/tests/fixtures/datasets/multimodal/image_chat.jsonl
index e53f0b5..70761bc 100644
--- a/tests/fixtures/datasets/multimodal/image_chat.jsonl
+++ b/tests/fixtures/datasets/multimodal/image_chat.jsonl
@@ -1,4 +1,4 @@
-{"id": "0", "messages": [{"role": "user", "type": "image", "content": "tests/fixtures/datasets/multimodal/images/img_1.jpg"}, {"role": "user", "type": "text", "content": "Describe the scene"}, {"role": "bot", "type": "text", "content": "Sorry, I will not describe the scene."}]}
-{"id": "1", "messages": [{"role": "user", "type": "image", "content": "tests/fixtures/datasets/multimodal/images/img_2.jpg"}, {"role": "user", "type": "text", "content": "What do you see on the image?"}, {"role": "bot", "type": "text", "content": "I see nothing."}, {"role": "user", "type": "text", "content": "What about this one?"}, {"role": "user", "type": "image", "content": "tests/fixtures/datasets/multimodal/images/img_1.jpg"}, {"role": "bot", "type": "text", "content": "Sorry..."}]}
-{"id": "2", "messages": [{"role": "user", "type": "image", "content": "tests/fixtures/datasets/multimodal/images/img_3.jpg"}, {"role": "user", "type": "image", "content": "tests/fixtures/datasets/multimodal/images/img_4.jpg"}, {"role": "user", "type": "text", "content": "Please, describe these two photos."}, {"role": "bot", "type": "text", "content": "OK."}]}
-{"id": "3", "messages": [{"role": "user", "type": "image", "content": "tests/fixtures/datasets/multimodal/images/img_5.jpg"}, {"role": "user", "type": "text", "content": "Describe the scene"}, {"role": "bot", "type": "text", "content": "No."}]}
\ No newline at end of file
+{"id": "0", "messages": [{"role": "user", "type": "image", "content": "images/00000/000000935.jpg"}, {"role": "user", "type": "text", "content": "Describe the scene"}, {"role": "bot", "type": "text", "content": "Sorry, I will not describe the scene."}]}
+{"id": "1", "messages": [{"role": "user", "type": "image", "content": "images/00000/000000934.jpg"}, {"role": "user", "type": "text", "content": "What do you see on the image?"}, {"role": "bot", "type": "text", "content": "I see nothing."}, {"role": "user", "type": "text", "content": "What about this one?"}]}
+{"id": "2", "messages": [{"role": "user", "type": "image", "content": "images/00000/000000933.jpg"}, {"role": "user", "type": "text", "content": "Please, describe this photo."}, {"role": "bot", "type": "text", "content": "OK."}]}
+{"id": "3", "messages": [{"role": "user", "type": "image", "content": "images/00000/000000932.jpg"}, {"role": "user", "type": "text", "content": "Describe the scene"}, {"role": "bot", "type": "text", "content": "No."}]}
\ No newline at end of file