
Commit

fix
Signed-off-by: n1ck-guo <[email protected]>
n1ck-guo committed Jan 3, 2025
1 parent c941958 commit 2637332
Showing 3 changed files with 77 additions and 13 deletions.
22 changes: 15 additions & 7 deletions auto_round/auto_quantizer.py
@@ -416,16 +416,24 @@ def convert_model(self, model: nn.Module):
         data_type = quantization_config.data_type if hasattr(quantization_config,
                                                              "data_type") else "int"  # pragma: no cover
         sym = quantization_config.sym
-        to_quant_block_names = quantization_config.to_quant_block_names if hasattr(quantization_config,
-                                                                                   "to_quant_block_names") else None
-        if to_quant_block_names is None:  # TODO check compatibility
-            all_blocks = get_block_names(model)
-        else:
-            all_blocks = get_multimodal_block_names(model, quant_vision=True)
-        quant_block_list = find_matching_blocks(model, all_blocks, to_quant_block_names)
+        quant_block_list = quantization_config.quant_block_list if hasattr(quantization_config,
+                                                                           "quant_block_list") else None
+        if quant_block_list is None:
+            to_quant_block_names = quantization_config.to_quant_block_names if hasattr(quantization_config,
+                                                                                       "to_quant_block_names") else None
+            if to_quant_block_names is not None:
+                if isinstance(to_quant_block_names, (list, tuple)):
+                    quant_block_list = to_quant_block_names
+                else:
+                    quant_block_list = []
+                    for block in to_quant_block_names.split(','):
+                        quant_block_list.append([f'{block}.{i}' for i in range(len(get_module(model, block)))])
+            else:
+                all_blocks = get_block_names(model)
+                quant_block_list = find_matching_blocks(model, all_blocks, to_quant_block_names)
 
         layer_names = get_layer_names_in_block(model, quant_block_list=quant_block_list)
 
         extra_config = {}
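
The reworked lookup gives a stored quant_block_list first priority and only falls back to to_quant_block_names, which may be a list/tuple of block-name lists or a comma-separated string of block prefixes that is expanded per layer index. A minimal sketch of that string expansion, assuming a toy model and a simplified stand-in for the repo's get_module helper:

import torch.nn as nn

def get_module(model, key):
    # simplified stand-in for auto_round's get_module: walk a dotted attribute path
    module = model
    for name in key.split('.'):
        module = getattr(module, name)
    return module

class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.blocks = nn.ModuleList(nn.Linear(4, 4) for _ in range(2))

model = ToyModel()
to_quant_block_names = "blocks"  # e.g. "vision.blocks,language.layers" for a multimodal model
quant_block_list = []
for block in to_quant_block_names.split(','):
    quant_block_list.append([f'{block}.{i}' for i in range(len(get_module(model, block)))])
print(quant_block_list)  # [['blocks.0', 'blocks.1']]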
1 change: 1 addition & 0 deletions auto_round/special_model_handler.py
@@ -33,6 +33,7 @@ def _get_deepseek_vl2_multimodal_block(model, quant_vision=False):
     block_names = []
     if quant_vision:
         block_names.append([f"vision.blocks.{i}" for i in range(len(model.vision.blocks))])
+        block_names.append([f"projector.layers.{i}" for i in range(len(model.projector.layers))])
     block_names.append([f"language.model.layers.{i}" for i in range(len(model.language.model.layers))])
     return block_names
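
With the projector layers appended, a quant_vision run now tunes the vision tower, the projector, and the language blocks together. Illustratively, with hypothetical block counts rather than deepseek-vl2-tiny's real sizes, the function would return:

# hypothetical output of _get_deepseek_vl2_multimodal_block(model, quant_vision=True)
block_names = [
    ["vision.blocks.0", "vision.blocks.1"],                   # vision tower
    ["projector.layers.0"],                                   # newly covered by this fix
    ["language.model.layers.0", "language.model.layers.1"],   # language model
]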

67 changes: 61 additions & 6 deletions test_cuda/test_support_vlms.py
@@ -13,13 +13,13 @@
 class TestSupportVLMS(unittest.TestCase):
     @classmethod
     def setUpClass(self):
-        self.save_dir = os.path.join(os.path.dirname(__file__), "./ut_saved")
+        self.save_dir = os.path.join(os.path.dirname(__file__), "ut_saved")
         self.python_path = sys.executable
         self.device = 0
 
-    @classmethod
-    def tearDownClass(self):
-        shutil.rmtree(self.save_dir, ignore_errors=True)
+    # @classmethod
+    # def tearDownClass(self):
+    #     shutil.rmtree(self.save_dir, ignore_errors=True)
 
     def test_qwen2(self):
         model_path = "/models/Qwen2-VL-2B-Instruct/"
@@ -338,10 +338,65 @@ def test_deepseek_vl2(self):
         model_path = "/models/deepseek-vl2-tiny"
         res = os.system(
             f"cd .. && {self.python_path} -m auto_round --mllm "
-            f"--model {model_path} --iter 3 --nsamples 10 --bs 4 --output_dir {self.save_dir} --device auto"
+            f"--model {model_path} --iter 3 --nsamples 10 --bs 4 --output_dir {self.save_dir} --device auto --group_size 32 "
+            f"--fp_layers language.model.layer.4,language.model.layer.6"
         )
         self.assertFalse(res > 0 or res == -1, msg="deepseek vl2 tuning fail")
-        shutil.rmtree(self.save_dir, ignore_errors=True)
+
+        quantized_model_path = os.path.join(self.save_dir, "deepseek-vl2-tiny-w4g32-auto_round")
+        from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
+        from transformers import AutoModelForCausalLM
+        vl_chat_processor: DeepseekVLV2Processor = DeepseekVLV2Processor.from_pretrained(quantized_model_path)
+        tokenizer = vl_chat_processor.tokenizer
+
+        vl_gpt: DeepseekVLV2ForCausalLM = AutoModelForCausalLM.from_pretrained(
+            quantized_model_path,
+            trust_remote_code=True,
+            device_map="auto",
+            torch_dtype="auto",
+        )
+        vl_gpt = vl_gpt.eval()
+
+        image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
+        content = "Describe this image."
+
+        ## single image conversation example
+        conversation = [
+            {
+                "role": "<|User|>",
+                "content": content,
+            },
+            {"role": "<|Assistant|>", "content": ""},
+        ]
+
+        # load images and prepare for inputs
+        pil_images = Image.open(requests.get(image_url, stream=True).raw)
+        prepare_inputs = vl_chat_processor(
+            conversations=conversation,
+            images=[pil_images],
+            force_batchify=True,
+            system_prompt=""
+        )
+        prepare_inputs = prepare_inputs.to(vl_gpt.device)
+
+        # run image encoder to get the image embeddings
+        inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
+
+        # run the model to get the response
+        outputs = vl_gpt.language.generate(
+            input_ids=prepare_inputs["input_ids"],
+            inputs_embeds=inputs_embeds,
+            attention_mask=prepare_inputs.attention_mask,
+            pad_token_id=tokenizer.eos_token_id,
+            bos_token_id=tokenizer.bos_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+            max_new_tokens=512,
+            do_sample=False,
+            use_cache=True
+        )
+
+        answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
+        print(f"{prepare_inputs['sft_format'][0]}", answer)
 
 if __name__ == "__main__":
     unittest.main()
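
To exercise only this case (assuming a CUDA machine with /models/deepseek-vl2-tiny and the deepseek_vl2 package available), a selector such as python -m pytest test_cuda/test_support_vlms.py -k test_deepseek_vl2 should work.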
