
Commit

fix
Signed-off-by: n1ck-guo <[email protected]>
n1ck-guo committed Jan 3, 2025
1 parent c941958 commit 2637332
Showing 3 changed files with 77 additions and 13 deletions.
22 changes: 15 additions & 7 deletions auto_round/auto_quantizer.py
@@ -416,16 +416,24 @@ def convert_model(self, model: nn.Module):
         data_type = quantization_config.data_type if hasattr(quantization_config,
                                                              "data_type") else "int"  # pragma: no cover
         sym = quantization_config.sym
-        to_quant_block_names = quantization_config.to_quant_block_names if hasattr(quantization_config,
-                                                                                   "to_quant_block_names") else None
-        if to_quant_block_names is None:  # TODO check compatibility
-            all_blocks = get_block_names(model)
-        else:
-            all_blocks = get_multimodal_block_names(model, quant_vision=True)
-        quant_block_list = find_matching_blocks(model, all_blocks, to_quant_block_names)
+        quant_block_list = quantization_config.quant_block_list if hasattr(quantization_config,
+                                                                           "quant_block_list") else None
+        if quant_block_list is None:
+            to_quant_block_names = quantization_config.to_quant_block_names if hasattr(quantization_config,
+                                                                                       "to_quant_block_names") else None
+            if to_quant_block_names is not None:
+                if isinstance(to_quant_block_names, (list, tuple)):
+                    quant_block_list = to_quant_block_names
+                else:
+                    quant_block_list = []
+                    for block in to_quant_block_names.split(','):
+                        quant_block_list.append([f'{block}.{i}' for i in range(len(get_module(model, block)))])
+            else:
+                all_blocks = get_block_names(model)
+                quant_block_list = find_matching_blocks(model, all_blocks, to_quant_block_names)
 
         layer_names = get_layer_names_in_block(model, quant_block_list=quant_block_list)
 
         extra_config = {}
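
The reworked lookup gives a stored quant_block_list first priority and only falls back to to_quant_block_names, which may be a list/tuple of block-name lists or a comma-separated string of block prefixes that is expanded per layer index. A minimal sketch of that string expansion, assuming a toy model and a simplified stand-in for the repo's get_module helper:

import torch.nn as nn

def get_module(model, key):
    # simplified stand-in for auto_round's get_module: walk a dotted attribute path
    module = model
    for name in key.split('.'):
        module = getattr(module, name)
    return module

class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.blocks = nn.ModuleList(nn.Linear(4, 4) for _ in range(2))

model = ToyModel()
to_quant_block_names = "blocks"  # e.g. "vision.blocks,language.layers" for a multimodal model
quant_block_list = []
for block in to_quant_block_names.split(','):
    quant_block_list.append([f'{block}.{i}' for i in range(len(get_module(model, block)))])
print(quant_block_list)  # [['blocks.0', 'blocks.1']]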
1 change: 1 addition & 0 deletions auto_round/special_model_handler.py
@@ -33,6 +33,7 @@ def _get_deepseek_vl2_multimodal_block(model, quant_vision=False):
     block_names = []
     if quant_vision:
         block_names.append([f"vision.blocks.{i}" for i in range(len(model.vision.blocks))])
+        block_names.append([f"projector.layers.{i}" for i in range(len(model.projector.layers))])
     block_names.append([f"language.model.layers.{i}" for i in range(len(model.language.model.layers))])
     return block_names
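
With the projector layers appended, a quant_vision run now tunes the vision tower, the projector, and the language blocks together. Illustratively, with hypothetical block counts rather than deepseek-vl2-tiny's real sizes, the function would return:

# hypothetical output of _get_deepseek_vl2_multimodal_block(model, quant_vision=True)
block_names = [
    ["vision.blocks.0", "vision.blocks.1"],                   # vision tower
    ["projector.layers.0"],                                   # newly covered by this fix
    ["language.model.layers.0", "language.model.layers.1"],   # language model
]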

67 changes: 61 additions & 6 deletions test_cuda/test_support_vlms.py
@@ -13,13 +13,13 @@
 class TestSupportVLMS(unittest.TestCase):
     @classmethod
     def setUpClass(self):
-        self.save_dir = os.path.join(os.path.dirname(__file__), "./ut_saved")
+        self.save_dir = os.path.join(os.path.dirname(__file__), "ut_saved")
         self.python_path = sys.executable
         self.device = 0
 
-    @classmethod
-    def tearDownClass(self):
-        shutil.rmtree(self.save_dir, ignore_errors=True)
+    # @classmethod
+    # def tearDownClass(self):
+    #     shutil.rmtree(self.save_dir, ignore_errors=True)
 
     def test_qwen2(self):
         model_path = "/models/Qwen2-VL-2B-Instruct/"
@@ -338,10 +338,65 @@ def test_deepseek_vl2(self):
         model_path = "/models/deepseek-vl2-tiny"
         res = os.system(
             f"cd .. && {self.python_path} -m auto_round --mllm "
-            f"--model {model_path} --iter 3 --nsamples 10 --bs 4 --output_dir {self.save_dir} --device auto"
+            f"--model {model_path} --iter 3 --nsamples 10 --bs 4 --output_dir {self.save_dir} --device auto --group_size 32 "
+            f"--fp_layers language.model.layer.4,language.model.layer.6"
         )
         self.assertFalse(res > 0 or res == -1, msg="deepseek vl2 tuning fail")
-        shutil.rmtree(self.save_dir, ignore_errors=True)
+
+        quantized_model_path = os.path.join(self.save_dir, "deepseek-vl2-tiny-w4g32-auto_round")
+        from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
+        from transformers import AutoModelForCausalLM
+        vl_chat_processor: DeepseekVLV2Processor = DeepseekVLV2Processor.from_pretrained(quantized_model_path)
+        tokenizer = vl_chat_processor.tokenizer
+
+        vl_gpt: DeepseekVLV2ForCausalLM = AutoModelForCausalLM.from_pretrained(
+            quantized_model_path,
+            trust_remote_code=True,
+            device_map="auto",
+            torch_dtype="auto",
+        )
+        vl_gpt = vl_gpt.eval()
+
+        image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
+        content = "Describe this image."
+
+        ## single image conversation example
+        conversation = [
+            {
+                "role": "<|User|>",
+                "content": content,
+            },
+            {"role": "<|Assistant|>", "content": ""},
+        ]
+
+        # load images and prepare for inputs
+        pil_images = Image.open(requests.get(image_url, stream=True).raw)
+        prepare_inputs = vl_chat_processor(
+            conversations=conversation,
+            images=[pil_images],
+            force_batchify=True,
+            system_prompt=""
+        )
+        prepare_inputs = prepare_inputs.to(vl_gpt.device)
+
+        # run image encoder to get the image embeddings
+        inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
+
+        # run the model to get the response
+        outputs = vl_gpt.language.generate(
+            input_ids=prepare_inputs["input_ids"],
+            inputs_embeds=inputs_embeds,
+            attention_mask=prepare_inputs.attention_mask,
+            pad_token_id=tokenizer.eos_token_id,
+            bos_token_id=tokenizer.bos_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+            max_new_tokens=512,
+            do_sample=False,
+            use_cache=True
+        )
+
+        answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
+        print(f"{prepare_inputs['sft_format'][0]}", answer)
 
 if __name__ == "__main__":
     unittest.main()
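
To exercise only this case (assuming a CUDA machine with /models/deepseek-vl2-tiny and the deepseek_vl2 package available), a selector such as python -m pytest test_cuda/test_support_vlms.py -k test_deepseek_vl2 should work.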
