add qvision awq generation ut
Signed-off-by: Zhang, Weiwei1 <[email protected]>
WeiweiZhang1 committed Dec 30, 2024
1 parent 8f70cce commit 2311f26
Showing 1 changed file with 67 additions and 4 deletions.
71 changes: 67 additions & 4 deletions test_cuda/test_support_vlms.py
@@ -81,7 +81,7 @@ def test_phi3(self):
        ## test tune
        res = os.system(
            f"cd .. && {self.python_path} -m auto_round --mllm "
            f"--model {model_path} --iter 2 --output_dir {self.save_dir} --device {self.device}")
        self.assertFalse(res > 0 or res == -1, msg="Phi-3.5 tuning fail")

        ## test infer
@@ -114,11 +114,74 @@ def test_phi3(self):
        image_inputs = Image.open(requests.get(image_url, stream=True).raw)
        inputs = processor(prompt, image_inputs, return_tensors="pt").to(model.device)

        generation_args = {
            "max_new_tokens": 1000,
            "temperature": 0.0,
            "do_sample": False,
        }

        generate_ids = model.generate(**inputs,
                                      eos_token_id=processor.tokenizer.eos_token_id,
                                      **generation_args
                                      )

        # remove input tokens
        generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
        response = processor.batch_decode(generate_ids,
                                          skip_special_tokens=True,
                                          clean_up_tokenization_spaces=False)[0]
        print(response)
        shutil.rmtree(quantized_model_path, ignore_errors=True)

    def test_phi3_vision_awq(self):
        model_path = "/models/Phi-3.5-vision-instruct/"
        ## test tune
        res = os.system(
            f"cd .. && {self.python_path} -m auto_round --mllm "
            f"--model {model_path} --iter 2 --quant_nontext_module "
            f"--nsample 64 --seqlen 32 "
            f"--format auto_awq --output_dir {self.save_dir} --device {self.device}")
        self.assertFalse(res > 0 or res == -1, msg="Phi-3.5 tuning fail")

        ## test infer
        from transformers import AutoModelForCausalLM, AutoProcessor
        from auto_round.export.export_to_awq import WQLinear_GEMM
        quantized_model_path = os.path.join(self.save_dir, "Phi-3.5-vision-instruct-w4g128-auto_awq")
        res = os.system(f"cp /models/Phi-3.5-vision-instruct/*.py {quantized_model_path}")
        model = AutoModelForCausalLM.from_pretrained(
            quantized_model_path,
            device_map=f"cuda:{self.device}",
            trust_remote_code=True,
            torch_dtype="auto"
        )
        assert "WQLinear_GEMM" in str(
            type(model.model.vision_embed_tokens.img_processor.vision_model.encoder.layers[0].mlp.fc1)), \
            "model quantization failed."
        processor = AutoProcessor.from_pretrained(quantized_model_path,
                                                  trust_remote_code=True,
                                                  num_crops=4
                                                  )

        image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
        content = "Describe this image."
        messages = [
            {"role": "user",
             "content": "<|image_1|>\n"+content},
        ]

        prompt = processor.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        image_inputs = Image.open(requests.get(image_url, stream=True).raw)
        inputs = processor(prompt, image_inputs, return_tensors="pt").to(model.device)

        generation_args = {
            "max_new_tokens": 1000,
            "temperature": 0.0,
            "do_sample": False,
        }

        generate_ids = model.generate(**inputs,
                                      eos_token_id=processor.tokenizer.eos_token_id,
@@ -272,4 +335,4 @@ def test_72b(self):
        shutil.rmtree(self.save_dir, ignore_errors=True)

if __name__ == "__main__":
    unittest.main()
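
For reference, a minimal standalone sketch of the tuning command the new test shells out to. It assumes auto-round is installed and that a local copy of Phi-3.5-vision-instruct exists at the path below, as in the test environment; the output directory and device index are placeholders, not values required by the commit.

# Minimal sketch mirroring the command exercised by test_phi3_vision_awq.
# Assumptions: auto-round installed, model weights at /models/Phi-3.5-vision-instruct/,
# output directory ./saved and CUDA device 0 are placeholders.
import os

cmd = (
    "python -m auto_round --mllm "
    "--model /models/Phi-3.5-vision-instruct/ --iter 2 --quant_nontext_module "
    "--nsample 64 --seqlen 32 "
    "--format auto_awq --output_dir ./saved --device 0"
)
ret = os.system(cmd)
assert ret == 0, "auto_round AWQ tuning command failed"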
