Better use of the tokenizer: copy it inside _only_text_test so the caller's tokenizer is not modified
Signed-off-by: n1ck-guo <[email protected]>
n1ck-guo committed Dec 27, 2024
1 parent 05dece5 commit 7a91f2b
Showing 1 changed file with 6 additions and 5 deletions.
auto_round/mllm/autoround_mllm.py: 6 additions & 5 deletions
@@ -41,12 +41,13 @@ def _only_text_test(model, tokenizer, device, model_type):
     if model_type in SUPPORT_ONLY_TEXT_MODELS: # save time
         return True
 
+    new_tokenizer = deepcopy(tokenizer)
     device = detect_device(device)
     text = ["only text", "test"]
-    tokenizer.padding_side = 'left'
-    if tokenizer.pad_token is None:
-        tokenizer.pad_token = tokenizer.eos_token
-    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+    new_tokenizer.padding_side = 'left'
+    if new_tokenizer.pad_token is None:
+        new_tokenizer.pad_token = new_tokenizer.eos_token
+    inputs = new_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
 
     try:
         inputs = inputs.to(device)
@@ -182,7 +183,7 @@ def __init__(
         if isinstance(dataset, str):
             if quant_nontext_module or \
                 (dataset in CALIB_DATASETS.keys() and not \
-                    _only_text_test(model, deepcopy(tokenizer), device, self.template.model_type)):
+                    _only_text_test(model, tokenizer, device, self.template.model_type)):
                 if quant_nontext_module:
                     logger.warning(f"Text only dataset cannot be used for calibrating non-text modules,"
                                    "switching to liuhaotian/llava_conv_58k")
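The effect of the change, as a minimal sketch that is not part of the commit (DummyTokenizer and probe_text_only are hypothetical stand-ins): deepcopying the tokenizer inside _only_text_test keeps the padding_side and pad_token tweaks local to the probe, so the caller's tokenizer is no longer mutated and the call site no longer needs its own deepcopy.

from copy import deepcopy

class DummyTokenizer:
    """Hypothetical stand-in for a Hugging Face tokenizer; only the attributes the probe touches."""
    def __init__(self):
        self.padding_side = "right"
        self.pad_token = None
        self.eos_token = "</s>"

def probe_text_only(tokenizer):
    # Mirror of the patched _only_text_test: copy first, then adjust padding
    # settings on the copy so the caller's tokenizer stays untouched.
    new_tokenizer = deepcopy(tokenizer)
    new_tokenizer.padding_side = "left"
    if new_tokenizer.pad_token is None:
        new_tokenizer.pad_token = new_tokenizer.eos_token
    return new_tokenizer

tok = DummyTokenizer()
probe_text_only(tok)
# The caller's tokenizer keeps its defaults; before this commit it would have
# been left with padding_side="left" and pad_token="</s>".
assert tok.padding_side == "right" and tok.pad_token is None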
