Qwen/Qwen2.5-7B-Instruct model layer_wise_quant function error #2017

Open
hadoop2xu opened this issue Sep 30, 2024 · 0 comments

hadoop2xu commented Sep 30, 2024

My code:

import torch

from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion
# Import path as in the INC layer-wise quantization docs; adjust if your version differs.
from neural_compressor.adaptor.torch_utils.layer_wise_quant import load_empty_model


class NewDataloader:
    def __init__(self, batch_size, **kwargs):
        self.batch_size = batch_size

    def __iter__(self):
        # A single calibration sample as an (input_ids, label) pair;
        # 151643 is Qwen's end-of-text/padding token id.
        yield (
            torch.tensor([1986,   374,    279,   2086,  11652,     13, 151643, 151643, 151643,
                   151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
                   151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
                   151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
                   151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
                   151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
                   151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643], device="cpu"),
            torch.tensor(1, device="cpu"),
        )


def lmq_quant():
    eval_dataloader = NewDataloader(batch_size=1)
    model_path = "Qwen/Qwen2.5-7B-Instruct"
    fp32_model = load_empty_model(model_path, torchscript=True)
    conf = PostTrainingQuantConfig(
        approach="weight_only",
        recipes={"layer_wise_quant": True, "rtn_args": {"enable_full_range": True}, "smooth_quant": True},
        tuning_criterion=TuningCriterion(
            timeout=1000,
            max_trials=100,
        ),
    )

    q_model = quantization.fit(
        fp32_model,
        conf,
        calib_dataloader=eval_dataloader,
        eval_func=lambda x: 0.1,
    )
    print("q_model: ", q_model)
    ouput_dir = "./saved_model"
    q_model.save(ouput_dir)
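
For context on the traceback below: each batch this dataloader yields is an (input_ids, label) tuple, and judging from the trace the calibration wrapper first tries model(*input) and, when that fails, falls back to model(input), which hands the whole tuple to the embedding layer. A minimal sketch in plain PyTorch (no neural_compressor involved; the embedding sizes are arbitrary stand-ins) that reproduces the final TypeError:

import torch

emb = torch.nn.Embedding(152064, 8)    # vocab-sized lookup table, small dim
input_ids = torch.tensor([1986, 374, 279, 2086, 11652, 13])
batch = (input_ids, torch.tensor(1))   # what NewDataloader yields

print(emb(input_ids).shape)            # fine: indices is a Tensor
try:
    emb(batch)                         # indices is a tuple
except TypeError as e:
    print(e)  # embedding(): argument 'indices' (position 2) must be Tensor, not tuple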

Error log:
2024-10-06 10:05:13 [WARNING][logger.py:132] [Strategy] Please install mpi4py correctly if using distributed tuning; otherwise, ignore this warning.
2024-10-06 10:05:13 [INFO][logger.py:114] SmoothQuant args 'folding' is not set, it's False now.
/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/transformers/modeling_utils.py:4779: FutureWarning: _is_quantized_training_enabled is going to be deprecated in transformers 4.39.0. Please use model.hf_quantizer.is_trainable instead
warnings.warn(
/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if sequence_length != 1:
2024-10-06 10:05:13 [WARNING][logger.py:132] tuple index out of range
2024-10-06 10:05:13 [WARNING][logger.py:132] Jit trace in GraphTrace failed, absorb layer detection is skipped
2024-10-06 10:05:13 [INFO][logger.py:114] Calibrating...
2024-10-06 10:05:13 [ERROR][logger.py:96] Unexpected exception TypeError("embedding(): argument 'indices' (position 2) must be Tensor, not tuple") happened during tuning.
Traceback (most recent call last):
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/adaptor/torch_utils/waq/utils.py", line 95, in model_forward
output = forward_wrapper(model, input, device)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/adaptor/torch_utils/waq/utils.py", line 85, in forward_wrapper
output = model(input)
^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 1167, in forward
outputs = self.model(
^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 952, in forward
position_embeddings = self.rotary_emb(hidden_states, position_ids)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 215, in forward
with torch.autocast(device_type=device_type, enabled=False):
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/amp/autocast_mode.py", line 241, in init
raise RuntimeError(
RuntimeError: User specified an unsupported autocast device_type 'meta'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/adaptor/torch_utils/waq/utils.py", line 81, in forward_wrapper
output = model(*input)
^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 1167, in forward
outputs = self.model(
^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 945, in forward
causal_mask = self._update_causal_mask(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 1036, in _update_causal_mask
if AttentionMaskConverter._ignore_causal_mask_sdpa(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/transformers/modeling_attn_mask_utils.py", line 284, in _ignore_causal_mask_sdpa
elif (is_training or not is_tracing) and torch.all(attention_mask == 1):
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
NotImplementedError: aten::_local_scalar_dense: attempted to run this operator with Meta tensors, but there was no abstract impl or Meta kernel registered. You may have run into this message while using an operator with PT2 compilation APIs (torch.compile/torch.export); in order to use this operator with those APIs you'll need to add an abstract impl.Please see the following doc for next steps: https://docs.google.com/document/d/1_W62p8WJOQQUzPsJYa7s701JXt0qf2OfLub2sbkHOaU/edit

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/quantization.py", line 220, in fit
strategy.traverse()
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/strategy/auto.py", line 140, in traverse
super().traverse()
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/strategy/strategy.py", line 482, in traverse
self._setup_pre_tuning_algo_scheduler()
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/strategy/strategy.py", line 361, in _setup_pre_tuning_algo_scheduler
self.model = self._pre_tuning_algo_scheduler("pre_quantization")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/algorithm/algorithm.py", line 127, in call
self._q_model = algo(self._origin_model, self._q_model, self._adaptor, self._dataloader, self._calib_iter)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/algorithm/smooth_quant.py", line 89, in call
q_model = adaptor.smooth_quant(
^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/adaptor/pytorch.py", line 1794, in smooth_quant
model._model = self.sq.transform(
^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/adaptor/torch_utils/waq/smooth_quant.py", line 429, in transform
self.absorb_to_layer = self._parse_absorb_to_layers(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/adaptor/torch_utils/waq/smooth_quant.py", line 363, in _parse_absorb_to_layers
input_mins, input_maxes = calib.calibrate(
^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/adaptor/torch_utils/waq/calibration.py", line 112, in calibrate
self._dump_min_max(calib_iter=calib_iter)
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/adaptor/torch_utils/waq/calibration.py", line 92, in _dump_min_max
model_forward(self.model, self.dataloader, calib_iter, self.device)
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/adaptor/torch_utils/waq/utils.py", line 102, in model_forward
output = forward_wrapper(model, input, device)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/neural_compressor-3.1.dev25+g72398b6933-py3.12.egg/neural_compressor/adaptor/torch_utils/waq/utils.py", line 83, in forward_wrapper
output = model(input)
^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 1167, in forward
outputs = self.model(
^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 935, in forward
inputs_embeds = self.embed_tokens(input_ids)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/modules/sparse.py", line 163, in forward
return F.embedding(
^^^^^^^^^^^^
File "/share_data/users/xuhaifeng/miniconda3/lib/python3.12/site-packages/torch/nn/functional.py", line 2264, in embedding
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not tuple
2024-10-06 10:05:13 [ERROR][logger.py:96] Specified timeout or max trials is reached! Not found any quantized model which meet accuracy goal. Exit.
q_model: None
Traceback (most recent call last):
File "/share_data/users/xuhaifeng/test_lwq.py", line 221, in
lmq_quant()
File "/share_data/users/xuhaifeng/test_lwq.py", line 181, in lmq_quant
q_model.save(ouput_dir)
^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'save'
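
For what it's worth, the first two exceptions in the log look like direct consequences of the model living on the meta device, which is what load_empty_model produces for layer-wise quantization: meta tensors carry shapes but no data, so autocast rejects 'meta' as a device type and a tensor comparison can never be materialized into a Python bool. A minimal sketch (plain PyTorch, no neural_compressor) reproducing both failures:

import torch

t = torch.empty(4, device="meta")  # shape/dtype only, no backing storage

try:
    with torch.autocast(device_type="meta", enabled=False):
        pass
except RuntimeError as e:
    print(e)  # User specified an unsupported autocast device_type 'meta'

try:
    bool(torch.all(t == 1))  # needs a concrete value -> aten::_local_scalar_dense
except NotImplementedError as e:
    print(type(e).__name__)  # NotImplementedError, as in the calibration log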
