fix a bug in autoround format with the latest transformers
wenhuach21 committed Jun 27, 2024
1 parent 3df8b03 commit 05eef70
Showing 2 changed files with 9 additions and 14 deletions.
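For context, auto_round/auto_quantizer.py is the transformers integration layer, so the bug surfaces when an auto-round checkpoint is loaded through from_pretrained with a recent transformers release. A hedged loading sketch follows; the checkpoint path is hypothetical and the AutoHfQuantizer import is assumed to register the custom quantizer as a side effect (neither is taken from this commit).

# Hedged sketch of the inference path this quantizer serves.
# The checkpoint directory is hypothetical; the AutoHfQuantizer import is an
# assumption about how auto-round hooks its quantizer into transformers.
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round.auto_quantizer import AutoHfQuantizer  # noqa: F401  (assumed side-effect import)

quantized_dir = "./opt-125m-autoround"  # hypothetical auto-round format checkpoint
model = AutoModelForCausalLM.from_pretrained(quantized_dir, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(quantized_dir)

inputs = tokenizer("There is a girl who likes adventure,", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0]))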
19 changes: 7 additions & 12 deletions auto_round/auto_quantizer.py
@@ -43,7 +43,7 @@

from auto_round.utils import get_module, set_module, dynamic_import_inference_linear
import auto_round_extension.qbits.qlinear_qbits as qlinear_qbits

+from enum import Enum
logger = getLogger(__name__)
import sys

@@ -194,6 +194,9 @@ def merge_quantization_configs(

return quantization_config

+class AutoRoundQuantizationMethod(str, Enum):
+    AutoRound = "intel/auto-round"


@dataclass
class AutoRoundConfig(QuantizationConfigMixin):
@@ -222,6 +225,7 @@ def __init__(
weight_config: dict = None,
**kwargs,
):

self.bits = bits
self.tokenizer = tokenizer
self.dataset = dataset
@@ -232,7 +236,7 @@
if kwargs is not None:
for key in kwargs.keys():
setattr(self, key, kwargs[key])

+self.quant_method = AutoRoundQuantizationMethod.AutoRound
self.post_init()

def get_loading_attributes(self):
@@ -378,11 +382,6 @@ def post_init_model(self, model):
The input model
"""

-#
-# if self.bits == 4: if get_device(model) == torch.device("cpu") or ( hasattr(model, "hf_device_map") and
-# any(d in model.hf_device_map for d in ["cpu", "disk"]) ): raise ValueError( "Found modules on cpu/disk.
-# Using Exllamav2 backend requires all the modules to be on GPU." "You can deactivate exllama backend by
-# setting `disable_exllama=True` in the quantization config object" )

class StoreAttr(object):
pass
@@ -406,11 +405,7 @@ def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs
model = self.post_init_model(model)
else:
raise NotImplementedError
-# if self.quantization_config.tokenizer is None:
-# self.quantization_config.tokenizer = model.name_or_path
-#
-# self.optimum_quantizer.quantize_model(model, self.quantization_config.tokenizer)
-# model.config.quantization_config = GPTQConfig.from_dict(self.optimum_quantizer.to_dict())


@property
def is_trainable(self, model: Optional["PreTrainedModel"] = None):
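Taken together, the auto_quantizer.py changes replace the plain quant_method assignment with a str-backed Enum whose value is "intel/auto-round", presumably so it keeps comparing and serializing like the raw string that newer transformers code paths expect. A minimal standalone sketch of that behavior (the assertions are illustrative, not part of the commit):

from enum import Enum


class AutoRoundQuantizationMethod(str, Enum):
    AutoRound = "intel/auto-round"


# Because the enum also subclasses str, members behave like the plain string
# wherever quantization_config.quant_method is inspected or serialized.
method = AutoRoundQuantizationMethod.AutoRound
assert method == "intel/auto-round"   # equality with the raw string still holds
assert isinstance(method, str)        # usable anywhere a str is expected
print(method.value)                   # -> intel/auto-round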
4 changes: 2 additions & 2 deletions auto_round/autoround.py
@@ -87,7 +87,7 @@ class AutoRound(object):
not_use_best_mse (bool): Whether to use mean squared error (default is False).
dynamic_max_gap (int): The dynamic maximum gap (default is -1).
data_type (str): The data type to be used (default is "int").
-scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
+scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
have different choices.
Returns:
@@ -1192,7 +1192,7 @@ class AutoOPTRound(AutoRound):
not_use_best_mse (bool): Whether to use mean squared error (default is False).
dynamic_max_gap (int): The dynamic maximum gap (default is -1).
data_type (str): The data type to be used (default is "int").
-scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
+scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
have different choices.
**kwargs: Additional keyword arguments.

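The autoround.py change only corrects the documented default of scale_dtype from "float32" to "float16". A hedged usage sketch that spells out both documented defaults: the model id is chosen only for illustration, and constructor arguments other than data_type and scale_dtype are assumptions about AutoRound's usual API rather than something shown in this diff.

# Hedged sketch: argument names besides data_type/scale_dtype are assumptions.
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_name = "facebook/opt-125m"  # small model, for illustration only
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

autoround = AutoRound(
    model,
    tokenizer,
    bits=4,
    group_size=128,
    data_type="int",        # documented default
    scale_dtype="float16",  # documented default; supported choices vary by kernel
)
autoround.quantize()
autoround.save_quantized("./opt-125m-autoround")  # emits the auto-round format checkpoint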