fix a bug in autoround format with the latest transformers
wenhuach21 committed Jun 27, 2024
1 parent 3df8b03 commit 05eef70
Showing 2 changed files with 9 additions and 14 deletions.
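For context, auto_round/auto_quantizer.py is the transformers integration layer, so the bug surfaces when an auto-round checkpoint is loaded through from_pretrained with a recent transformers release. A hedged loading sketch follows; the checkpoint path is hypothetical and the AutoHfQuantizer import is assumed to register the custom quantizer as a side effect (neither is taken from this commit).

# Hedged sketch of the inference path this quantizer serves.
# The checkpoint directory is hypothetical; the AutoHfQuantizer import is an
# assumption about how auto-round hooks its quantizer into transformers.
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round.auto_quantizer import AutoHfQuantizer  # noqa: F401  (assumed side-effect import)

quantized_dir = "./opt-125m-autoround"  # hypothetical auto-round format checkpoint
model = AutoModelForCausalLM.from_pretrained(quantized_dir, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(quantized_dir)

inputs = tokenizer("There is a girl who likes adventure,", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0]))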
19 changes: 7 additions & 12 deletions auto_round/auto_quantizer.py
@@ -43,7 +43,7 @@

from auto_round.utils import get_module, set_module, dynamic_import_inference_linear
import auto_round_extension.qbits.qlinear_qbits as qlinear_qbits

+from enum import Enum
logger = getLogger(__name__)
import sys

@@ -194,6 +194,9 @@ def merge_quantization_configs(

return quantization_config

+class AutoRoundQuantizationMethod(str, Enum):
+    AutoRound = "intel/auto-round"


@dataclass
class AutoRoundConfig(QuantizationConfigMixin):
@@ -222,6 +225,7 @@ def __init__(
weight_config: dict = None,
**kwargs,
):

self.bits = bits
self.tokenizer = tokenizer
self.dataset = dataset
@@ -232,7 +236,7 @@
if kwargs is not None:
for key in kwargs.keys():
setattr(self, key, kwargs[key])

+self.quant_method = AutoRoundQuantizationMethod.AutoRound
self.post_init()

def get_loading_attributes(self):
@@ -378,11 +382,6 @@ def post_init_model(self, model):
The input model
"""

-#
-# if self.bits == 4: if get_device(model) == torch.device("cpu") or ( hasattr(model, "hf_device_map") and
-# any(d in model.hf_device_map for d in ["cpu", "disk"]) ): raise ValueError( "Found modules on cpu/disk.
-# Using Exllamav2 backend requires all the modules to be on GPU." "You can deactivate exllama backend by
-# setting `disable_exllama=True` in the quantization config object" )

class StoreAttr(object):
pass
@@ -406,11 +405,7 @@ def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs
model = self.post_init_model(model)
else:
raise NotImplementedError
-# if self.quantization_config.tokenizer is None:
-# self.quantization_config.tokenizer = model.name_or_path
-#
-# self.optimum_quantizer.quantize_model(model, self.quantization_config.tokenizer)
-# model.config.quantization_config = GPTQConfig.from_dict(self.optimum_quantizer.to_dict())


@property
def is_trainable(self, model: Optional["PreTrainedModel"] = None):
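Taken together, the auto_quantizer.py changes replace the plain quant_method assignment with a str-backed Enum whose value is "intel/auto-round", presumably so it keeps comparing and serializing like the raw string that newer transformers code paths expect. A minimal standalone sketch of that behavior (the assertions are illustrative, not part of the commit):

from enum import Enum


class AutoRoundQuantizationMethod(str, Enum):
    AutoRound = "intel/auto-round"


# Because the enum also subclasses str, members behave like the plain string
# wherever quantization_config.quant_method is inspected or serialized.
method = AutoRoundQuantizationMethod.AutoRound
assert method == "intel/auto-round"   # equality with the raw string still holds
assert isinstance(method, str)        # usable anywhere a str is expected
print(method.value)                   # -> intel/auto-round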
4 changes: 2 additions & 2 deletions auto_round/autoround.py
@@ -87,7 +87,7 @@ class AutoRound(object):
not_use_best_mse (bool): Whether to use mean squared error (default is False).
dynamic_max_gap (int): The dynamic maximum gap (default is -1).
data_type (str): The data type to be used (default is "int").
-scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
+scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
have different choices.
Returns:
@@ -1192,7 +1192,7 @@ class AutoOPTRound(AutoRound):
not_use_best_mse (bool): Whether to use mean squared error (default is False).
dynamic_max_gap (int): The dynamic maximum gap (default is -1).
data_type (str): The data type to be used (default is "int").
-scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
+scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
have different choices.
**kwargs: Additional keyword arguments.

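The autoround.py change only corrects the documented default of scale_dtype from "float32" to "float16". A hedged usage sketch that spells out both documented defaults: the model id is chosen only for illustration, and constructor arguments other than data_type and scale_dtype are assumptions about AutoRound's usual API rather than something shown in this diff.

# Hedged sketch: argument names besides data_type/scale_dtype are assumptions.
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_name = "facebook/opt-125m"  # small model, for illustration only
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

autoround = AutoRound(
    model,
    tokenizer,
    bits=4,
    group_size=128,
    data_type="int",        # documented default
    scale_dtype="float16",  # documented default; supported choices vary by kernel
)
autoround.quantize()
autoround.save_quantized("./opt-125m-autoround")  # emits the auto-round format checkpoint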