Fix exllamav2 backend issue #144

Merged 4 commits on Jun 3, 2024
Changes from all commits
4 changes: 2 additions & 2 deletions auto_round/auto_quantizer.py
@@ -255,7 +255,7 @@ class AutoRoundQuantizer(HfQuantizer):

     def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
         super().__init__(quantization_config, **kwargs)
-        self.exllama2_available = is_autoround_exllamav2_available
+        self.exllama2_available = is_autoround_exllamav2_available()

     def validate_environment(self, *args, **kwargs):
         if not is_auto_round_available():
@@ -311,7 +311,7 @@ def convert_model(self, model: nn.Module):
         return model

     def _dynamic_import_inference_linear(self, bits, backend):
-        if bits == 4 and self.exllama2_available and "exllama2" in backend:
+        if bits == 4 and self.exllama2_available and "exllamav2" in backend:
             from auto_round_extension.cuda.qliner_exllamav2 import QuantLinear
         else:
             from auto_round_extension.cuda.qliner_triton import QuantLinear
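For context, a minimal standalone sketch of the two bugs this file change fixes. It is not part of the PR; the stub return value and the example backend string are assumptions for illustration only:

# Sketch only: illustrates the two conditions corrected above.

def is_autoround_exllamav2_available():
    return False  # assume the exllamav2 CUDA extension is not installed

# Bug 1: missing call parentheses. A bare function object is always truthy,
# so the availability flag looked True even when the kernel was unavailable.
exllama2_available = is_autoround_exllamav2_available
print(bool(exllama2_available))                  # True  (wrong: the check never ran)
print(bool(is_autoround_exllamav2_available()))  # False (correct after the fix)

# Bug 2: substring mismatch. "exllama2" is not a substring of "exllamav2",
# so the exllamav2 QuantLinear branch was never taken.
backend = "exllamav2"            # assumed example backend string
print("exllama2" in backend)     # False -> fell through to the Triton import
print("exllamav2" in backend)    # True  -> selects qliner_exllamav2.QuantLinear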
6 changes: 2 additions & 4 deletions auto_round/autoround.py
@@ -70,14 +70,13 @@ def __init__(self, orig_layer, enable_minmax_tuning=True):
         self.scale_dtype = self.orig_layer.scale_dtype
         self.sym = self.orig_layer.sym
         weight_dtype = self.orig_layer.weight.dtype
-        weight_dtype = torch.float32 ##TODO revert the change to check the accuracy
+        weight_dtype = torch.float32
         self.value = torch.nn.Parameter(
             torch.zeros(self.orig_layer.weight.shape, device=self.orig_layer.weight.device, dtype=weight_dtype),
             requires_grad=True,
         )
         self.enable_minmax_tuning = enable_minmax_tuning
         shape = get_scale_shape(self.orig_layer.weight, self.group_size)
-        weight_dtype = self.orig_layer.weight.dtype
         if self.enable_minmax_tuning:
             self.min_scale = torch.nn.Parameter(
                 torch.zeros(shape, device=self.orig_layer.weight.device, dtype=weight_dtype), requires_grad=True
@@ -178,8 +177,7 @@ def __init__(self, orig_layer, enable_minmax_tuning=True):
         self.sym = self.orig_layer.sym
         self.scale_dtype = self.orig_layer.scale_dtype
         weight_dtype = self.orig_layer.weight.dtype
-        weight_dtype = torch.float32 ##TODO revert the change to check the accuracy
-
+        weight_dtype = torch.float32
         device = self.orig_layer.weight.device
         self.weight_t = self.orig_layer.weight.t()
         self.value = torch.nn.Parameter(
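For context, a minimal standalone sketch of what the autoround.py hunks mean for the tuning parameters: with the second reassignment of weight_dtype removed, the dtype stays torch.float32, so the rounding value and the min/max scale parameters are all created in float32 even when the wrapped layer holds fp16 weights. The example weight shape and scale shape are assumptions, not taken from the repository:

import torch

# Sketch only: assumed fp16 layer weight and an assumed scale shape.
orig_weight = torch.zeros(8, 16, dtype=torch.float16)
scale_shape = (8, 1)

weight_dtype = orig_weight.dtype
weight_dtype = torch.float32  # forced, as in the hunks above

value = torch.nn.Parameter(
    torch.zeros(orig_weight.shape, device=orig_weight.device, dtype=weight_dtype),
    requires_grad=True,
)
# Previously weight_dtype was reset to orig_weight.dtype at this point, so the
# scale parameters were created in fp16; after the change they stay in float32.
min_scale = torch.nn.Parameter(
    torch.zeros(scale_shape, device=orig_weight.device, dtype=weight_dtype),
    requires_grad=True,
)
print(value.dtype, min_scale.dtype)  # torch.float32 torch.float32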