diff --git a/optimum/gptq/quantizer.py b/optimum/gptq/quantizer.py index ae2f7317abe..103015642c6 100644 --- a/optimum/gptq/quantizer.py +++ b/optimum/gptq/quantizer.py @@ -245,10 +245,10 @@ def _replace_by_quant_layers(self, module: nn.Module, names: List[str], name: st out_features = layer.weight.shape[1] if not (self.desc_act) or self.group_size == -1: new_layer = QuantLinear( - self.bits, self.group_size, in_features, out_features, True, use_cuda_fp16=self.use_cuda_fp16 + self.bits, self.group_size, in_features, out_features, True, use_cuda_fp16=self.use_cuda_fp16, weight_dtype=layer.weight.dtype ) else: - new_layer = QuantLinear(self.bits, self.group_size, in_features, out_features, True) + new_layer = QuantLinear(self.bits, self.group_size, in_features, out_features, True, weight_dtype=layer.weight.dtype) new_layer.device = device setattr(module, attr, new_layer.to(device)) for name1, child in module.named_children(): diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py index d1729d85991..f262548975b 100644 --- a/optimum/utils/import_utils.py +++ b/optimum/utils/import_utils.py @@ -35,7 +35,7 @@ TORCH_MINIMUM_VERSION = packaging.version.parse("1.11.0") TRANSFORMERS_MINIMUM_VERSION = packaging.version.parse("4.25.0") DIFFUSERS_MINIMUM_VERSION = packaging.version.parse("0.18.0") -AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.4.2") +AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.5.0") # This is the minimal required version to support some ONNX Runtime features