Commit 14d8040

Change the code, as the device is always meta
wenhuach21 committed Jun 11, 2024
1 parent ae6687b commit 14d8040
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions auto_round/auto_quantizer.py
@@ -316,10 +316,10 @@ def convert_model(self, model: nn.Module):
         self._replace_by_quant_layers(model, layer_configs, backend)
         return model
 
-    def _dynamic_import_inference_linear(self, bits, backend, device):
-        if (str(device) == "cpu" and not torch.cuda.is_available()) or "qbits" in backend:
+    def _dynamic_import_inference_linear(self, bits, backend):
+        if (not torch.cuda.is_available()) or "qbits" in backend:
             try:
-                from intel_extension_for_transformers import qbits  # noqa: F401
+                from intel_extension_for_transformers import qbits  # pylint: disable=E0401
             except Exception as e:
                 raise ImportError("Please install Intel Extension for Transformers via 'pip install "
                                   "intel-extension-for-transformers' to inference on Intel CPU")
@@ -351,7 +351,7 @@ def _replace_by_quant_layers(self, module: nn.Module, layer_configs, backend):
 
             layer = get_module(module, layer_name)
             device = get_device(layer)
-            QuantLinear = self._dynamic_import_inference_linear(bits, backend, device)
+            QuantLinear = self._dynamic_import_inference_linear(bits, backend)
             if isinstance(layer, nn.Linear):
                 in_features = layer.in_features
                 out_features = layer.out_features

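Why the device argument could be dropped: by the time _replace_by_quant_layers runs, the model skeleton lives on the meta device (per the commit message, the device is always meta at this point; real weights are loaded only after the quantized layers are swapped in), so get_device(layer) returns meta and the removed str(device) == "cpu" test could never be true. Below is a minimal sketch demonstrating the point, assuming only stock PyTorch 2.x (where torch.device works as a context manager); the layer is illustrative, not taken from the repository:

    import torch
    import torch.nn as nn

    # Build a layer the way a model skeleton is built before real weights are
    # loaded: every parameter is created on the meta device.
    with torch.device("meta"):
        layer = nn.Linear(16, 16)

    device = layer.weight.device
    print(device)                 # meta
    print(str(device) == "cpu")   # False: the old CPU check was dead code

With the device argument gone, the CPU path is selected solely by torch.cuda.is_available() (or an explicit "qbits" backend request), which holds regardless of where the placeholder weights live.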