Disable the exllama on all non-cuda devices. (#2003)

* Disable the exllama on all non-cuda devices.

  1. Disable the exllama on all non-cuda devices.
  2. Don't raise the error when running on non-cuda device.

  Signed-off-by: yuanwu <[email protected]>

* Refine the code

  Signed-off-by: yuanwu <[email protected]>

* Fix errors of make style

  Signed-off-by: yuanwu <[email protected]>

* Add hpu device

  Signed-off-by: yuanwu <[email protected]>

* Update optimum/gptq/constants.py

  Co-authored-by: Ilyas Moutawwakil <[email protected]>

* Update optimum/gptq/quantizer.py

  Co-authored-by: Ilyas Moutawwakil <[email protected]>

* Update optimum/gptq/quantizer.py

  Co-authored-by: Ilyas Moutawwakil <[email protected]>

* Update optimum/gptq/quantizer.py

  Co-authored-by: Ilyas Moutawwakil <[email protected]>

* Fix error of make style

  Signed-off-by: yuanwu <[email protected]>

---------

Signed-off-by: yuanwu <[email protected]>
Co-authored-by: Ilyas Moutawwakil <[email protected]>
huggingface · Sep 18, 2024 · 2179d33 · 2179d33
1 parent ca36fc4
commit 2179d33
Showing 1 changed file with 8 additions and 7 deletions.
diff --git a/optimum/gptq/quantizer.py b/optimum/gptq/quantizer.py
@@ -546,7 +546,7 @@ def tmp(_, input, output):
 
         if self.bits == 4:
             # device not on gpu
-            if device == torch.device("cpu") or (has_device_map and any(d in devices for d in ["cpu", "disk"])):
+            if device.type != "cuda" or (has_device_map and any(d in devices for d in ["cpu", "disk", "hpu"])):
                 if not self.disable_exllama:
                     logger.warning(
                         "Found modules on cpu/disk. Using Exllama/Exllamav2 backend requires all the modules to be on GPU. Setting `disable_exllama=True`"
@@ -589,13 +589,14 @@ def post_init_model(self, model):
                 The input model
         """
         if self.bits == 4 and not self.disable_exllama:
-            if get_device(model) == torch.device("cpu") or (
-                hasattr(model, "hf_device_map") and any(d in model.hf_device_map for d in ["cpu", "disk"])
+            if get_device(model).type != "cuda" or (
+                hasattr(model, "hf_device_map") and any(d in model.hf_device_map for d in ["cpu", "disk", "hpu"])
             ):
-                raise ValueError(
-                    "Found modules on cpu/disk. Using Exllama or Exllamav2 backend requires all the modules to be on GPU."
-                    "You can deactivate exllama backend by setting `disable_exllama=True` in the quantization config object"
-                )
+                if not self.disable_exllama:
+                    logger.warning(
+                        "Found modules on cpu/disk. Using Exllama/Exllamav2 backend requires all the modules to be on GPU. Setting `disable_exllama=True`"
+                    )
+                    self.disable_exllama = True
 
         class StoreAttr(object):
             pass