fix bug and limit numpy version #159

Merged: 3 commits, Jun 18, 2024
auto_round/utils.py (14 changes: 8 additions & 6 deletions)
```diff
@@ -25,6 +25,13 @@
 import torch
 from torch.amp import autocast
 
+from functools import lru_cache
+@lru_cache(None)
+def warning_once(self, msg: str):
+    self.warning(msg)
+
+
+logging.Logger.warning_once = warning_once
 logger = logging.getLogger("autoround")
 logger.setLevel(logging.INFO)
 logger.propagate = False
@@ -35,7 +42,6 @@
 
 import importlib
 import transformers
-from functools import lru_cache
 
 class LazyImport(object):
     """Lazy import python module till use."""
@@ -607,11 +613,6 @@ def get_autogptq_backend_config(backend, bits=4):
         use_triton = False
     return use_triton, disable_exllamav1, disable_exllamav2, use_qigen, disable_marlin
 
-@lru_cache(None)
-def warning_once(logger, msg: str):
-    logger.warning(msg)
-
-logger.warning_once = warning_once
 def dynamic_import_inference_linear(bits, group_size, backend):
     """Dynamically imports and returns the appropriate QuantLinear class based on the given bits and backend.
@@ -660,6 +661,7 @@ def dynamic_import_inference_linear(bits, group_size, backend):
     elif bits == 4 and "exllamav2" in backend:
         logger.warning_once("Please install auto-round from source to enable exllamav2 kernels, switch to triton "
                             "kernels for now")
+        from auto_round_extension.cuda.qliner_triton import QuantLinear
     else:
         from auto_round_extension.cuda.qliner_triton import QuantLinear
     return QuantLinear
```
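This diff fixes two problems. First, the old helper assigned `warning_once` to a single logger *instance*; a plain function stored on an instance attribute is not bound as a method, so `logger.warning_once(msg)` passed the message string as the `logger` parameter and raised a `TypeError`. Patching `logging.Logger` itself lets Python's normal method binding supply `self`, while `@lru_cache(None)` keys the cache on the `(logger, message)` pair so each distinct warning is emitted only once. Second, the `exllamav2` fallback branch previously warned but never imported `QuantLinear`, so the trailing `return QuantLinear` raised an `UnboundLocalError`. A minimal, self-contained demonstration of the logging pattern (the `demo` logger name and messages are illustrative, not from the PR):

```python
import logging
from functools import lru_cache


@lru_cache(None)  # unbounded cache, keyed on the (logger, message) pair
def warning_once(self, msg: str):
    self.warning(msg)


# Patch the class, not an instance, so attribute lookup on any logger
# returns a bound method and `self` is filled in automatically.
logging.Logger.warning_once = warning_once

logging.basicConfig(level=logging.WARNING)
log = logging.getLogger("demo")  # illustrative logger name

log.warning_once("switching to triton kernels")  # emitted
log.warning_once("switching to triton kernels")  # suppressed: already cached
```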
examples/language-modeling/requirements.txt (1 change: 1 addition & 0 deletions)

```diff
@@ -17,4 +17,5 @@ auto-gptq
 openpyxl
 wandb
 py-cpuinfo
+numpy < 2.0
 
```
requirements.txt (1 change: 1 addition & 0 deletions)

```diff
@@ -5,3 +5,4 @@ sentencepiece
 torch
 transformers
 triton
+numpy < 2.0
```
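Both requirements files pin `numpy < 2.0`: NumPy 2.0 changed parts of the public API and the C ABI, so code and wheels built against the 1.x series can fail at import time. For environments where the requirements files are not enforced, a runtime guard with the same effect might look like the following (this sketch is illustrative, not part of the PR):

```python
# Illustrative runtime equivalent of the "numpy < 2.0" requirement pin;
# not part of the PR itself.
import numpy as np

if int(np.__version__.split(".")[0]) >= 2:
    raise RuntimeError(
        f"numpy {np.__version__} detected, but this project requires numpy < 2.0. "
        'Downgrade with: pip install "numpy<2"'
    )
```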