Commit

fix some issues
wenhuach21 committed Dec 10, 2024
1 parent 310cf21 commit 5b29135
Showing 4 changed files with 10 additions and 4 deletions.
2 changes: 2 additions & 0 deletions auto_round/auto_quantizer.py
@@ -545,6 +545,8 @@ def remove_device_str(s, device_str):
"via `pip install git+https://github.com/AutoGPTQ/AutoGPTQ.git@b8b4127`")

QuantLinear = dynamic_import_inference_linear(layer_backend, bits, group_size, sym)
+# from auto_round_extension.cuda.qlinear_exllamav2_gptq import QuantLinear


layer_device = get_device(layer)

2 changes: 2 additions & 0 deletions auto_round/autoround.py
@@ -373,6 +373,8 @@ def dump_qinfo_to_layer_config(self):
for n, m in self.model.named_modules():
    if n not in self.layer_config.keys():
        continue
+   if hasattr(m,"orig_layer"):
+       m = m.orig_layer
    if hasattr(m, "scale"):
        self.layer_config[n]["scale"] = m.scale
        self.layer_config[n]["zp"] = m.zp
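
The added check handles modules that are still wrapped by the tuning code when the quantization info is collected: the tuned scale and zero point live on the wrapped original layer, not on the wrapper itself. A minimal sketch of the pattern, with WrapperLinear as a hypothetical stand-in for the wrapper used during tuning:

import torch

class WrapperLinear(torch.nn.Module):
    # Hypothetical wrapper, for illustration only: the tuned layer hangs off orig_layer.
    def __init__(self, orig_layer):
        super().__init__()
        self.orig_layer = orig_layer

linear = torch.nn.Linear(8, 8)
linear.scale = torch.ones(8)   # assume tuning attached these tensors
linear.zp = torch.zeros(8)
m = WrapperLinear(linear)

# Same unwrap as the two added lines above.
if hasattr(m, "orig_layer"):
    m = m.orig_layer
print(hasattr(m, "scale"), hasattr(m, "zp"))  # True True
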
2 changes: 1 addition & 1 deletion auto_round/data_type/fp8.py
@@ -287,4 +287,4 @@ def progressive_quant_fp8_int4(tensor, bits=4, group_size=-1, v=0, min_scale=1.0
q_scale_thresh=q_scale_thresh)
qdq_tensor = qdq_int4_tensor * scale_bf16_to_fp8

-return qdq_tensor, scale_fp8_to_int4 * scale_bf16_to_fp8, None,
+return qdq_tensor, scale_fp8_to_int4 * scale_bf16_to_fp8, zp_fp8_to_int4
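
With the fix, the function returns the actual int4 zero point instead of None, while the second element remains the scale composed from the two quantization stages. A toy, self-contained illustration of how the returned values relate (all numbers are made up; only the names mirror the diff):

import torch

# Toy numbers only; the real scales come from the fp8 and int4 quantization steps.
scale_bf16_to_fp8 = torch.tensor(0.5)
scale_fp8_to_int4 = torch.tensor(0.1)
zp_fp8_to_int4 = torch.tensor(0.0)           # now returned instead of None
int4_codes = torch.tensor([3.0, -2.0, 1.0])  # pretend int4 quantized values

qdq_int4_tensor = (int4_codes - zp_fp8_to_int4) * scale_fp8_to_int4
qdq_tensor = qdq_int4_tensor * scale_bf16_to_fp8           # matches the line above the return
combined_scale = scale_fp8_to_int4 * scale_bf16_to_fp8     # second element of the return tuple
print(qdq_tensor, combined_scale, zp_fp8_to_int4)
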
8 changes: 5 additions & 3 deletions auto_round/export/export_to_autoround/export.py
@@ -95,8 +95,9 @@ def pack_layer(name, model, layer_config, backend, pbar):
bits = config["bits"]
group_size = config["group_size"]
sym = config["sym"]

layer = get_module(model, name)
+if hasattr(layer, "orig_layer"):
+    layer = layer.orig_layer

device = layer.weight.device

@@ -123,7 +124,7 @@ def pack_layer(name, model, layer_config, backend, pbar):
qlayer = new_layer
scale = layer_config[name]["scale"]
zero = layer_config[name]["zp"]
-act_scale = layer_config[name]["act_scale"]
+act_scale = layer.act_scale
# so far can only pack layer on CPU
qlayer.to("cpu")
##force to float32 to be compatible with torch 2.0
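
Both pack_layer changes above follow the same idea: at export time a layer may still be wrapped by the tuning code, so the layer is unwrapped via orig_layer first and act_scale is then read directly from that layer rather than from layer_config. A hedged sketch of the unwrap-then-read flow (get_packing_inputs is a hypothetical helper, not part of the source):

def get_packing_inputs(layer, layer_config, name):
    # Hypothetical helper mirroring the diff: unwrap the tuning wrapper first,
    # then read act_scale from the layer itself rather than from layer_config.
    if hasattr(layer, "orig_layer"):
        layer = layer.orig_layer
    scale = layer_config[name]["scale"]
    zero = layer_config[name]["zp"]
    act_scale = layer.act_scale
    return layer, scale, zero, act_scale
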
@@ -190,7 +191,7 @@ def save_quantized_as_autoround(output_dir, inplace=True, backend="auto_round:ex
logger.info(f"AutoRound format does not support {backend}, try to pack each layer with AutoGPTQ")
backend = backend.replace("auto_round", "auto_gptq")

-model = kwargs["model"]
+model = kwargs["model"].to(torch.float16) ##TODO change
to_quant_block_names = kwargs["to_quant_block_names"]
quant_block_list = kwargs.get("quant_block_list", None)
safe_serialization = True if 'safe_serialization' not in kwargs.keys() else kwargs["safe_serialization"]
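
The export path now casts the whole model to float16 before packing, and the ##TODO comment suggests the dtype should eventually be configurable rather than hard-coded. A rough sketch of what that could look like (the dtype kwarg is an assumption, not part of the current API):

import torch

def get_export_model(kwargs):
    # Hypothetical variant of the hard-coded cast: default to float16, allow override.
    dtype = kwargs.get("dtype", torch.float16)   # the dtype kwarg is assumed, not in the source
    return kwargs["model"].to(dtype)
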
@@ -229,6 +230,7 @@ def save_quantized_as_autoround(output_dir, inplace=True, backend="auto_round:ex
if len(extra_config) > 0:
quantization_config["extra_config"] = extra_config
names = list(layer_config.keys())

with ThreadPoolExecutor(max_workers=2) as executor:
with tqdm(total=len(names), leave=True) as pbar:
def wrapper(name):
