copy val instead of assign
Signed-off-by: Yi Liu <[email protected]>
Yi4Liu committed Dec 16, 2024
1 parent e9df121 commit 80fb3f8
Showing 3 changed files with 5 additions and 5 deletions.
auto_round/autoround.py (1 addition, 1 deletion)
@@ -1075,7 +1075,7 @@ def quant_block(self, block, input_ids, input_others, q_input=None, device=torch
         total_loss = 0

         for i in range(self.iters):
-            # logger.warning(f"iter {i}")
+            logger.warning(f"iter {i}")
             total_loss = 0
             if self.sampler == "rand":
                 whole_indices = torch.randperm(nsamples)[:pick_samples]
auto_round/export/export_to_autoround/export.py (1 addition, 1 deletion)
@@ -191,7 +191,7 @@ def save_quantized_as_autoround(output_dir, inplace=True, backend="auto_round:ex
         logger.info(f"AutoRound format does not support {backend}, try to pack each layer with AutoGPTQ")
         backend = backend.replace("auto_round", "auto_gptq")

-    model = kwargs["model"].to(torch.float16)  ##TODO change
+    model = kwargs["model"].to(torch.bfloat16)
     to_quant_block_names = kwargs["to_quant_block_names"]
     quant_block_list = kwargs.get("quant_block_list", None)
     safe_serialization = True if 'safe_serialization' not in kwargs.keys() else kwargs["safe_serialization"]
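Note: the commit message does not explain the float16 -> bfloat16 switch above. As a rough sketch of the numerical difference between the two dtypes (illustrative values only, not taken from the repo): bfloat16 keeps float32's 8-bit exponent, so it covers the same dynamic range with a coarser mantissa, while float16 overflows for magnitudes above roughly 65504.

import torch

# Illustration of the range difference between float16 and bfloat16.
x = torch.tensor([70000.0])
print(x.to(torch.float16))   # inf: 70000 exceeds float16's largest finite value (~65504)
print(x.to(torch.bfloat16))  # finite (~7.0e4): bfloat16 shares float32's exponent width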
auto_round/export/export_to_autoround/qlinear_triton_gptq.py (3 additions, 3 deletions)
@@ -158,9 +158,9 @@ def pack(self, linear, scales, zeros, act_scales, w_bf16_to_fp8_scale, g_idx=Non
         scales = scales.t().contiguous()
         zeros = zeros.t().contiguous()
         scale_zeros = zeros * scales
-        self.scales = scales.clone().half()
-        self.act_scales = act_scales.clone().contiguous()
-        self.w_bf16_to_fp8_scale = w_bf16_to_fp8_scale.clone().contiguous()
+        self.scales.data.copy_(scales.clone().contiguous())
+        self.act_scales.data.copy_(act_scales.squeeze().clone())
+        self.w_bf16_to_fp8_scale.data.copy_(w_bf16_to_fp8_scale.squeeze().clone())
         if linear.bias is not None:
             self.bias = linear.bias.clone().half()

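This hunk is what the commit title refers to: instead of rebinding self.scales, self.act_scales, and self.w_bf16_to_fp8_scale to freshly created tensors, the new code copies values into the buffers that already exist on the module. A minimal sketch of the copy-vs-assign difference, using a hypothetical module that is not from the repo:

import torch
import torch.nn as nn

# Hypothetical layer for illustration; only the copy-vs-assign behavior is the point.
class PackedLayer(nn.Module):
    def __init__(self):
        super().__init__()
        # buffer pre-allocated with a fixed dtype, as a pack() routine typically does
        self.register_buffer("scales", torch.zeros(4, dtype=torch.float16))

layer = PackedLayer()
new_scales = torch.arange(4, dtype=torch.float32)  # values computed elsewhere

layer.scales.data.copy_(new_scales)  # in-place copy: the registered buffer keeps its float16 dtype
print(layer.scales.dtype)            # torch.float16

layer.scales = new_scales            # plain assignment: the attribute now points at the float32 tensor
print(layer.scales.dtype)            # torch.float32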
