Fix (examples/ptq): fix execution device
Giuseppe5 committed Feb 12, 2024
1 parent 6ce11e2 commit 930c2fd
Showing 2 changed files with 29 additions and 13 deletions.
28 changes: 21 additions & 7 deletions src/brevitas_examples/imagenet_classification/ptq/ptq_common.py
@@ -146,7 +146,8 @@ def quantize_model(
         weight_quant_type='sym',
         act_quant_granularity='per_tensor',
         uint_sym_act_for_unsigned_values=True,
-        dtype=torch.float32):
+        dtype=torch.float32,
+        device='cpu'):
     # Define what quantize function to use and, based on the given configuration, its arguments
     quantize_fn = QUANTIZE_MAP[backend]
     weight_scale_type = scale_factor_type
@@ -222,6 +223,7 @@ def layerwise_bit_width_fn_weight(module):
 
 
     quant_layer_map, quant_layerwise_layer_map, quant_act_map, quant_identity_map = create_quant_maps(dtype=dtype,
+        device=device,
         uint_sym_act_for_unsigned_values=uint_sym_act_for_unsigned_values,
         bias_bit_width=bias_bit_width,
         weight_param_method=weight_param_method,
@@ -274,7 +276,8 @@ def create_quant_maps(
         act_param_method=None,
         act_quant_type=None,
         act_quant_granularity=None,
-        act_quant_percentile=None):
+        act_quant_percentile=None,
+        device='cpu'):
     """
     Starting from pre-defined quantizers, modify them to match the desired configuration
     """
@@ -323,25 +326,31 @@ def kwargs_prefix(prefix, weight_kwargs):
     if weight_quant_type == 'asym':
         weight_quant = weight_quant.let(zero_point_impl=ParameterFromStatsFromParameterZeroPoint)
     if act_quant is not None:
-        act_quant = act_quant.let(**{'high_percentile_q': act_quant_percentile, 'dtype': dtype})
+        act_quant = act_quant.let(
+            **{
+                'high_percentile_q': act_quant_percentile, 'dtype': dtype, 'device': device})
         if act_quant_type == 'asym' and act_quant_percentile is not None:
             act_quant = act_quant.let(**{'low_percentile_q': 100 - act_quant_percentile})
     if sym_act_quant is not None:
         sym_act_quant = sym_act_quant.let(
             **{
-                'high_percentile_q': act_quant_percentile, 'dtype': dtype})
+                'high_percentile_q': act_quant_percentile, 'dtype': dtype, 'device': device})
     if per_tensor_act_quant is not None:
         per_tensor_act_quant = per_tensor_act_quant.let(
             **{
-                'high_percentile_q': act_quant_percentile, 'dtype': dtype})
+                'high_percentile_q': act_quant_percentile, 'dtype': dtype, 'device': device})
         if act_quant_type == 'asym' and act_quant_percentile is not None:
             per_tensor_act_quant = per_tensor_act_quant.let(
                 **{'low_percentile_q': 100 - act_quant_percentile})
 
     weight_quant_dict = {'weight_quant': weight_quant}
 
     quant_wbiol_kwargs = {
-        **weight_quant_dict, 'dtype': dtype, 'return_quant_tensor': False, 'bias_quant': bias_quant}
+        **weight_quant_dict,
+        'dtype': dtype,
+        'device': device,
+        'return_quant_tensor': False,
+        'bias_quant': bias_quant}
 
     # yapf: disable
     quant_mha_kwargs = {
@@ -361,6 +370,7 @@ def kwargs_prefix(prefix, weight_kwargs):
         # since it supports only self-attention
         'packed_in_proj': True,
         'dtype': dtype,
+        'device': device,
         'return_quant_tensor': False}
     # yapf: enable
 
@@ -451,8 +461,12 @@ def apply_act_equalization(model, calib_loader, layerwise):
     model.eval()
     dtype = next(model.parameters()).dtype
     device = next(model.parameters()).device
+    add_mul_node = layerwise
     with torch.no_grad():
-        with activation_equalization_mode(model, alpha=0.5, layerwise=layerwise):
+        with activation_equalization_mode(model,
+                                          alpha=0.5,
+                                          layerwise=layerwise,
+                                          add_mul_node=add_mul_node):
             for i, (images, target) in enumerate(tqdm(calib_loader)):
                 images = images.to(device)
                 images = images.to(dtype)
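The hunks above all follow the same pattern: thread an explicit `device` through every quantizer keyword dictionary alongside `dtype`, instead of letting the quantizers default to CPU. A minimal standalone sketch of that pattern in plain PyTorch (the helper `build_quant_kwargs` is illustrative only, not part of Brevitas or of this diff):

import torch


def build_quant_kwargs(model, act_quant_percentile=99.999):
    # Infer dtype and device from the model itself, as the updated code does,
    # and forward both so that quantization parameters are created on the same
    # device as the model rather than defaulting to CPU.
    dtype = next(model.parameters()).dtype
    device = next(model.parameters()).device
    act_kwargs = {'high_percentile_q': act_quant_percentile, 'dtype': dtype, 'device': device}
    wbiol_kwargs = {'dtype': dtype, 'device': device, 'return_quant_tensor': False}
    return act_kwargs, wbiol_kwargs


# Example: for a model living on a GPU, both dicts would carry device='cuda:0'.
model = torch.nn.Linear(16, 16)
act_kwargs, wbiol_kwargs = build_quant_kwargs(model)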
14 changes: 8 additions & 6 deletions (second changed file)
@@ -362,14 +362,21 @@ def main():
     else:
         raise RuntimeError(f"{args.target_backend} backend not supported.")
 
+    # If available, use the selected GPU
+    if args.gpu is not None:
+        torch.cuda.set_device(args.gpu)
+        model = model.cuda(args.gpu)
+        cudnn.benchmark = False
+
     if args.act_equalization is not None:
         print("Applying activation equalization:")
         apply_act_equalization(model, calib_loader, layerwise=args.act_equalization == 'layerwise')
 
+    device = next(iter(model.parameters())).device
     # Define the quantized model
     quant_model = quantize_model(
         model,
         dtype=dtype,
+        device=device,
         backend=args.target_backend,
         scale_factor_type=args.scale_factor_type,
         bias_bit_width=args.bias_bit_width,
@@ -390,11 +397,6 @@ def main():
         weight_exponent_bit_width=args.weight_exponent_bit_width,
         act_mantissa_bit_width=args.act_mantissa_bit_width,
         act_exponent_bit_width=args.act_exponent_bit_width)
-    # If available, use the selected GPU
-    if args.gpu is not None:
-        torch.cuda.set_device(args.gpu)
-        quant_model = quant_model.cuda(args.gpu)
-        cudnn.benchmark = False
 
     # Calibrate the quant_model on the calibration dataloader
     print("Starting activation calibration:")
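The second file reorders the example script so that the model is moved to the selected GPU and its device is read off before quantize_model is called, rather than moving the already-quantized model afterwards. A condensed sketch of that ordering (the function name prepare_for_quantization is illustrative, not from the script):

import torch


def prepare_for_quantization(model, gpu=None):
    # Move the float model to the selected GPU first; previously this happened
    # only after quantization, so the quantizers were created on the CPU.
    if gpu is not None:
        torch.cuda.set_device(gpu)
        model = model.cuda(gpu)
        torch.backends.cudnn.benchmark = False
    # Read the execution device from the (possibly moved) model; this is the
    # value the script now forwards as quantize_model(..., device=device).
    device = next(iter(model.parameters())).device
    return model, device


model, device = prepare_for_quantization(torch.nn.Linear(8, 8), gpu=None)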
