Fix (examples/ptq): fix execution device
Giuseppe5 committed Feb 12, 2024
1 parent 6ce11e2 commit 930c2fd
Showing 2 changed files with 29 additions and 13 deletions.
28 changes: 21 additions & 7 deletions src/brevitas_examples/imagenet_classification/ptq/ptq_common.py
@@ -146,7 +146,8 @@ def quantize_model(
         weight_quant_type='sym',
         act_quant_granularity='per_tensor',
         uint_sym_act_for_unsigned_values=True,
-        dtype=torch.float32):
+        dtype=torch.float32,
+        device='cpu'):
     # Define what quantize function to use and, based on the given configuration, its arguments
     quantize_fn = QUANTIZE_MAP[backend]
     weight_scale_type = scale_factor_type
@@ -222,6 +223,7 @@ def layerwise_bit_width_fn_weight(module):
 
 
     quant_layer_map, quant_layerwise_layer_map, quant_act_map, quant_identity_map = create_quant_maps(dtype=dtype,
+        device=device,
         uint_sym_act_for_unsigned_values=uint_sym_act_for_unsigned_values,
         bias_bit_width=bias_bit_width,
         weight_param_method=weight_param_method,
@@ -274,7 +276,8 @@ def create_quant_maps(
         act_param_method=None,
         act_quant_type=None,
         act_quant_granularity=None,
-        act_quant_percentile=None):
+        act_quant_percentile=None,
+        device='cpu'):
     """
     Starting from pre-defined quantizers, modify them to match the desired configuration
     """
@@ -323,25 +326,31 @@ def kwargs_prefix(prefix, weight_kwargs):
     if weight_quant_type == 'asym':
         weight_quant = weight_quant.let(zero_point_impl=ParameterFromStatsFromParameterZeroPoint)
     if act_quant is not None:
-        act_quant = act_quant.let(**{'high_percentile_q': act_quant_percentile, 'dtype': dtype})
+        act_quant = act_quant.let(
+            **{
+                'high_percentile_q': act_quant_percentile, 'dtype': dtype, 'device': device})
         if act_quant_type == 'asym' and act_quant_percentile is not None:
             act_quant = act_quant.let(**{'low_percentile_q': 100 - act_quant_percentile})
     if sym_act_quant is not None:
         sym_act_quant = sym_act_quant.let(
             **{
-                'high_percentile_q': act_quant_percentile, 'dtype': dtype})
+                'high_percentile_q': act_quant_percentile, 'dtype': dtype, 'device': device})
     if per_tensor_act_quant is not None:
         per_tensor_act_quant = per_tensor_act_quant.let(
             **{
-                'high_percentile_q': act_quant_percentile, 'dtype': dtype})
+                'high_percentile_q': act_quant_percentile, 'dtype': dtype, 'device': device})
         if act_quant_type == 'asym' and act_quant_percentile is not None:
             per_tensor_act_quant = per_tensor_act_quant.let(
                 **{'low_percentile_q': 100 - act_quant_percentile})
 
     weight_quant_dict = {'weight_quant': weight_quant}
 
     quant_wbiol_kwargs = {
-        **weight_quant_dict, 'dtype': dtype, 'return_quant_tensor': False, 'bias_quant': bias_quant}
+        **weight_quant_dict,
+        'dtype': dtype,
+        'device': device,
+        'return_quant_tensor': False,
+        'bias_quant': bias_quant}
 
     # yapf: disable
     quant_mha_kwargs = {
@@ -361,6 +370,7 @@ def kwargs_prefix(prefix, weight_kwargs):
         # since it supports only self-attention
         'packed_in_proj': True,
         'dtype': dtype,
+        'device': device,
         'return_quant_tensor': False}
     # yapf: enable
 
@@ -451,8 +461,12 @@ def apply_act_equalization(model, calib_loader, layerwise):
     model.eval()
     dtype = next(model.parameters()).dtype
     device = next(model.parameters()).device
+    add_mul_node = layerwise
     with torch.no_grad():
-        with activation_equalization_mode(model, alpha=0.5, layerwise=layerwise):
+        with activation_equalization_mode(model,
+                                          alpha=0.5,
+                                          layerwise=layerwise,
+                                          add_mul_node=add_mul_node):
             for i, (images, target) in enumerate(tqdm(calib_loader)):
                 images = images.to(device)
                 images = images.to(dtype)
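The hunks above all follow the same pattern: thread an explicit `device` through every quantizer keyword dictionary alongside `dtype`, instead of letting the quantizers default to CPU. A minimal standalone sketch of that pattern in plain PyTorch (the helper `build_quant_kwargs` is illustrative only, not part of Brevitas or of this diff):

import torch


def build_quant_kwargs(model, act_quant_percentile=99.999):
    # Infer dtype and device from the model itself, as the updated code does,
    # and forward both so that quantization parameters are created on the same
    # device as the model rather than defaulting to CPU.
    dtype = next(model.parameters()).dtype
    device = next(model.parameters()).device
    act_kwargs = {'high_percentile_q': act_quant_percentile, 'dtype': dtype, 'device': device}
    wbiol_kwargs = {'dtype': dtype, 'device': device, 'return_quant_tensor': False}
    return act_kwargs, wbiol_kwargs


# Example: for a model living on a GPU, both dicts would carry device='cuda:0'.
model = torch.nn.Linear(16, 16)
act_kwargs, wbiol_kwargs = build_quant_kwargs(model)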
14 changes: 8 additions & 6 deletions (second changed file)
@@ -362,14 +362,21 @@ def main():
     else:
         raise RuntimeError(f"{args.target_backend} backend not supported.")
 
+    # If available, use the selected GPU
+    if args.gpu is not None:
+        torch.cuda.set_device(args.gpu)
+        model = model.cuda(args.gpu)
+        cudnn.benchmark = False
+
     if args.act_equalization is not None:
         print("Applying activation equalization:")
         apply_act_equalization(model, calib_loader, layerwise=args.act_equalization == 'layerwise')
 
+    device = next(iter(model.parameters())).device
     # Define the quantized model
     quant_model = quantize_model(
         model,
         dtype=dtype,
+        device=device,
         backend=args.target_backend,
         scale_factor_type=args.scale_factor_type,
         bias_bit_width=args.bias_bit_width,
@@ -390,11 +397,6 @@ def main():
         weight_exponent_bit_width=args.weight_exponent_bit_width,
         act_mantissa_bit_width=args.act_mantissa_bit_width,
         act_exponent_bit_width=args.act_exponent_bit_width)
-    # If available, use the selected GPU
-    if args.gpu is not None:
-        torch.cuda.set_device(args.gpu)
-        quant_model = quant_model.cuda(args.gpu)
-        cudnn.benchmark = False
 
     # Calibrate the quant_model on the calibration dataloader
     print("Starting activation calibration:")
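The second file reorders the example script so that the model is moved to the selected GPU and its device is read off before quantize_model is called, rather than moving the already-quantized model afterwards. A condensed sketch of that ordering (the function name prepare_for_quantization is illustrative, not from the script):

import torch


def prepare_for_quantization(model, gpu=None):
    # Move the float model to the selected GPU first; previously this happened
    # only after quantization, so the quantizers were created on the CPU.
    if gpu is not None:
        torch.cuda.set_device(gpu)
        model = model.cuda(gpu)
        torch.backends.cudnn.benchmark = False
    # Read the execution device from the (possibly moved) model; this is the
    # value the script now forwards as quantize_model(..., device=device).
    device = next(iter(model.parameters())).device
    return model, device


model, device = prepare_for_quantization(torch.nn.Linear(8, 8), gpu=None)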
