From f84a0e13e21a43378ff53e56f88c77a9bd894f65 Mon Sep 17 00:00:00 2001 From: Giuseppe Franco Date: Fri, 27 Sep 2024 09:50:59 +0100 Subject: [PATCH] Feat (examples/sdxl): move and consolidate export --- .../stable_diffusion/main.py | 87 +++++++++---------- .../stable_diffusion/sd_quant/export.py | 12 +-- 2 files changed, 49 insertions(+), 50 deletions(-) diff --git a/src/brevitas_examples/stable_diffusion/main.py b/src/brevitas_examples/stable_diffusion/main.py index 3fe24a321..c7b5367a4 100644 --- a/src/brevitas_examples/stable_diffusion/main.py +++ b/src/brevitas_examples/stable_diffusion/main.py @@ -463,18 +463,17 @@ def sdpa_zp_stats_type(): pipe.set_progress_bar_config(disable=True) - if args.dry_run: - with torch.no_grad(): - run_val_inference( - pipe, - args.resolution, [calibration_prompts[0]], - test_seeds, - args.device, - dtype, - total_steps=1, - use_negative_prompts=args.use_negative_prompts, - test_latents=latents, - guidance_scale=args.guidance_scale) + with torch.no_grad(): + run_val_inference( + pipe, + args.resolution, [calibration_prompts[0]], + test_seeds, + args.device, + dtype, + total_steps=1, + use_negative_prompts=args.use_negative_prompts, + test_latents=latents, + guidance_scale=args.guidance_scale) if args.load_checkpoint is not None: with load_quant_model_mode(pipe.unet): @@ -574,6 +573,38 @@ def sdpa_zp_stats_type(): torch.save( pipe.vae.state_dict(), os.path.join(output_dir, f"vae_{args.checkpoint_name}")) + if args.export_target: + # Move to cpu and to float32 to enable CPU export + if args.export_cpu_float32: + pipe.unet.to('cpu').to(torch.float32) + pipe.unet.eval() + device = next(iter(pipe.unet.parameters())).device + dtype = next(iter(pipe.unet.parameters())).dtype + + # Define tracing input + if is_sd_xl: + generate_fn = generate_unet_xl_rand_inputs + shape = SD_XL_EMBEDDINGS_SHAPE + else: + generate_fn = generate_unet_21_rand_inputs + shape = SD_2_1_EMBEDDINGS_SHAPE + trace_inputs = generate_fn( + embedding_shape=shape, + unet_input_shape=unet_input_shape(args.resolution), + device=device, + dtype=dtype) + + if args.export_target == 'onnx': + if args.weight_quant_granularity == 'per_group': + export_manager = BlockQuantProxyLevelManager + else: + export_manager = StdQCDQONNXManager + export_manager.change_weight_export(export_weight_q_node=args.export_weight_q_node) + export_onnx(pipe, trace_inputs, output_dir, export_manager) + if args.export_target == 'params_only': + pipe.to('cpu') + export_quant_params(pipe, output_dir, export_vae=args.vae_fp16_fix) + # Perform inference if args.prompt > 0 and not args.dry_run: # with brevitas_proxy_inference_mode(pipe.unet): @@ -619,38 +650,6 @@ def sdpa_zp_stats_type(): fid.update(quant_images_values, real=False) print(f"FID: {float(fid.compute())}") - if args.export_target: - # Move to cpu and to float32 to enable CPU export - if args.export_cpu_float32: - pipe.unet.to('cpu').to(torch.float32) - pipe.unet.eval() - device = next(iter(pipe.unet.parameters())).device - dtype = next(iter(pipe.unet.parameters())).dtype - - # Define tracing input - if is_sd_xl: - generate_fn = generate_unet_xl_rand_inputs - shape = SD_XL_EMBEDDINGS_SHAPE - else: - generate_fn = generate_unet_21_rand_inputs - shape = SD_2_1_EMBEDDINGS_SHAPE - trace_inputs = generate_fn( - embedding_shape=shape, - unet_input_shape=unet_input_shape(args.resolution), - device=device, - dtype=dtype) - - if args.export_target == 'onnx': - if args.weight_quant_granularity == 'per_group': - export_manager = BlockQuantProxyLevelManager - else: - export_manager = StdQCDQONNXManager - export_manager.change_weight_export(export_weight_q_node=args.export_weight_q_node) - export_onnx(pipe, trace_inputs, output_dir, export_manager) - if args.export_target == 'params_only': - pipe.to('cpu') - export_quant_params(pipe, output_dir, export_vae=args.vae_fp16_fix) - if __name__ == "__main__": parser = argparse.ArgumentParser(description='Stable Diffusion quantization') diff --git a/src/brevitas_examples/stable_diffusion/sd_quant/export.py b/src/brevitas_examples/stable_diffusion/sd_quant/export.py index a42a35204..7adef0723 100644 --- a/src/brevitas_examples/stable_diffusion/sd_quant/export.py +++ b/src/brevitas_examples/stable_diffusion/sd_quant/export.py @@ -36,9 +36,9 @@ def handle_quant_param(layer, layer_dict): output_scale = layer.output_quant.export_handler.symbolic_kwargs[ 'dequantize_symbolic_kwargs']['scale'].data - layer_dict['output_scale'] = output_scale.numpy().tolist() + layer_dict['output_scale'] = output_scale.cpu().numpy().tolist() layer_dict['output_scale_shape'] = output_scale.shape - layer_dict['input_scale'] = input_scale.numpy().tolist() + layer_dict['input_scale'] = input_scale.cpu().numpy().tolist() layer_dict['input_scale_shape'] = input_scale.shape layer_dict['input_zp'] = input_zp.to(torch.float32).cpu().numpy().tolist() layer_dict['input_zp_shape'] = input_zp.shape @@ -83,7 +83,7 @@ def export_quant_params(pipe, output_dir, export_vae=False): full_name = name smoothquant_param = module.scale.weight - layer_dict['smoothquant_mul'] = smoothquant_param.data.numpy().tolist() + layer_dict['smoothquant_mul'] = smoothquant_param.data.cpu().numpy().tolist() layer_dict['smoothquant_mul_shape'] = module.scale.runtime_shape layer_dict = handle_quant_param(module.layer, layer_dict) @@ -94,7 +94,7 @@ def export_quant_params(pipe, output_dir, export_vae=False): full_name = name smoothquant_param = module.scale.weight - layer_dict['smoothquant_mul'] = smoothquant_param.data.numpy().tolist() + layer_dict['smoothquant_mul'] = smoothquant_param.data.cpu().numpy().tolist() layer_dict['smoothquant_mul_shape'] = module.scale.runtime_shape quant_params[full_name] = layer_dict handled_quant_layers.add(id(module.layer)) @@ -113,9 +113,9 @@ def export_quant_params(pipe, output_dir, export_vae=False): 'dequantize_symbolic_kwargs']['scale'].data act_zp = module.act_quant.export_handler.symbolic_kwargs[ 'dequantize_symbolic_kwargs']['zero_point'].data - layer_dict['act_scale'] = act_scale.numpy().tolist() + layer_dict['act_scale'] = act_scale.cpu().numpy().tolist() layer_dict['act_scale_shape'] = act_scale.shape - layer_dict['act_zp'] = act_zp.to(torch.float32).numpy().tolist() + layer_dict['act_zp'] = act_zp.to(torch.float32).cpu().numpy().tolist() layer_dict['act_zp_shape'] = act_zp.shape layer_dict['act_zp_dtype'] = str(act_zp.dtype) quant_params[full_name] = layer_dict