Feat (examples/sdxl): move and consolidate export
Giuseppe5 committed Oct 1, 2024
1 parent 1094eea commit f84a0e1
Showing 2 changed files with 49 additions and 50 deletions.
87 changes: 43 additions & 44 deletions src/brevitas_examples/stable_diffusion/main.py
@@ -463,18 +463,17 @@ def sdpa_zp_stats_type():

     pipe.set_progress_bar_config(disable=True)

-    if args.dry_run:
-        with torch.no_grad():
-            run_val_inference(
-                pipe,
-                args.resolution, [calibration_prompts[0]],
-                test_seeds,
-                args.device,
-                dtype,
-                total_steps=1,
-                use_negative_prompts=args.use_negative_prompts,
-                test_latents=latents,
-                guidance_scale=args.guidance_scale)
+    with torch.no_grad():
+        run_val_inference(
+            pipe,
+            args.resolution, [calibration_prompts[0]],
+            test_seeds,
+            args.device,
+            dtype,
+            total_steps=1,
+            use_negative_prompts=args.use_negative_prompts,
+            test_latents=latents,
+            guidance_scale=args.guidance_scale)

     if args.load_checkpoint is not None:
         with load_quant_model_mode(pipe.unet):
@@ -574,6 +573,38 @@ def sdpa_zp_stats_type():
             torch.save(
                 pipe.vae.state_dict(), os.path.join(output_dir, f"vae_{args.checkpoint_name}"))

+    if args.export_target:
+        # Move to cpu and to float32 to enable CPU export
+        if args.export_cpu_float32:
+            pipe.unet.to('cpu').to(torch.float32)
+        pipe.unet.eval()
+        device = next(iter(pipe.unet.parameters())).device
+        dtype = next(iter(pipe.unet.parameters())).dtype
+
+        # Define tracing input
+        if is_sd_xl:
+            generate_fn = generate_unet_xl_rand_inputs
+            shape = SD_XL_EMBEDDINGS_SHAPE
+        else:
+            generate_fn = generate_unet_21_rand_inputs
+            shape = SD_2_1_EMBEDDINGS_SHAPE
+        trace_inputs = generate_fn(
+            embedding_shape=shape,
+            unet_input_shape=unet_input_shape(args.resolution),
+            device=device,
+            dtype=dtype)
+
+        if args.export_target == 'onnx':
+            if args.weight_quant_granularity == 'per_group':
+                export_manager = BlockQuantProxyLevelManager
+            else:
+                export_manager = StdQCDQONNXManager
+            export_manager.change_weight_export(export_weight_q_node=args.export_weight_q_node)
+            export_onnx(pipe, trace_inputs, output_dir, export_manager)
+        if args.export_target == 'params_only':
+            pipe.to('cpu')
+            export_quant_params(pipe, output_dir, export_vae=args.vae_fp16_fix)
+
     # Perform inference
     if args.prompt > 0 and not args.dry_run:
         # with brevitas_proxy_inference_mode(pipe.unet):
Expand Down Expand Up @@ -619,38 +650,6 @@ def sdpa_zp_stats_type():
fid.update(quant_images_values, real=False)
print(f"FID: {float(fid.compute())}")

if args.export_target:
# Move to cpu and to float32 to enable CPU export
if args.export_cpu_float32:
pipe.unet.to('cpu').to(torch.float32)
pipe.unet.eval()
device = next(iter(pipe.unet.parameters())).device
dtype = next(iter(pipe.unet.parameters())).dtype

# Define tracing input
if is_sd_xl:
generate_fn = generate_unet_xl_rand_inputs
shape = SD_XL_EMBEDDINGS_SHAPE
else:
generate_fn = generate_unet_21_rand_inputs
shape = SD_2_1_EMBEDDINGS_SHAPE
trace_inputs = generate_fn(
embedding_shape=shape,
unet_input_shape=unet_input_shape(args.resolution),
device=device,
dtype=dtype)

if args.export_target == 'onnx':
if args.weight_quant_granularity == 'per_group':
export_manager = BlockQuantProxyLevelManager
else:
export_manager = StdQCDQONNXManager
export_manager.change_weight_export(export_weight_q_node=args.export_weight_q_node)
export_onnx(pipe, trace_inputs, output_dir, export_manager)
if args.export_target == 'params_only':
pipe.to('cpu')
export_quant_params(pipe, output_dir, export_vae=args.vae_fp16_fix)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Stable Diffusion quantization')
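The main.py change above moves the export block so it runs right after checkpoint handling and before inference, and it probes the UNet's device and dtype from its first parameter so that the randomly generated tracing inputs match the model. Below is a minimal sketch of that probing pattern, shown with a stand-in nn.Linear instead of pipe.unet (the stand-in module and shapes are illustrative, not from the commit):

# Device/dtype probing before export, as in the diff above.
# nn.Linear stands in for pipe.unet; everything here is plain PyTorch.
import torch
import torch.nn as nn

model = nn.Linear(4, 4)  # stand-in for pipe.unet
model.eval()

# Read device and dtype off the first parameter, as main.py does
device = next(iter(model.parameters())).device
dtype = next(iter(model.parameters())).dtype

# Tracing inputs should match the model's device and dtype,
# otherwise the forward pass used for tracing raises a RuntimeError
trace_input = torch.randn(1, 4, device=device, dtype=dtype)
with torch.no_grad():
    out = model(trace_input)
print(out.shape, device, dtype)  # torch.Size([1, 4]) cpu torch.float32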
12 changes: 6 additions & 6 deletions src/brevitas_examples/stable_diffusion/sd_quant/export.py
@@ -36,9 +36,9 @@ def handle_quant_param(layer, layer_dict):
     output_scale = layer.output_quant.export_handler.symbolic_kwargs[
         'dequantize_symbolic_kwargs']['scale'].data

-    layer_dict['output_scale'] = output_scale.numpy().tolist()
+    layer_dict['output_scale'] = output_scale.cpu().numpy().tolist()
     layer_dict['output_scale_shape'] = output_scale.shape
-    layer_dict['input_scale'] = input_scale.numpy().tolist()
+    layer_dict['input_scale'] = input_scale.cpu().numpy().tolist()
     layer_dict['input_scale_shape'] = input_scale.shape
     layer_dict['input_zp'] = input_zp.to(torch.float32).cpu().numpy().tolist()
     layer_dict['input_zp_shape'] = input_zp.shape
@@ -83,7 +83,7 @@ def export_quant_params(pipe, output_dir, export_vae=False):
             full_name = name
             smoothquant_param = module.scale.weight

-            layer_dict['smoothquant_mul'] = smoothquant_param.data.numpy().tolist()
+            layer_dict['smoothquant_mul'] = smoothquant_param.data.cpu().numpy().tolist()
             layer_dict['smoothquant_mul_shape'] = module.scale.runtime_shape
             layer_dict = handle_quant_param(module.layer, layer_dict)

@@ -94,7 +94,7 @@ def export_quant_params(pipe, output_dir, export_vae=False):
             full_name = name
             smoothquant_param = module.scale.weight

-            layer_dict['smoothquant_mul'] = smoothquant_param.data.numpy().tolist()
+            layer_dict['smoothquant_mul'] = smoothquant_param.data.cpu().numpy().tolist()
             layer_dict['smoothquant_mul_shape'] = module.scale.runtime_shape
             quant_params[full_name] = layer_dict
             handled_quant_layers.add(id(module.layer))
@@ -113,9 +113,9 @@ def export_quant_params(pipe, output_dir, export_vae=False):
                 'dequantize_symbolic_kwargs']['scale'].data
             act_zp = module.act_quant.export_handler.symbolic_kwargs[
                 'dequantize_symbolic_kwargs']['zero_point'].data
-            layer_dict['act_scale'] = act_scale.numpy().tolist()
+            layer_dict['act_scale'] = act_scale.cpu().numpy().tolist()
             layer_dict['act_scale_shape'] = act_scale.shape
-            layer_dict['act_zp'] = act_zp.to(torch.float32).numpy().tolist()
+            layer_dict['act_zp'] = act_zp.to(torch.float32).cpu().numpy().tolist()
             layer_dict['act_zp_shape'] = act_zp.shape
             layer_dict['act_zp_dtype'] = str(act_zp.dtype)
             quant_params[full_name] = layer_dict
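The export.py changes are all one pattern: inserting .cpu() before .numpy(), because torch.Tensor.numpy() only works for CPU tensors and raises a TypeError when the tensor lives on a CUDA device, while .cpu() is a no-op for a tensor already on the CPU. A minimal repro of the failure these edits guard against (plain PyTorch, no Brevitas required; the tensor values are illustrative):

# torch.Tensor.numpy() requires a CPU tensor; .cpu() makes the call safe
# on any device and costs nothing when the tensor is already on the CPU.
import torch

scale = torch.tensor([0.5, 0.25])
if torch.cuda.is_available():
    scale = scale.cuda()
    # scale.numpy() would now raise:
    # TypeError: can't convert cuda:0 device type tensor to numpy.
    #            Use Tensor.cpu() to copy the tensor to host memory first.

values = scale.cpu().numpy().tolist()  # works on both CPU and CUDA tensors
print(values)  # [0.5, 0.25]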