Skip to content

Commit

Permalink
Small fixes to SDXL inference pipeline/exports/compile
Browse files — browse the repository at this point in the history
  • Loading branch information
eagarvey-amd committed Sep 24, 2024
1 parent dbc7635 commit a4a6801
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 7 deletions.
2 changes: 2 additions & 0 deletions models/turbine_models/custom_models/pipeline_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,8 @@ def __init__(
target, dict
), "Device and target triple must be both dicts or both strings."
for submodel in self.map.keys():
if self.map[submodel].get("load") == False:
continue
assert submodel in device.keys(), f"Device for {submodel} not found."
assert (
submodel in target.keys()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@
"decomp_attn": None,
},
},
}
sdxl_compiled_pipeline_map = {
"unetloop": {
"module_name": "sdxl_compiled_pipeline",
"load": False,
Expand Down Expand Up @@ -434,7 +436,7 @@ def load_scheduler(
if self.is_sd3:
export_fn = sd3_schedulers.export_scheduler_model
else:
export_fn = scheduler.export_scheduler_model
export_fn = schedulers.export_scheduler_model
self.map["scheduler"] = {
"module_name": "compiled_scheduler",
"export_fn": export_fn,
Expand Down
2 changes: 1 addition & 1 deletion models/turbine_models/custom_models/sd_inference/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def get_mfma_spec_path(target_chip, save_dir, masked_attention=False, use_punet=
url = "https://raw.githubusercontent.com/nod-ai/sdxl-scripts/main/int8-model/specs/attention_and_matmul_spec.mlir"
elif not masked_attention:
suffix = ""
url = "https://sharkpublic.blob.core.windows.net/sharkpublic/specs/no_pad/attention_and_matmul_spec_mfma.mlir"
url = "https://raw.githubusercontent.com/iree-org/iree/refs/heads/main/build_tools/pkgci/external_test_suite/attention_and_matmul_spec.mlir"
else:
suffix = "_pad"
url = "https://sharkpublic.blob.core.windows.net/sharkpublic/specs/latest/attention_and_matmul_spec_gfx942.mlir"
Expand Down
1 change: 0 additions & 1 deletion models/turbine_models/custom_models/sd_inference/vae.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,6 @@ def export_vae_model(
vae_model,
external_weights,
external_weight_path,
vae_harness=vae_harness,
)
if weights_only:
return external_weight_path
Expand Down
4 changes: 0 additions & 4 deletions models/turbine_models/custom_models/sdxl_inference/unet.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,6 @@ def export_unet_model(
if not attn_spec:
if (not decomp_attn) and use_punet:
attn_spec = "punet"
elif (not decomp_attn) and "gfx9" in target:
attn_spec = "mfma"
elif (not decomp_attn) and "gfx11" in target:
attn_spec = "wmma"
safe_name = utils.create_safe_name(
hf_model_name,
f"_bs{batch_size}_{max_length}_{height}x{width}_{precision}_{submodel_name}",
Expand Down

0 comments on commit a4a6801

Please sign in to comment.