Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consolidate the stateless llama logic #729

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
289 changes: 289 additions & 0 deletions models/turbine_models/custom_models/llm_cmd_opts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
import argparse
import os
from pathlib import Path
gpetters-amd marked this conversation as resolved.
Show resolved Hide resolved


def path_expand(s):
    """Return ``s`` as a fully resolved ``pathlib.Path``.

    A leading ``~`` is expanded to the user's home directory first, then the
    path is resolved (made absolute, with symlinks and ``..`` collapsed).

    Args:
        s: Path string or path-like object, e.g. ``"~/models/llama"``.

    Returns:
        The expanded, resolved ``Path``.
    """
    return Path(s).expanduser().resolve()


def is_valid_file(arg):
    """Return ``arg`` if it names an existing filesystem path, else ``None``.

    Despite the name, the check is ``os.path.exists``, so an existing
    directory is accepted as well as an existing file.

    Args:
        arg: Path string to check.

    Returns:
        ``arg`` unchanged when the path exists; ``None`` otherwise.
    """
    if os.path.exists(arg):
        return arg
    return None


# Note: this is where command-line options for the scripts in this directory
# are defined along with their defaults. Thus, they should not be referenced
# within modelling or inference code, only at the entry point to the script.

# We should consider separating out the options that are "model configs" from
# the options that control the compiler, runtime, and script behavior,
# when applicable, as the former would best be kept in a separate
# config or imported from huggingface.

# Shared module-level parser. Every option below is registered on it, and the
# help output shows each option's default (ArgumentDefaultsHelpFormatter).
p = argparse.ArgumentParser(
    description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

##############################################################################
# SDXL Huggingface Options
##############################################################################

p.add_argument(
    "--hf_auth_token",
    type=str,
    help="The Hugging Face auth token, if required",
    default=None,
)
p.add_argument(
    "--hf_model_name",
    type=str,
    help="HF model name",
    default="Trelis/Llama-2-7b-chat-hf-function-calling-v2",
)
p.add_argument(
    "--scheduler_id",
    type=str,
    help="Scheduler ID",
    default="Euler",
)

##############################################################################
# SDXL Inference Options
# These options are used to control runtime parameters for SDXL inference.
##############################################################################

p.add_argument(
    "--prompt",
    type=str,
    default=" a cat under the snow with blue eyes, covered by snow, cinematic style, medium shot, professional photo, animal",
    help="Prompt input to stable diffusion.",
)

p.add_argument(
    "--negative_prompt",
    type=str,
    default="Watermark, blurry, oversaturated, low resolution, pollution",
    help="Negative prompt input to stable diffusion.",
)

p.add_argument(
    "--num_inference_steps", type=int, default=30, help="Number of UNet inference steps"
)

p.add_argument(
    "--batch_count",
    type=int,
    default=1,
    help="Number of batches to run for a single prompt",
)

p.add_argument(
    "--guidance_scale",
    type=float,
    default=7.5,
    help="Scale by which to adjust prompt guidance to the unconditional noise prediction output of UNet after each iteration.",
)

# NOTE(review): the seed is parsed as a float; confirm that consumers do not
# require an integer seed.
p.add_argument(
    "--seed", type=float, default=0, help="Seed for random number/latents generation."
)

# NOTE(review): a reviewer flagged the weight flags as unnecessary for llama:
# only one external weight file is used, so --external_weights_dir could be
# removed, and --external_weight_file (further below) is probably not needed.
p.add_argument(
    "--external_weight_path",
    type=str,
    default="",
    help="Path to external weights file, for jobs with one weights filepath. When importing, this is used to specify where to save the model weights, and at runtime, this is used to specify where to load the model weights from.",
)

p.add_argument(
    "--external_weights_dir",
    type=str,
    default="",
    help="Directory containing external weights for a job that requires more than one weights file. When importing, this is used to specify where to save the model weights, and at runtime, this is used to specify where to load the model weights from. Files will then be saved according to the parameters that make them unique, i.e. <hf_model_name>_<precision>_<submodel>_<submodel-specific>.<external_weights>",
)

p.add_argument(
    "--vmfb_path", type=str, default="", help="path to vmfb containing compiled module"
)

p.add_argument(
    "--pipeline_vmfb_path",
    type=str,
    default="",
    help="path to vmfb containing compiled meta-module",
)

p.add_argument(
    "--external_weight_file",
    type=str,
    default=None,
    help="Path to external weights, used in benchmark scripts.",
)

p.add_argument(
    "--pipeline_dir",
    type=str,
    default=None,
    help="Directory to save pipeline artifacts",
)

# Plain on/off switch: absent -> False, present -> True.
p.add_argument(
    "--compiled_pipeline",
    default=False,
    action="store_true",
    help="Do one-shot inference from tokens to image in a shrink-wrapped pipeline binary.",
)

##############################################################################
# SDXL Modelling Options
# These options are used to control model defining parameters for SDXL.
# These are MLIR-changing variables! If you change them, you will need
# to import/download and recompile the model.
##############################################################################

def _str_to_bool(value):
    """Convert a command-line boolean spelled as text into a real ``bool``.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is
    ``True`` because any non-empty string is truthy, so the option could
    never be switched off with an explicit value.

    Args:
        value: The raw option value (a string from the command line, or an
            actual ``bool`` which is passed through unchanged).

    Returns:
        ``True`` for 1/true/t/yes/y/on, ``False`` for 0/false/f/no/n/off or
        the empty string (case-insensitive, surrounding whitespace ignored).

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    # The empty string stays False, matching what bool("") returned before.
    if text in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


p.add_argument("--batch_size", type=int, default=1, help="Batch size for inference")
p.add_argument(
    "--height", type=int, default=1024, help="Height of Stable Diffusion output image."
)
p.add_argument(
    "--width", type=int, default=1024, help="Width of Stable Diffusion output image"
)
p.add_argument(
    "--precision",
    type=str,
    default="fp16",
    help="Precision of Stable Diffusion weights and graph.",
)
p.add_argument(
    "--max_length", type=int, default=64, help="Sequence Length of Stable Diffusion"
)
p.add_argument("--vae_variant", type=str, default="decode", help="encode, decode")
p.add_argument(
    "--return_index",
    action="store_true",
    help="Make scheduled unet compiled module return the step index.",
)

# Takes an explicit value (e.g. ``--vae_decomp_attn false``). Parsed with
# _str_to_bool so that "false"/"0"/"no" really yield False.
p.add_argument(
    "--vae_decomp_attn",
    type=_str_to_bool,
    default=False,
    help="Decompose attention for VAE decode only at fx graph level",
)

##############################################################################
# SDXL script general options.
##############################################################################

p.add_argument("--compile_to", type=str, default="mlir", help="torch, linalg, vmfb")

# None is listed in ``choices`` so that the default (no externalization)
# is itself a valid value.
p.add_argument(
    "--external_weights",
    type=str,
    default=None,
    choices=["safetensors", "irpa", "gguf", None],
    help="Externalizes model weights from the torch dialect IR and its successors",
)

# See --external_weight_path and --external_weights_dir to specify where to save the model weights.

p.add_argument(
    "--compare_vs_torch",
    action="store_true",
    help="Runs both turbine vmfb and a torch model to compare results",
)
p.add_argument(
    "--decomp_attn",
    default=False,
    action="store_true",
    help="Decompose attention at fx graph level",
)
# Inverted switch: the value is True unless the flag is given, because the
# action is store_false. The help text spells this out, since the flag name
# alone suggests the opposite.
p.add_argument(
    "--exit_on_vmfb",
    default=True,
    action="store_false",
    help="Exit program on vmfb compilation completion (enabled by default; passing this flag disables it). Most scripts will also save .mlir if this is disabled.",
)
p.add_argument(
    "--input_mlir",
    type=str,
    default=None,
    help="Path to input mlir file to compile. Comma-separate paths to provide more than one input to pipelines.",
)
p.add_argument(
    "--download_mlir",
    default=False,
    action="store_true",
    help="Download missing mlir files from Azure storage.",
)
p.add_argument(
    "--container_name",
    type=str,
    default=None,
    help="Azure storage container name to download mlir files from.",
)


##############################################################################
# IREE Compiler Options
##############################################################################

def _register_compiler_options(parser):
    """Register the IREE compiler/runtime string options on ``parser``.

    Every option in this group is a plain ``str`` option, so the group is
    described as (flag, default, help) rows and registered in table order.
    """
    string_options = (
        ("--device", "cpu", "cpu, cuda, vulkan, rocm"),
        (
            "--rt_device",
            "local-task",
            "local-task, local-sync, vulkan://0, rocm://0, cuda://0, etc.",
        ),
        # TODO: Bring in detection for target triple
        (
            "--iree_target_triple",
            "",
            "Specify vulkan target triple or rocm/cuda target device.",
        ),
        ("--ireec_flags", "", "extra iree-compile options"),
        (
            "--attn_flags",
            "",
            "extra iree-compile options for models with iree_linalg_ext.attention ops.",
        ),
        (
            "--attn_spec",
            None,
            "extra iree-compile options for models with iree_linalg_ext.attention ops. Set this to 'default' if you are using mfma-capable hardware with ROCM.",
        ),
        (
            "--clip_flags",
            "",
            "extra iree-compile options to send for compiling CLIP/prompt_encoder. Only use this for testing bleeding edge flags! Any default options should be added to sd_inference/utils.py",
        ),
        (
            "--vae_flags",
            "",
            "extra iree-compile options to send for compiling VAE. Only use this for testing bleeding edge flags! Any default options should be added to sd_inference/utils.py",
        ),
        (
            "--unet_flags",
            "",
            "extra iree-compile options to send for compiling unet. Only use this for testing bleeding edge flags! Any default options should be added to sd_inference/utils.py",
        ),
    )
    for flag, default_value, help_text in string_options:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)


_register_compiler_options(p)


# Parsed once at import time. Unrecognised command-line arguments are
# collected in ``unknown`` instead of causing an error.
args, unknown = p.parse_known_args()
6 changes: 3 additions & 3 deletions models/turbine_models/custom_models/sd_inference/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@
],
"unet": [
"--iree-flow-enable-aggressive-fusion",
"--iree-flow-enable-fuse-horizontal-contractions=true",
# "--iree-flow-enable-fuse-horizontal-contractions=true",
"--iree-opt-aggressively-propagate-transposes=true",
"--iree-codegen-llvmgpu-use-vector-distribution=true",
],
"clip": [
"--iree-flow-enable-aggressive-fusion",
"--iree-flow-enable-fuse-horizontal-contractions=true",
# "--iree-flow-enable-fuse-horizontal-contractions=true",
"--iree-opt-aggressively-propagate-transposes=true",
],
"vae": [
Expand All @@ -58,7 +58,7 @@
"--iree-opt-const-eval=false",
"--iree-opt-aggressively-propagate-transposes=true",
"--iree-flow-enable-aggressive-fusion",
"--iree-flow-enable-fuse-horizontal-contractions=true",
# "--iree-flow-enable-fuse-horizontal-contractions=true",
"--iree-codegen-gpu-native-math-precision=true",
"--iree-codegen-llvmgpu-use-vector-distribution=true",
"--iree-codegen-llvmgpu-enable-transform-dialect-jit=false",
Expand Down
Loading
Loading