[UI] Correct "convert_weight_only" to "convert_weights_only"
This is a frequent typo among multiple developers, as "weights" is
typically plural. This commit updates the command-line argument from
`--convert-weight-only` to `--convert-weights-only`. For backwards
compatibility, the original spelling is kept as an equivalent alias.
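As a minimal standalone sketch of the approach (plain argparse, not the project's actual parser-construction code), both spellings can share a single destination, so either flag sets the same attribute:

    import argparse

    parser = argparse.ArgumentParser()
    # Preferred spelling.
    parser.add_argument(
        "--convert-weights-only",
        dest="convert_weights_only",
        action="store_true",
        help="Only convert model weights; do not build the model.",
    )
    # Original spelling, retained as an equivalent alias.
    parser.add_argument(
        "--convert-weight-only",
        dest="convert_weights_only",
        action="store_true",
        help="Equivalent to --convert-weights-only.",
    )

    # Either spelling sets the same attribute.
    assert parser.parse_args(["--convert-weight-only"]).convert_weights_only
    assert parser.parse_args(["--convert-weights-only"]).convert_weights_only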
Lunderberg committed Nov 9, 2023
1 parent 834811f commit 42eb0bb
1 changed file: mlc_llm/core.py (57 additions, 4 deletions)
@@ -31,7 +31,10 @@
stablelm_3b,
)
from mlc_llm.relax_model.commons import create_shard_info_func, create_shard_transformation_func
from mlc_llm.relax_model.param_manager import transform_params_for_each_rank, chain_parameter_transforms
from mlc_llm.relax_model.param_manager import (
transform_params_for_each_rank,
chain_parameter_transforms,
)
from mlc_llm.transform import fuse_split_rotary_embedding, rewrite_attention


@@ -50,75 +53,103 @@ class BuildArgs:
The name of the model to build. If it is ``auto``, we will automatically
set the model name according to ``--model-path``, ``hf-path``, or the model
folders under ``--artifact-path/models``.
hf_path: str
Hugging Face path from which to download params, tokenizer, and config.
quantization: str
The quantization mode we use to compile.
max_seq_len: int
The maximum allowed sequence length for the model.
target: str
The target platform to compile the model for.
db_path: str
Path to log database for all models. Default: ``./log_db/``.
reuse_lib: str
Whether to reuse a previously generated lib.
artifact_path: str
Where to store the output.
use_cache: int
Whether to use previously pickled IRModule and skip trace.
convert_weight_only: bool
convert_weights_only: bool
Whether to only convert model weights and not build the model. If both
``convert_weight_only`` and ``build_model_only`` are set, the behavior is undefined.
build_model_only: bool
Whether to only build the model and not convert model weights.
debug_dump: bool
Whether to dump debugging files during compilation.
debug_load_script: bool
Whether to load the script for debugging.
llvm_mingw: str
``/path/to/llvm-mingw-root``; use llvm-mingw to cross-compile to Windows.
system_lib: bool
A parameter to ``relax.build``.
sep_embed: bool
Build with a separated embedding layer; only applicable to LLaMA. This
feature is in the testing stage and will be formally replaced after a massive
overhaul of the embedding feature for all models and use cases.
sliding_window: int
The sliding window size in sliding window attention (SWA). This optional field
overrides the `sliding_window` in config.json for those models that use SWA.
Currently only useful when compiling Mistral.
sliding_window_chunk_size: int
The chunk size in sliding window attention (SWA) during prefilling. By default,
the chunk size is the same as sliding window. Currently only useful when compiling Mistral.
cc_path: str
``/path/to/cross_compiler_path``; currently only used when cross-compiling
for NVIDIA Jetson devices.
use_safetensors: bool
Specifies whether to use ``.safetensors`` instead of the default ``.bin``
when loading in model weights.
enable_batching: bool
Build the model for batched inference.
This is a temporary flag used to control the model execution flow in
single-sequence and batching settings. The two flows will eventually be
merged, and this flag will then be removed.
no_cutlass_attn: bool
Disable offloading attention operations to CUTLASS.
no_cutlass_norm: bool
Disable offloading layer and RMS norm operations to CUTLASS.
no_cublas: bool
Disable the step that offloads matmul to cuBLAS. Without this flag,
matmul will be offloaded to cuBLAS if quantization mode is ``q0f16`` or
``q0f32``, target is CUDA and TVM has been built with cuBLAS enabled.
use_cuda_graph: bool
Specifies whether to enable CUDA Graph for the decoder. MLP and QKV
projection between two attention layers are put into a graph.
num_shards: int
Number of shards to split the model into for tensor-parallel multi-GPU
inference. Only useful when ``build_model_only`` is set.
use_flash_attn_mqa: bool
Offload multi-query attention workload to Flash Attention.
pdb: bool
If set, drop into a pdb debugger on error.
use_vllm_attention: bool
Use vLLM paged KV cache and attention kernel, only relevant when enable_batching=True.
"""
@@ -159,8 +190,9 @@ class BuildArgs:
default=1,
metadata={"help": "Whether to use previously pickled IRModule and skip trace."},
)
convert_weight_only: bool = field(
convert_weights_only: bool = field(
default=False,
dest="convert_weights_only",
metadata={
"help": "Whether to only convert model weights and not build the model.",
"action": "store_true",
@@ -333,6 +365,11 @@ class BuildArgs:
},
)

@property
def convert_weight_only(self):
"""A backwards-compatibility helper"""
return self.convert_weights_only


def convert_build_args_to_argparser() -> argparse.ArgumentParser:
"""Convert from BuildArgs to an equivalent ArgumentParser."""
@@ -347,6 +384,20 @@ def convert_build_args_to_argparser() -> argparse.ArgumentParser:
args.add_argument(field_name, default=field.default, **kwargs)
else:
args.add_argument(field_name, type=field.type, default=field.default, **kwargs)

# Most models contain more than a single parameter (citation
# needed), so "weights" should be plural. The initial use of
# "--convert-weight-only" caused enough typos that it is worth
# fixing. The old argument spelling is retained for backwards
# compatibility.
args.add_argument(
"--convert-weight-only",
default=False,
dest="convert_weights_only",
action="store_true",
help="Equivalent to --convert-weights-only, retained for backwards compatibility.",
)

return args


@@ -794,7 +845,9 @@ def build_model_from_args(args: argparse.Namespace):
mod_transform = seq(mod_transform)

params = utils.convert_weights(mod_transform, param_manager, params, args)
utils.save_params(params, args.artifact_path, args.num_shards if args.use_presharded_weights else 1)
utils.save_params(
params, args.artifact_path, args.num_shards if args.use_presharded_weights else 1
)

if args.model_category != "minigpt":
utils.copy_tokenizer(args)