Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hymba multipack support #2118

Closed
wants to merge 40 commits into from
Closed
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
bb89541
lint
bursteratom Dec 2, 2024
9a00c63
lint
bursteratom Dec 2, 2024
bce29a4
trust remote code
bursteratom Dec 2, 2024
3057f8e
no sample packing
bursteratom Dec 2, 2024
65b5c6a
flash attention false
bursteratom Dec 2, 2024
867b3b6
config
bursteratom Dec 2, 2024
ca701c5
config
bursteratom Dec 2, 2024
3bac454
test
bursteratom Dec 2, 2024
3d9afb4
test
bursteratom Dec 2, 2024
8d430d3
commenting out save_pretrained for testing
bursteratom Dec 3, 2024
5ef5cee
lint
bursteratom Dec 3, 2024
c484085
test
bursteratom Dec 3, 2024
5c01e4a
status
bursteratom Dec 3, 2024
6988c93
example
bursteratom Dec 3, 2024
4ae4ea0
removed test file
bursteratom Dec 3, 2024
c43cce6
better abstraction for has_remote_code
bursteratom Dec 3, 2024
b5f390e
misc
bursteratom Dec 3, 2024
ca47276
changed example config
bursteratom Dec 3, 2024
4e0a522
lint [skip e2e]
bursteratom Dec 4, 2024
5708b33
test [skip e2e]
bursteratom Dec 4, 2024
1e597da
test
bursteratom Dec 4, 2024
1903b36
test [skip e2e]
bursteratom Dec 4, 2024
d0bb3a5
test [skip e2e]
bursteratom Dec 4, 2024
658f68a
e2e tests for packing + chat template for hymba [skip e2e]
bursteratom Dec 9, 2024
900dda0
lint
bursteratom Dec 9, 2024
9e520e8
lint
bursteratom Dec 9, 2024
16092ef
lint
bursteratom Dec 9, 2024
35c7189
Merge branch 'main' into hymba_multipack
bursteratom Dec 9, 2024
4c51ace
lint
bursteratom Dec 9, 2024
cf737a2
set trust_remote_code to true for hymba testing
bursteratom Dec 9, 2024
3ed2caa
switching order of decorator
bursteratom Dec 10, 2024
8c7b305
qlora for hymba tests [skip e2e]
bursteratom Dec 10, 2024
9776154
qlora test [skip e2e]
bursteratom Dec 10, 2024
93ede0b
lint [skip e2e]
bursteratom Dec 10, 2024
b7a57a8
qlora hymba test [skip e2e]
bursteratom Dec 10, 2024
6d65df2
qlora hymba test
bursteratom Dec 10, 2024
cd7c061
separate unit test for unpacked train loss validation
bursteratom Dec 10, 2024
7078480
test
bursteratom Dec 10, 2024
209f48b
test
bursteratom Dec 10, 2024
eec782f
more test
bursteratom Dec 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions examples/hymba/fft-1.5b.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Example: full fine-tune of NVIDIA Hymba-1.5B (Base) on Alpaca with sample packing.
base_model: nvidia/Hymba-1.5B-Base

# Full-precision load — no 8-bit/4-bit quantization for this FFT example.
load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
- path: tatsu-lab/alpaca
type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
output_dir: ./outputs/out

# Pack multiple short samples into each 2048-token sequence
# (the multipack support added by this PR).
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true

# Weights & Biases logging — keys left blank, so logging is disabled.
wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

# Per-device effective batch = micro_batch_size * gradient_accumulation_steps.
gradient_accumulation_steps: 2
micro_batch_size: 2
num_epochs: 1
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 2e-5

train_on_inputs: false
group_by_length: false
# bf16 "auto" picks bfloat16 when the hardware supports it; fp16 left unset.
bf16: auto
fp16:
tf32: false

# Hymba's modeling code ships with the checkpoint, so remote code must be trusted.
trust_remote_code: true

gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
early_stopping_patience:
resume_from_checkpoint:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 5
evals_per_epoch: 2
eval_table_size:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
pad_token: <|end_of_text|>
1 change: 1 addition & 0 deletions src/axolotl/monkeypatch/multipack.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"gemmoe",
"starcoder2",
"deepseek_v2",
"hymba",
]


Expand Down
13 changes: 11 additions & 2 deletions src/axolotl/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@
from axolotl.logging_config import configure_logging
from axolotl.utils.dict import DictDefault
from axolotl.utils.freeze import freeze_layers_except
from axolotl.utils.models import load_model, load_processor, load_tokenizer
from axolotl.utils.models import (
load_model,
load_model_config,
load_processor,
load_tokenizer,
)
from axolotl.utils.trainer import setup_trainer

try:
Expand Down Expand Up @@ -145,7 +150,11 @@ def train(
os.makedirs(cfg.output_dir, exist_ok=True)
tokenizer.save_pretrained(str(Path(cfg.output_dir)))
if hasattr(model, "config"):
model.config.save_pretrained(str(Path(cfg.output_dir)))
try:
model.config.save_pretrained(str(Path(cfg.output_dir)))
except TypeError: # required to deal with Hymba in its current state
model_config = load_model_config(cfg)
model_config.save_pretrained(str(Path(cfg.output_dir)))

# In case we want to stop early with ctrl+c, this is a nice to have to save the pretrained model
if cfg.local_rank == 0:
Expand Down
16 changes: 16 additions & 0 deletions src/axolotl/utils/config/models/input/v0_4_1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1573,3 +1573,19 @@ def check_adopt_torch_version(cls, data):
"ADOPT optimizer is incompatible with torch version < 2.5.1"
)
return data

@model_validator(mode="before")
@classmethod
def check_hymba_torch_version(cls, data):
    """Fail fast when a Hymba base model is configured with torch < 2.5.

    Hymba's remote modeling code depends on APIs introduced in torch 2.5,
    so the mismatch is surfaced at config-validation time rather than
    mid-training.

    Raises:
        ValueError: if ``base_model`` mentions Hymba and the detected (or
            declared) torch version is older than 2.5.0.
    """
    # NOTE: `data.get("base_model", {})` previously defaulted to a dict,
    # so a config without `base_model` crashed with AttributeError on
    # `.lower()`. Default to "" and coerce to str so the check is safe.
    base_model = data.get("base_model") or ""
    if "hymba" in str(base_model).lower():
        # `env_capabilities` may be absent OR present with an explicit
        # null value; `or {}` covers both, unlike a .get() default.
        env_capabilities = data.get("env_capabilities") or {}
        torch_version = env_capabilities.get("torch_version")

        if torch_version is None:
            import torch

            # Strip any local build tag, e.g. "2.5.1+cu121" -> "2.5.1",
            # so packaging can parse it.
            torch_version = str(torch.__version__).split("+", maxsplit=1)[0]

        if version.parse(torch_version) < version.parse("2.5.0"):
            raise ValueError("Hymba requires torch version >= 2.5")
    return data
15 changes: 11 additions & 4 deletions src/axolotl/utils/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,17 @@ def apply_patches(self) -> None:
and self.cfg.flash_attention
and self.cfg.sample_packing
):
has_remote_code = (
"auto_map" in self.model_config
and "AutoModelForCausalLM" in self.model_config["auto_map"]
)
# some model config objects are not subscriptable
try:
has_remote_code = (
"auto_map" in self.model_config
and "AutoModelForCausalLM" in self.model_config["auto_map"]
)
except TypeError:
has_remote_code = hasattr(
self.model_config, "auto_map"
) and "AutoModelForCausalLM" in getattr(self.model_config, "auto_map")

if has_remote_code and self.cfg.trust_remote_code is False:
# if explicitly set in the YAML, we should prefer that, for example if explicitly disabled
has_remote_code = self.cfg.trust_remote_code
Expand Down
Loading