diff --git a/cicd/multigpu.py b/cicd/multigpu.py index 0ea4c8cc11..511e31c8e5 100644 --- a/cicd/multigpu.py +++ b/cicd/multigpu.py @@ -1,6 +1,6 @@ """ - modal application to run axolotl gpu tests in Modal - """ +modal application to run axolotl gpu tests in Modal +""" # pylint: disable=duplicate-code import os diff --git a/src/axolotl/monkeypatch/attention/differential.py b/src/axolotl/monkeypatch/attention/differential.py index 36e3821af6..a07b629b6b 100644 --- a/src/axolotl/monkeypatch/attention/differential.py +++ b/src/axolotl/monkeypatch/attention/differential.py @@ -12,7 +12,6 @@ def patch_llama_attention_classes(): """Patch transformers to support differential attention""" - # Add our attention class to the registry LLAMA_ATTENTION_CLASSES["differential_eager"] = LlamaDifferentialAttention LLAMA_ATTENTION_CLASSES["differential_sdpa"] = LlamaDifferentialSdpaAttention diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index 8c8bd0e38f..6eaa020da0 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -843,7 +843,6 @@ def _configure_zero3_memory_efficient_loading(): if self.cfg.is_multimodal: self.model_config.text_config = self.text_model_config - # self.model._attn_implementation_autoset = False self.model = self.AutoModelLoader.from_pretrained( self.base_model, config=self.model_config, diff --git a/tests/e2e/integrations/convert_differential_transformer/__init__.py b/tests/e2e/integrations/convert_differential_transformer/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/e2e/integrations/convert_differential_transformer/conftest.py b/tests/e2e/integrations/convert_differential_transformer/conftest.py new file mode 100644 index 0000000000..17a424ddbe --- /dev/null +++ b/tests/e2e/integrations/convert_differential_transformer/conftest.py @@ -0,0 +1,28 @@ +"""Shared fixtures for differential transformer conversion tests.""" + +import pytest + + +@pytest.fixture() +def base_config(): + """Basic config for testing.""" + return { + "base_model": "HuggingFaceTB/SmolLM2-135M", + "plugins": [ + "axolotl.integrations.differential_transformer.DifferentialTransformerPlugin", + ], + "datasets": [ + { + "path": "axolotl-ai-co/alpaca_100_test", + "type": "alpaca", + }, + ], + "gradient_accumulation_steps": 1, + "learning_rate": 1e-4, + "val_set_size": 0.1, + "micro_batch_size": 1, + "sequence_len": 2048, + "special_tokens": { + "pad_token": "<|endoftext|>", + }, + } diff --git a/tests/e2e/integrations/convert_differential_transformer/test_convert_and_evaluate.py b/tests/e2e/integrations/convert_differential_transformer/test_convert_and_evaluate.py new file mode 100644 index 0000000000..1cf569693c --- /dev/null +++ b/tests/e2e/integrations/convert_differential_transformer/test_convert_and_evaluate.py @@ -0,0 +1,53 @@ +"""End-to-end tests for differential transformer conversion and evaluation.""" +# pylint: disable=duplicate-code + +from pathlib import Path + +import yaml +from pytest import approx + +from axolotl.cli import load_cfg +from axolotl.cli.evaluate import do_evaluate +from axolotl.cli.integrations.convert_differential_transformer import ( + convert_differential_transformer, +) +from axolotl.common.cli import ConvertDiffTransformerCliArgs, EvaluateCliArgs + + +def test_conversion_and_eval_cli(tmp_path: Path, base_config): + output_dir = tmp_path / "converted" + base_config["output_dir"] = str(output_dir) + + config_path = tmp_path / "config.yml" + with open(config_path, "w", encoding="utf-8") as file: + yaml.dump(base_config, file) + + cfg = load_cfg(str(config_path)) + cli_args = ConvertDiffTransformerCliArgs( + debug=True, zero_init=True, sublayer_norm=False + ) + _, debug_info = convert_differential_transformer(cfg, cli_args, str(config_path)) + + assert debug_info["generations_match"] is True + assert (output_dir / "model.safetensors").exists() + assert (output_dir / "config.json").exists() + assert (output_dir / "axolotl_config.yml").exists() + + eval_cfg = load_cfg(str(output_dir)) + eval_cli_args = EvaluateCliArgs() + all_metrics = do_evaluate(eval_cfg, eval_cli_args) + + assert list(all_metrics.keys()) == [ + "train_loss", + "train_model_preparation_time", + "train_runtime", + "train_samples_per_second", + "train_steps_per_second", + "eval_loss", + "eval_model_preparation_time", + "eval_runtime", + "eval_samples_per_second", + "eval_steps_per_second", + ] + assert all_metrics["train_loss"] == approx(1.7307, rel=1e-4) + assert all_metrics["eval_loss"] == approx(1.8387, rel=1e-4) diff --git a/tests/e2e/integrations/test_convert_differential_transformer.py b/tests/e2e/integrations/convert_differential_transformer/test_convert_differential_transformer.py similarity index 62% rename from tests/e2e/integrations/test_convert_differential_transformer.py rename to tests/e2e/integrations/convert_differential_transformer/test_convert_differential_transformer.py index 9ddcf57674..4349287bdc 100644 --- a/tests/e2e/integrations/test_convert_differential_transformer.py +++ b/tests/e2e/integrations/convert_differential_transformer/test_convert_differential_transformer.py @@ -1,44 +1,18 @@ """End-to-end tests for differential transformer conversion.""" # pylint: disable=redefined-outer-name +# pylint: disable=duplicate-code from pathlib import Path from typing import Optional import pytest import yaml -from pytest import approx from axolotl.cli import load_cfg -from axolotl.cli.evaluate import do_evaluate from axolotl.cli.integrations.convert_differential_transformer import ( convert_differential_transformer, ) -from axolotl.common.cli import ConvertDiffTransformerCliArgs, EvaluateCliArgs - - -@pytest.fixture() -def base_config(): - """Basic config for testing.""" - return { - "base_model": "HuggingFaceTB/SmolLM2-135M", - "plugins": [ - "axolotl.integrations.differential_transformer.DifferentialTransformerPlugin", - ], - "datasets": [ - { - "path": "axolotl-ai-co/alpaca_100_test", - "type": "alpaca", - }, - ], - "gradient_accumulation_steps": 1, - "learning_rate": 1e-4, - "val_set_size": 0.1, - "micro_batch_size": 1, - "sequence_len": 2048, - "special_tokens": { - "pad_token": "<|endoftext|>", - }, - } +from axolotl.common.cli import ConvertDiffTransformerCliArgs def test_conversion_cli_basic(tmp_path: Path, base_config): @@ -132,42 +106,3 @@ def test_conversion_cli_repoduce_attentions( assert (output_dir / "model.safetensors").exists() assert (output_dir / "config.json").exists() assert (output_dir / "axolotl_config.yml").exists() - - -def test_conversion_and_eval_cli(tmp_path: Path, base_config): - output_dir = tmp_path / "converted" - base_config["output_dir"] = str(output_dir) - - config_path = tmp_path / "config.yml" - with open(config_path, "w", encoding="utf-8") as file: - yaml.dump(base_config, file) - - cfg = load_cfg(str(config_path)) - cli_args = ConvertDiffTransformerCliArgs( - debug=True, zero_init=True, sublayer_norm=False - ) - _, debug_info = convert_differential_transformer(cfg, cli_args, str(config_path)) - - assert debug_info["generations_match"] is True - assert (output_dir / "model.safetensors").exists() - assert (output_dir / "config.json").exists() - assert (output_dir / "axolotl_config.yml").exists() - - eval_cfg = load_cfg(str(output_dir)) - eval_cli_args = EvaluateCliArgs() - all_metrics = do_evaluate(eval_cfg, eval_cli_args) - - assert list(all_metrics.keys()) == [ - "train_loss", - "train_model_preparation_time", - "train_runtime", - "train_samples_per_second", - "train_steps_per_second", - "eval_loss", - "eval_model_preparation_time", - "eval_runtime", - "eval_samples_per_second", - "eval_steps_per_second", - ] - assert all_metrics["train_loss"] == approx(1.7307, rel=1e-4) - assert all_metrics["eval_loss"] == approx(1.8387, rel=1e-4)