From dfd8f85bd335a927e84a2d31b86cc4c9e347cafc Mon Sep 17 00:00:00 2001
From: zyaoj
Date: Fri, 20 Dec 2024 14:24:24 +0000
Subject: [PATCH 1/3] resolve mypy

---
 src/fairseq2/optim/lr_scheduler/factory.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/fairseq2/optim/lr_scheduler/factory.py b/src/fairseq2/optim/lr_scheduler/factory.py
index 1682a585b..a5b3134e6 100644
--- a/src/fairseq2/optim/lr_scheduler/factory.py
+++ b/src/fairseq2/optim/lr_scheduler/factory.py
@@ -94,20 +94,16 @@ def create_cosine_annealing_lr(
     # Validate config and set final_lr
     if (config.final_lr is not None) and (config.final_lr_scale is not None):
         raise ValueError(
-            f"Invalid configuration: Both `final_lr` ({config.final_lr}) and `final_lr_scale` "
-            f"({config.final_lr_scale}) are set. Please specify only one."
-        )
-
-    if (config.final_lr is None) and (config.final_lr_scale is None):
-        raise ValueError(
-            "Invalid configuration: Either `final_lr` or `final_lr_scale` must be specified."
+            f"Invalid configuration: Both `final_lr` ({config.final_lr}) and `final_lr_scale` ({config.final_lr_scale}) are set. Please specify only one."
         )
 
     # Compute final_lr based on the configuration
     if config.final_lr_scale is not None:
         final_lr = optimizer.param_groups[0]["lr"] * config.final_lr_scale
+    elif config.final_lr is not None:
+        final_lr = config.final_lr
     else:
-        final_lr = config.final_lr  # type: ignore
+        raise ValueError("Invalid configuration: Either `final_lr` or `final_lr_scale` must be specified.")
 
     if final_lr > optimizer.param_groups[0]["lr"]:
         log.warning(

From c663e4d9663dc6ffe39262182ccc9576a9f152c2 Mon Sep 17 00:00:00 2001
From: zyaoj
Date: Fri, 20 Dec 2024 14:30:08 +0000
Subject: [PATCH 2/3] add unit tests

---
 tests/unit/optim/test_lr_scheduler.py | 117 ++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)

diff --git a/tests/unit/optim/test_lr_scheduler.py b/tests/unit/optim/test_lr_scheduler.py
index 196393664..cc4b8e36d 100644
--- a/tests/unit/optim/test_lr_scheduler.py
+++ b/tests/unit/optim/test_lr_scheduler.py
@@ -23,6 +23,14 @@
     PolynomialDecayLR,
     TriStageLR,
 )
+from fairseq2.optim.lr_scheduler.factory import (
+    CosineAnnealingLRConfig,
+    PolynomialDecayLRConfig,
+    TriStageLRConfig,
+    create_cosine_annealing_lr,
+    create_polynomial_decay_lr,
+    create_tri_stage_lr,
+)
 
 
 class LRSchedulerTestNet(Module):
@@ -544,3 +552,112 @@ def test_tristage(self) -> None:
 
         assert lr1 == pytest.approx(final_lr1)
         assert lr2 == pytest.approx(final_lr2)
+
+
+class TestLRSchedulerFactory:
+    def setup_method(self) -> None:
+        self.base_lr1 = 0.05
+        self.base_lr2 = 0.5
+
+        self.net = LRSchedulerTestNet()
+        self.opt = SGD(
+            params=[  # type: ignore[arg-type]
+                {"params": self.net.conv1.parameters()},
+                {"params": self.net.conv2.parameters(), "lr": self.base_lr2},
+            ],
+            lr=self.base_lr1,
+        )
+
+    def test_create_cosine_annealing_lr(self) -> None:
+        # Test with final_lr
+        config = CosineAnnealingLRConfig(
+            cycle_len=80,
+            num_warmup_steps=100,
+            cycle_mul=1.2,
+            lr_mul=0.5,
+            start_lr=0.01,
+            final_lr=0.02,
+            final_lr_scale=None,
+        )
+
+        scheduler = create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+
+        assert isinstance(scheduler, CosineAnnealingLR)
+        assert scheduler.get_last_lr() == [0.01, 0.01]
+
+        # Test with final_lr_scale
+        config = CosineAnnealingLRConfig(
+            cycle_len=80,
+            num_warmup_steps=100,
+            final_lr=None,
+            final_lr_scale=0.2,
+        )
+
+        scheduler = create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+        assert isinstance(scheduler, CosineAnnealingLR)
+
+        # Test error when both final_lr and final_lr_scale are set
+        with pytest.raises(
+            ValueError, match="Both `final_lr` .* and `final_lr_scale` .* are set"
+        ):
+            config = CosineAnnealingLRConfig(final_lr=0.02, final_lr_scale=0.2)
+            create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+
+        # Test error when neither final_lr nor final_lr_scale are set
+        with pytest.raises(
+            ValueError, match="Either `final_lr` or `final_lr_scale` must be specified"
+        ):
+            config = CosineAnnealingLRConfig(final_lr=None, final_lr_scale=None)
+            create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+
+        # Test error when cycle_len is None and max_num_steps is None
+        with pytest.raises(ValueError, match="`cycle_len` must be specified"):
+            config = CosineAnnealingLRConfig(cycle_len=None)
+            create_cosine_annealing_lr(config, self.opt, max_num_steps=None)
+
+    def test_create_polynomial_decay_lr(self) -> None:
+        config = PolynomialDecayLRConfig(
+            num_steps=200,
+            num_warmup_steps=100,
+            power=1.5,
+            start_lr=0.01,
+            final_lr=0.02,
+        )
+
+        scheduler = create_polynomial_decay_lr(config, self.opt, max_num_steps=1000)
+
+        assert isinstance(scheduler, PolynomialDecayLR)
+        assert scheduler.get_last_lr() == [0.01, 0.01]
+
+        # Test with num_steps=None and max_num_steps provided
+        config = PolynomialDecayLRConfig(num_steps=None)
+        scheduler = create_polynomial_decay_lr(config, self.opt, max_num_steps=1000)
+        assert isinstance(scheduler, PolynomialDecayLR)
+
+        # Test error when both num_steps and max_num_steps are None
+        with pytest.raises(ValueError, match="`max_num_steps` must be specified"):
+            config = PolynomialDecayLRConfig(num_steps=None)
+            create_polynomial_decay_lr(config, self.opt, max_num_steps=None)
+
+    def test_create_tri_stage_lr(self) -> None:
+        config = TriStageLRConfig(
+            num_steps=200,
+            stage_ratio=(0.1, 0.4, 0.5),
+            start_lr_scale=0.05,
+            final_lr_scale=0.1,
+        )
+
+        scheduler = create_tri_stage_lr(config, self.opt, max_num_steps=1000)
+
+        assert isinstance(scheduler, TriStageLR)
+        assert scheduler.get_last_lr() == [self.base_lr1 * 0.05, self.base_lr2 * 0.05]
+
+        # Test with num_steps=None and max_num_steps provided
+        config = TriStageLRConfig(num_steps=None)
+        scheduler = create_tri_stage_lr(config, self.opt, max_num_steps=1000)
+        assert isinstance(scheduler, TriStageLR)
+
+        # Test error when both num_steps and max_num_steps are None
+        with pytest.raises(ValueError, match="`max_num_steps` must be specified"):
+            config = TriStageLRConfig(num_steps=None)
+            create_tri_stage_lr(config, self.opt, max_num_steps=None)

From 913171163c7d081684fc4e330d714614e4a95a66 Mon Sep 17 00:00:00 2001
From: zyaoj
Date: Fri, 20 Dec 2024 15:56:32 +0000
Subject: [PATCH 3/3] cleanup

---
 src/fairseq2/optim/lr_scheduler/factory.py |   4 +-
 tests/unit/optim/test_lr_scheduler.py      | 131 +++++++++++++--------
 2 files changed, 82 insertions(+), 53 deletions(-)

diff --git a/src/fairseq2/optim/lr_scheduler/factory.py b/src/fairseq2/optim/lr_scheduler/factory.py
index a5b3134e6..6a65e03df 100644
--- a/src/fairseq2/optim/lr_scheduler/factory.py
+++ b/src/fairseq2/optim/lr_scheduler/factory.py
@@ -103,7 +103,9 @@ def create_cosine_annealing_lr(
     elif config.final_lr is not None:
         final_lr = config.final_lr
     else:
-        raise ValueError("Invalid configuration: Either `final_lr` or `final_lr_scale` must be specified.")
+        raise ValueError(
+            "Invalid configuration: Either `final_lr` or `final_lr_scale` must be specified."
+        )
 
     if final_lr > optimizer.param_groups[0]["lr"]:
         log.warning(
diff --git a/tests/unit/optim/test_lr_scheduler.py b/tests/unit/optim/test_lr_scheduler.py
index cc4b8e36d..765fd9406 100644
--- a/tests/unit/optim/test_lr_scheduler.py
+++ b/tests/unit/optim/test_lr_scheduler.py
@@ -555,109 +555,136 @@ def test_tristage(self) -> None:
 
 
 class TestLRSchedulerFactory:
-    def setup_method(self) -> None:
-        self.base_lr1 = 0.05
-        self.base_lr2 = 0.5
+    # Common constants
+    BASE_LR1: float = 0.05
+    BASE_LR2: float = 0.5
+    NUM_WARMUP_STEPS: int = 100
+    START_LR: float = 0.01
+    FINAL_LR: float = 0.02
+    NUM_STEPS: int = 200
+
+    # CosineAnnealingLR constants
+    CYCLE_LEN: int = 80
+    CYCLE_MUL: float = 1.2
+    LR_MUL: float = 0.5
+    FINAL_LR_SCALE: float = 0.2
+    MAX_NUM_STEPS: int = 1000
+
+    # PolynomialDecayLR constants
+    POLY_POWER: float = 1.5
+
+    # TriStageLR constants
+    TRI_STAGE_RATIO: tuple[float, float, float] = (0.1, 0.4, 0.5)
+    TRI_START_LR_SCALE: float = 0.05
+    TRI_FINAL_LR_SCALE: float = 0.1
 
+    def setup_method(self) -> None:
+        """Set up the test environment with base learning rates and an optimizer."""
         self.net = LRSchedulerTestNet()
         self.opt = SGD(
             params=[  # type: ignore[arg-type]
                 {"params": self.net.conv1.parameters()},
-                {"params": self.net.conv2.parameters(), "lr": self.base_lr2},
+                {"params": self.net.conv2.parameters(), "lr": self.BASE_LR2},
             ],
-            lr=self.base_lr1,
+            lr=self.BASE_LR1,
         )
 
     def test_create_cosine_annealing_lr(self) -> None:
+        """Test creation of a CosineAnnealingLR with various configurations."""
         # Test with final_lr
         config = CosineAnnealingLRConfig(
-            cycle_len=80,
-            num_warmup_steps=100,
-            cycle_mul=1.2,
-            lr_mul=0.5,
-            start_lr=0.01,
-            final_lr=0.02,
+            cycle_len=self.CYCLE_LEN,
+            num_warmup_steps=self.NUM_WARMUP_STEPS,
+            cycle_mul=self.CYCLE_MUL,
+            lr_mul=self.LR_MUL,
+            start_lr=self.START_LR,
+            final_lr=self.FINAL_LR,
             final_lr_scale=None,
         )
-
-        scheduler = create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+        scheduler = create_cosine_annealing_lr(config, self.opt, self.MAX_NUM_STEPS)
 
         assert isinstance(scheduler, CosineAnnealingLR)
-        assert scheduler.get_last_lr() == [0.01, 0.01]
+        assert scheduler.get_last_lr() == [self.START_LR, self.START_LR]
 
         # Test with final_lr_scale
         config = CosineAnnealingLRConfig(
-            cycle_len=80,
-            num_warmup_steps=100,
+            cycle_len=self.CYCLE_LEN,
+            num_warmup_steps=self.NUM_WARMUP_STEPS,
             final_lr=None,
-            final_lr_scale=0.2,
+            final_lr_scale=self.FINAL_LR_SCALE,
        )
-
-        scheduler = create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+        scheduler = create_cosine_annealing_lr(config, self.opt, None)
         assert isinstance(scheduler, CosineAnnealingLR)
 
-        # Test error when both final_lr and final_lr_scale are set
-        with pytest.raises(
-            ValueError, match="Both `final_lr` .* and `final_lr_scale` .* are set"
-        ):
-            config = CosineAnnealingLRConfig(final_lr=0.02, final_lr_scale=0.2)
-            create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
-
-        # Test error when neither final_lr nor final_lr_scale are set
-        with pytest.raises(
-            ValueError, match="Either `final_lr` or `final_lr_scale` must be specified"
-        ):
-            config = CosineAnnealingLRConfig(final_lr=None, final_lr_scale=None)
-            create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+    @pytest.mark.parametrize(
+        "final_lr, final_lr_scale, match_pattern",
+        [
+            (0.02, 0.2, "Both `final_lr` .* and `final_lr_scale` .* are set"),
+            (None, None, "Either `final_lr` or `final_lr_scale` must be specified"),
+        ],
+    )
+    def test_cosine_annealing_lr_final_lr_errors(
+        self, final_lr: float | None, final_lr_scale: float | None, match_pattern: str
+    ) -> None:
+        """Test error scenarios for final_lr and final_lr_scale in CosineAnnealingLR."""
+        config = CosineAnnealingLRConfig(
+            final_lr=final_lr, final_lr_scale=final_lr_scale
+        )
+        with pytest.raises(ValueError, match=match_pattern):
+            create_cosine_annealing_lr(config, self.opt, self.MAX_NUM_STEPS)
 
-        # Test error when cycle_len is None and max_num_steps is None
+    def test_cosine_annealing_lr_cycle_len_error(self) -> None:
+        """Test error when cycle_len is None and max_num_steps is also None."""
         with pytest.raises(ValueError, match="`cycle_len` must be specified"):
             config = CosineAnnealingLRConfig(cycle_len=None)
-            create_cosine_annealing_lr(config, self.opt, max_num_steps=None)
+            create_cosine_annealing_lr(config, self.opt, None)
 
     def test_create_polynomial_decay_lr(self) -> None:
+        """Test creation of a PolynomialDecayLR with various configurations."""
         config = PolynomialDecayLRConfig(
-            num_steps=200,
-            num_warmup_steps=100,
-            power=1.5,
-            start_lr=0.01,
-            final_lr=0.02,
+            num_steps=self.NUM_STEPS,
+            num_warmup_steps=self.NUM_WARMUP_STEPS,
+            power=self.POLY_POWER,
+            start_lr=self.START_LR,
+            final_lr=self.FINAL_LR,
         )
-
-        scheduler = create_polynomial_decay_lr(config, self.opt, max_num_steps=1000)
+        scheduler = create_polynomial_decay_lr(config, self.opt, None)
 
         assert isinstance(scheduler, PolynomialDecayLR)
-        assert scheduler.get_last_lr() == [0.01, 0.01]
+        assert scheduler.get_last_lr() == [self.START_LR, self.START_LR]
 
         # Test with num_steps=None and max_num_steps provided
         config = PolynomialDecayLRConfig(num_steps=None)
-        scheduler = create_polynomial_decay_lr(config, self.opt, max_num_steps=1000)
+        scheduler = create_polynomial_decay_lr(config, self.opt, self.MAX_NUM_STEPS)
         assert isinstance(scheduler, PolynomialDecayLR)
 
         # Test error when both num_steps and max_num_steps are None
         with pytest.raises(ValueError, match="`max_num_steps` must be specified"):
             config = PolynomialDecayLRConfig(num_steps=None)
-            create_polynomial_decay_lr(config, self.opt, max_num_steps=None)
+            create_polynomial_decay_lr(config, self.opt, None)
 
     def test_create_tri_stage_lr(self) -> None:
+        """Test creation of a TriStageLR with various configurations."""
         config = TriStageLRConfig(
-            num_steps=200,
-            stage_ratio=(0.1, 0.4, 0.5),
-            start_lr_scale=0.05,
-            final_lr_scale=0.1,
+            num_steps=self.NUM_STEPS,
+            stage_ratio=self.TRI_STAGE_RATIO,
+            start_lr_scale=self.TRI_START_LR_SCALE,
+            final_lr_scale=self.TRI_FINAL_LR_SCALE,
         )
+        scheduler = create_tri_stage_lr(config, self.opt, None)
 
-        scheduler = create_tri_stage_lr(config, self.opt, max_num_steps=1000)
-
+        expected_lr1 = self.BASE_LR1 * self.TRI_START_LR_SCALE
+        expected_lr2 = self.BASE_LR2 * self.TRI_START_LR_SCALE
         assert isinstance(scheduler, TriStageLR)
-        assert scheduler.get_last_lr() == [self.base_lr1 * 0.05, self.base_lr2 * 0.05]
+        assert scheduler.get_last_lr() == [expected_lr1, expected_lr2]
 
         # Test with num_steps=None and max_num_steps provided
         config = TriStageLRConfig(num_steps=None)
-        scheduler = create_tri_stage_lr(config, self.opt, max_num_steps=1000)
+        scheduler = create_tri_stage_lr(config, self.opt, self.MAX_NUM_STEPS)
         assert isinstance(scheduler, TriStageLR)
 
         # Test error when both num_steps and max_num_steps are None
         with pytest.raises(ValueError, match="`max_num_steps` must be specified"):
             config = TriStageLRConfig(num_steps=None)
-            create_tri_stage_lr(config, self.opt, max_num_steps=None)
+            create_tri_stage_lr(config, self.opt, None)
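
For reviewers, a minimal usage sketch of the validation behavior the series settles on. It is illustrative only and not part of the patches; the optimizer setup below is hypothetical, while CosineAnnealingLRConfig and create_cosine_annealing_lr are the factory symbols exercised by the unit tests above.

    # Illustrative sketch; mirrors the calls made in tests/unit/optim/test_lr_scheduler.py.
    from torch.nn import Conv2d
    from torch.optim import SGD

    from fairseq2.optim.lr_scheduler.factory import (
        CosineAnnealingLRConfig,
        create_cosine_annealing_lr,
    )

    # Hypothetical single-parameter-group optimizer, just to have a base lr of 0.05.
    opt = SGD(Conv2d(1, 1, 1).parameters(), lr=0.05)

    # Exactly one of `final_lr` / `final_lr_scale` may be set; after PATCH 1/3 the
    # factory raises ValueError when both are given or when both are None.
    config = CosineAnnealingLRConfig(cycle_len=80, final_lr=None, final_lr_scale=0.2)
    scheduler = create_cosine_annealing_lr(config, opt, max_num_steps=1000)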