From dfd8f85bd335a927e84a2d31b86cc4c9e347cafc Mon Sep 17 00:00:00 2001
From: zyaoj
Date: Fri, 20 Dec 2024 14:24:24 +0000
Subject: [PATCH 1/3] resolve mypy

---
 src/fairseq2/optim/lr_scheduler/factory.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/fairseq2/optim/lr_scheduler/factory.py b/src/fairseq2/optim/lr_scheduler/factory.py
index 1682a585b..a5b3134e6 100644
--- a/src/fairseq2/optim/lr_scheduler/factory.py
+++ b/src/fairseq2/optim/lr_scheduler/factory.py
@@ -94,20 +94,16 @@ def create_cosine_annealing_lr(
     # Validate config and set final_lr
     if (config.final_lr is not None) and (config.final_lr_scale is not None):
         raise ValueError(
-            f"Invalid configuration: Both `final_lr` ({config.final_lr}) and `final_lr_scale` "
-            f"({config.final_lr_scale}) are set. Please specify only one."
-        )
-
-    if (config.final_lr is None) and (config.final_lr_scale is None):
-        raise ValueError(
-            "Invalid configuration: Either `final_lr` or `final_lr_scale` must be specified."
+            f"Invalid configuration: Both `final_lr` ({config.final_lr}) and `final_lr_scale` ({config.final_lr_scale}) are set. Please specify only one."
         )
 
     # Compute final_lr based on the configuration
     if config.final_lr_scale is not None:
         final_lr = optimizer.param_groups[0]["lr"] * config.final_lr_scale
+    elif config.final_lr is not None:
+        final_lr = config.final_lr
     else:
-        final_lr = config.final_lr  # type: ignore
+        raise ValueError("Invalid configuration: Either `final_lr` or `final_lr_scale` must be specified.")
 
     if final_lr > optimizer.param_groups[0]["lr"]:
         log.warning(

From c663e4d9663dc6ffe39262182ccc9576a9f152c2 Mon Sep 17 00:00:00 2001
From: zyaoj
Date: Fri, 20 Dec 2024 14:30:08 +0000
Subject: [PATCH 2/3] add unit tests

---
 tests/unit/optim/test_lr_scheduler.py | 117 ++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)

diff --git a/tests/unit/optim/test_lr_scheduler.py b/tests/unit/optim/test_lr_scheduler.py
index 196393664..cc4b8e36d 100644
--- a/tests/unit/optim/test_lr_scheduler.py
+++ b/tests/unit/optim/test_lr_scheduler.py
@@ -23,6 +23,14 @@
     PolynomialDecayLR,
     TriStageLR,
 )
+from fairseq2.optim.lr_scheduler.factory import (
+    CosineAnnealingLRConfig,
+    PolynomialDecayLRConfig,
+    TriStageLRConfig,
+    create_cosine_annealing_lr,
+    create_polynomial_decay_lr,
+    create_tri_stage_lr,
+)
 
 
 class LRSchedulerTestNet(Module):
@@ -544,3 +552,112 @@ def test_tristage(self) -> None:
 
         assert lr1 == pytest.approx(final_lr1)
         assert lr2 == pytest.approx(final_lr2)
+
+
+class TestLRSchedulerFactory:
+    def setup_method(self) -> None:
+        self.base_lr1 = 0.05
+        self.base_lr2 = 0.5
+
+        self.net = LRSchedulerTestNet()
+        self.opt = SGD(
+            params=[  # type: ignore[arg-type]
+                {"params": self.net.conv1.parameters()},
+                {"params": self.net.conv2.parameters(), "lr": self.base_lr2},
+            ],
+            lr=self.base_lr1,
+        )
+
+    def test_create_cosine_annealing_lr(self) -> None:
+        # Test with final_lr
+        config = CosineAnnealingLRConfig(
+            cycle_len=80,
+            num_warmup_steps=100,
+            cycle_mul=1.2,
+            lr_mul=0.5,
+            start_lr=0.01,
+            final_lr=0.02,
+            final_lr_scale=None,
+        )
+
+        scheduler = create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+
+        assert isinstance(scheduler, CosineAnnealingLR)
+        assert scheduler.get_last_lr() == [0.01, 0.01]
+
+        # Test with final_lr_scale
+        config = CosineAnnealingLRConfig(
+            cycle_len=80,
+            num_warmup_steps=100,
+            final_lr=None,
+            final_lr_scale=0.2,
+        )
+
+        scheduler = create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+        assert isinstance(scheduler, CosineAnnealingLR)
+
+        # Test error when both final_lr and final_lr_scale are set
+        with pytest.raises(
+            ValueError, match="Both `final_lr` .* and `final_lr_scale` .* are set"
+        ):
+            config = CosineAnnealingLRConfig(final_lr=0.02, final_lr_scale=0.2)
+            create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+
+        # Test error when neither final_lr nor final_lr_scale are set
+        with pytest.raises(
+            ValueError, match="Either `final_lr` or `final_lr_scale` must be specified"
+        ):
+            config = CosineAnnealingLRConfig(final_lr=None, final_lr_scale=None)
+            create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+
+        # Test error when cycle_len is None and max_num_steps is None
+        with pytest.raises(ValueError, match="`cycle_len` must be specified"):
+            config = CosineAnnealingLRConfig(cycle_len=None)
+            create_cosine_annealing_lr(config, self.opt, max_num_steps=None)
+
+    def test_create_polynomial_decay_lr(self) -> None:
+        config = PolynomialDecayLRConfig(
+            num_steps=200,
+            num_warmup_steps=100,
+            power=1.5,
+            start_lr=0.01,
+            final_lr=0.02,
+        )
+
+        scheduler = create_polynomial_decay_lr(config, self.opt, max_num_steps=1000)
+
+        assert isinstance(scheduler, PolynomialDecayLR)
+        assert scheduler.get_last_lr() == [0.01, 0.01]
+
+        # Test with num_steps=None and max_num_steps provided
+        config = PolynomialDecayLRConfig(num_steps=None)
+        scheduler = create_polynomial_decay_lr(config, self.opt, max_num_steps=1000)
+        assert isinstance(scheduler, PolynomialDecayLR)
+
+        # Test error when both num_steps and max_num_steps are None
+        with pytest.raises(ValueError, match="`max_num_steps` must be specified"):
+            config = PolynomialDecayLRConfig(num_steps=None)
+            create_polynomial_decay_lr(config, self.opt, max_num_steps=None)
+
+    def test_create_tri_stage_lr(self) -> None:
+        config = TriStageLRConfig(
+            num_steps=200,
+            stage_ratio=(0.1, 0.4, 0.5),
+            start_lr_scale=0.05,
+            final_lr_scale=0.1,
+        )
+
+        scheduler = create_tri_stage_lr(config, self.opt, max_num_steps=1000)
+
+        assert isinstance(scheduler, TriStageLR)
+        assert scheduler.get_last_lr() == [self.base_lr1 * 0.05, self.base_lr2 * 0.05]
+
+        # Test with num_steps=None and max_num_steps provided
+        config = TriStageLRConfig(num_steps=None)
+        scheduler = create_tri_stage_lr(config, self.opt, max_num_steps=1000)
+        assert isinstance(scheduler, TriStageLR)
+
+        # Test error when both num_steps and max_num_steps are None
+        with pytest.raises(ValueError, match="`max_num_steps` must be specified"):
+            config = TriStageLRConfig(num_steps=None)
+            create_tri_stage_lr(config, self.opt, max_num_steps=None)

From 913171163c7d081684fc4e330d714614e4a95a66 Mon Sep 17 00:00:00 2001
From: zyaoj
Date: Fri, 20 Dec 2024 15:56:32 +0000
Subject: [PATCH 3/3] cleanup

---
 src/fairseq2/optim/lr_scheduler/factory.py |   4 +-
 tests/unit/optim/test_lr_scheduler.py      | 131 +++++++++++++--------
 2 files changed, 82 insertions(+), 53 deletions(-)

diff --git a/src/fairseq2/optim/lr_scheduler/factory.py b/src/fairseq2/optim/lr_scheduler/factory.py
index a5b3134e6..6a65e03df 100644
--- a/src/fairseq2/optim/lr_scheduler/factory.py
+++ b/src/fairseq2/optim/lr_scheduler/factory.py
@@ -103,7 +103,9 @@ def create_cosine_annealing_lr(
     elif config.final_lr is not None:
         final_lr = config.final_lr
     else:
-        raise ValueError("Invalid configuration: Either `final_lr` or `final_lr_scale` must be specified.")
+        raise ValueError(
+            "Invalid configuration: Either `final_lr` or `final_lr_scale` must be specified."
+        )
 
     if final_lr > optimizer.param_groups[0]["lr"]:
         log.warning(
diff --git a/tests/unit/optim/test_lr_scheduler.py b/tests/unit/optim/test_lr_scheduler.py
index cc4b8e36d..765fd9406 100644
--- a/tests/unit/optim/test_lr_scheduler.py
+++ b/tests/unit/optim/test_lr_scheduler.py
@@ -555,109 +555,136 @@ def test_tristage(self) -> None:
 
 
 class TestLRSchedulerFactory:
-    def setup_method(self) -> None:
-        self.base_lr1 = 0.05
-        self.base_lr2 = 0.5
+    # Common constants
+    BASE_LR1: float = 0.05
+    BASE_LR2: float = 0.5
+    NUM_WARMUP_STEPS: int = 100
+    START_LR: float = 0.01
+    FINAL_LR: float = 0.02
+    NUM_STEPS: int = 200
+
+    # CosineAnnealingLR constants
+    CYCLE_LEN: int = 80
+    CYCLE_MUL: float = 1.2
+    LR_MUL: float = 0.5
+    FINAL_LR_SCALE: float = 0.2
+    MAX_NUM_STEPS: int = 1000
+
+    # PolynomialDecayLR constants
+    POLY_POWER: float = 1.5
+
+    # TriStageLR constants
+    TRI_STAGE_RATIO: tuple[float, float, float] = (0.1, 0.4, 0.5)
+    TRI_START_LR_SCALE: float = 0.05
+    TRI_FINAL_LR_SCALE: float = 0.1
 
+    def setup_method(self) -> None:
+        """Set up the test environment with base learning rates and an optimizer."""
         self.net = LRSchedulerTestNet()
         self.opt = SGD(
             params=[  # type: ignore[arg-type]
                 {"params": self.net.conv1.parameters()},
-                {"params": self.net.conv2.parameters(), "lr": self.base_lr2},
+                {"params": self.net.conv2.parameters(), "lr": self.BASE_LR2},
             ],
-            lr=self.base_lr1,
+            lr=self.BASE_LR1,
         )
 
     def test_create_cosine_annealing_lr(self) -> None:
+        """Test creation of a CosineAnnealingLR with various configurations."""
         # Test with final_lr
         config = CosineAnnealingLRConfig(
-            cycle_len=80,
-            num_warmup_steps=100,
-            cycle_mul=1.2,
-            lr_mul=0.5,
-            start_lr=0.01,
-            final_lr=0.02,
+            cycle_len=self.CYCLE_LEN,
+            num_warmup_steps=self.NUM_WARMUP_STEPS,
+            cycle_mul=self.CYCLE_MUL,
+            lr_mul=self.LR_MUL,
+            start_lr=self.START_LR,
+            final_lr=self.FINAL_LR,
             final_lr_scale=None,
         )
-
-        scheduler = create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+        scheduler = create_cosine_annealing_lr(config, self.opt, self.MAX_NUM_STEPS)
 
         assert isinstance(scheduler, CosineAnnealingLR)
-        assert scheduler.get_last_lr() == [0.01, 0.01]
+        assert scheduler.get_last_lr() == [self.START_LR, self.START_LR]
 
         # Test with final_lr_scale
         config = CosineAnnealingLRConfig(
-            cycle_len=80,
-            num_warmup_steps=100,
+            cycle_len=self.CYCLE_LEN,
+            num_warmup_steps=self.NUM_WARMUP_STEPS,
             final_lr=None,
-            final_lr_scale=0.2,
+            final_lr_scale=self.FINAL_LR_SCALE,
        )
-
-        scheduler = create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+        scheduler = create_cosine_annealing_lr(config, self.opt, None)
         assert isinstance(scheduler, CosineAnnealingLR)
 
-        # Test error when both final_lr and final_lr_scale are set
-        with pytest.raises(
-            ValueError, match="Both `final_lr` .* and `final_lr_scale` .* are set"
-        ):
-            config = CosineAnnealingLRConfig(final_lr=0.02, final_lr_scale=0.2)
-            create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
-
-        # Test error when neither final_lr nor final_lr_scale are set
-        with pytest.raises(
-            ValueError, match="Either `final_lr` or `final_lr_scale` must be specified"
-        ):
-            config = CosineAnnealingLRConfig(final_lr=None, final_lr_scale=None)
-            create_cosine_annealing_lr(config, self.opt, max_num_steps=1000)
+    @pytest.mark.parametrize(
+        "final_lr, final_lr_scale, match_pattern",
+        [
+            (0.02, 0.2, "Both `final_lr` .* and `final_lr_scale` .* are set"),
+            (None, None, "Either `final_lr` or `final_lr_scale` must be specified"),
+        ],
+    )
+    def test_cosine_annealing_lr_final_lr_errors(
+        self, final_lr: float | None, final_lr_scale: float | None, match_pattern: str
+    ) -> None:
+        """Test error scenarios for final_lr and final_lr_scale in CosineAnnealingLR."""
+        config = CosineAnnealingLRConfig(
+            final_lr=final_lr, final_lr_scale=final_lr_scale
+        )
+        with pytest.raises(ValueError, match=match_pattern):
+            create_cosine_annealing_lr(config, self.opt, self.MAX_NUM_STEPS)
 
-        # Test error when cycle_len is None and max_num_steps is None
+    def test_cosine_annealing_lr_cycle_len_error(self) -> None:
+        """Test error when cycle_len is None and max_num_steps is also None."""
         with pytest.raises(ValueError, match="`cycle_len` must be specified"):
             config = CosineAnnealingLRConfig(cycle_len=None)
-            create_cosine_annealing_lr(config, self.opt, max_num_steps=None)
+            create_cosine_annealing_lr(config, self.opt, None)
 
     def test_create_polynomial_decay_lr(self) -> None:
+        """Test creation of a PolynomialDecayLR with various configurations."""
         config = PolynomialDecayLRConfig(
-            num_steps=200,
-            num_warmup_steps=100,
-            power=1.5,
-            start_lr=0.01,
-            final_lr=0.02,
+            num_steps=self.NUM_STEPS,
+            num_warmup_steps=self.NUM_WARMUP_STEPS,
+            power=self.POLY_POWER,
+            start_lr=self.START_LR,
+            final_lr=self.FINAL_LR,
         )
-
-        scheduler = create_polynomial_decay_lr(config, self.opt, max_num_steps=1000)
+        scheduler = create_polynomial_decay_lr(config, self.opt, None)
 
         assert isinstance(scheduler, PolynomialDecayLR)
-        assert scheduler.get_last_lr() == [0.01, 0.01]
+        assert scheduler.get_last_lr() == [self.START_LR, self.START_LR]
 
         # Test with num_steps=None and max_num_steps provided
         config = PolynomialDecayLRConfig(num_steps=None)
-        scheduler = create_polynomial_decay_lr(config, self.opt, max_num_steps=1000)
+        scheduler = create_polynomial_decay_lr(config, self.opt, self.MAX_NUM_STEPS)
         assert isinstance(scheduler, PolynomialDecayLR)
 
         # Test error when both num_steps and max_num_steps are None
         with pytest.raises(ValueError, match="`max_num_steps` must be specified"):
             config = PolynomialDecayLRConfig(num_steps=None)
-            create_polynomial_decay_lr(config, self.opt, max_num_steps=None)
+            create_polynomial_decay_lr(config, self.opt, None)
 
     def test_create_tri_stage_lr(self) -> None:
+        """Test creation of a TriStageLR with various configurations."""
         config = TriStageLRConfig(
-            num_steps=200,
-            stage_ratio=(0.1, 0.4, 0.5),
-            start_lr_scale=0.05,
-            final_lr_scale=0.1,
+            num_steps=self.NUM_STEPS,
+            stage_ratio=self.TRI_STAGE_RATIO,
+            start_lr_scale=self.TRI_START_LR_SCALE,
+            final_lr_scale=self.TRI_FINAL_LR_SCALE,
         )
+        scheduler = create_tri_stage_lr(config, self.opt, None)
 
-        scheduler = create_tri_stage_lr(config, self.opt, max_num_steps=1000)
-
+        expected_lr1 = self.BASE_LR1 * self.TRI_START_LR_SCALE
+        expected_lr2 = self.BASE_LR2 * self.TRI_START_LR_SCALE
         assert isinstance(scheduler, TriStageLR)
-        assert scheduler.get_last_lr() == [self.base_lr1 * 0.05, self.base_lr2 * 0.05]
+        assert scheduler.get_last_lr() == [expected_lr1, expected_lr2]
 
         # Test with num_steps=None and max_num_steps provided
         config = TriStageLRConfig(num_steps=None)
-        scheduler = create_tri_stage_lr(config, self.opt, max_num_steps=1000)
+        scheduler = create_tri_stage_lr(config, self.opt, self.MAX_NUM_STEPS)
         assert isinstance(scheduler, TriStageLR)
 
         # Test error when both num_steps and max_num_steps are None
         with pytest.raises(ValueError, match="`max_num_steps` must be specified"):
             config = TriStageLRConfig(num_steps=None)
-            create_tri_stage_lr(config, self.opt, max_num_steps=None)
+            create_tri_stage_lr(config, self.opt, None)
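
For reviewers, a minimal usage sketch of the validation behavior the series settles on. It is illustrative only and not part of the patches; the optimizer setup below is hypothetical, while CosineAnnealingLRConfig and create_cosine_annealing_lr are the factory symbols exercised by the unit tests above.

    # Illustrative sketch; mirrors the calls made in tests/unit/optim/test_lr_scheduler.py.
    from torch.nn import Conv2d
    from torch.optim import SGD

    from fairseq2.optim.lr_scheduler.factory import (
        CosineAnnealingLRConfig,
        create_cosine_annealing_lr,
    )

    # Hypothetical single-parameter-group optimizer, just to have a base lr of 0.05.
    opt = SGD(Conv2d(1, 1, 1).parameters(), lr=0.05)

    # Exactly one of `final_lr` / `final_lr_scale` may be set; after PATCH 1/3 the
    # factory raises ValueError when both are given or when both are None.
    config = CosineAnnealingLRConfig(cycle_len=80, final_lr=None, final_lr_scale=0.2)
    scheduler = create_cosine_annealing_lr(config, opt, max_num_steps=1000)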