Adding min_delta setting for early stopping in training settings #1328

Open · wants to merge 3 commits into base: master
2 changes: 2 additions & 0 deletions docs/source/user_guide/config/training_settings.rst
@@ -22,6 +22,8 @@ Training settings are designed to set parameters about model training.
evaluated on the valid dataset. Defaults to ``1``.
- ``stopping_step (int)`` : The threshold for validation-based early stopping.
Defaults to ``10``.
- ``min_delta (float)`` : The minimum change in the evaluation measure that counts as an improvement for early stopping; a new result must differ from the best result so far by at least this amount before the stopping counter is reset.
Defaults to ``0.001``.
- ``clip_grad_norm (dict)`` : The args of `clip_grad_norm_ <https://pytorch.org/docs/stable/generated/torch.nn.utils.clip_grad_norm_.html>`_
which will clip gradient norm of model. Defaults to ``None``.
- ``loss_decimal_place(int)``: The decimal place of training loss. Defaults to ``4``.
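
For reference, a minimal usage sketch of the new option (illustrative only, not part of this diff): it assumes the existing quick-start entry point recbole.quick_start.run_recbole and its config_dict argument, and the 0.005 value is an arbitrary example rather than the PR's default of 0.001.

from recbole.quick_start import run_recbole

# Hypothetical config: require a gain of at least 0.005 in the validation metric
# before the early-stopping counter is reset; give up after 10 stalled evaluations.
parameter_dict = {
    "stopping_step": 10,
    "min_delta": 0.005,
}

run_recbole(model="BPR", dataset="ml-100k", config_dict=parameter_dict)
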
2 changes: 2 additions & 0 deletions recbole/properties/overall.yaml
@@ -16,6 +16,7 @@ log_wandb: False # (bool) Whether or not to use Weights & Biases(
wandb_project: 'recbole' # (str) The project to conduct experiments in W&B.
shuffle: True # (bool) Whether or not to shuffle the training data before each epoch.

# Training Settings
epochs: 300 # (int) The number of training epochs.
train_batch_size: 2048 # (int) The training batch size.
@@ -29,6 +30,7 @@ train_neg_sample_args: # (dict) Negative sampling configuration for mod
candidate_num: 0 # (int) The number of candidate negative items when dynamic negative sampling.
eval_step: 1 # (int) The number of training epochs before an evaluation on the valid dataset.
stopping_step: 10 # (int) The threshold for validation-based early stopping.
min_delta: 0.001 # (float) The minimum change in the valid metric required to count as an improvement for early stopping.
clip_grad_norm: ~ # (dict) The args of clip_grad_norm_ which will clip gradient norm of model.
weight_decay: 0.0 # (float) The weight decay value (L2 penalty) for optimizers.
loss_decimal_place: 4 # (int) The decimal place of training loss.
11 changes: 11 additions & 0 deletions recbole/trainer/trainer.py
@@ -114,6 +114,7 @@ def __init__(self, config, model):
self.logger = getLogger()
self.tensorboard = get_tensorboard(self.logger)
self.wandblogger = WandbLogger(config)

self.learner = config["learner"]
self.learning_rate = config["learning_rate"]
self.epochs = config["epochs"]
@@ -122,12 +123,14 @@
self.clip_grad_norm = config["clip_grad_norm"]
self.valid_metric = config["valid_metric"].lower()
self.valid_metric_bigger = config["valid_metric_bigger"]
self.min_delta = config["min_delta"]
self.test_batch_size = config["eval_batch_size"]
self.gpu_available = torch.cuda.is_available() and config["use_gpu"]
self.device = config["device"]
self.checkpoint_dir = config["checkpoint_dir"]
self.enable_amp = config["enable_amp"]
self.enable_scaler = torch.cuda.is_available() and config["enable_scaler"]

ensure_dir(self.checkpoint_dir)
saved_model_file = "{}-{}.pth".format(self.config["model"], get_local_time())
self.saved_model_file = os.path.join(self.checkpoint_dir, saved_model_file)
@@ -474,6 +477,8 @@ def fit(
self.cur_step,
max_step=self.stopping_step,
bigger=self.valid_metric_bigger,
min_delta=self.min_delta,
)
valid_end_time = time()
valid_score_output = (
@@ -910,6 +915,8 @@ def __init__(self, config, model):

self.stopping_step = config["stopping_step"]
self.valid_metric_bigger = config["valid_metric_bigger"]
self.min_delta = config["min_delta"]

self.cur_step = 0
self.best_valid_score = -np.inf if self.valid_metric_bigger else np.inf
self.best_valid_result = None
@@ -1018,6 +1025,8 @@ def fit(
self.cur_step,
max_step=self.stopping_step,
bigger=self.valid_metric_bigger,
min_delta=self.min_delta,
)

valid_end_time = time()
@@ -1383,6 +1392,8 @@ def fit(
self.cur_step,
max_step=self.stopping_step,
bigger=self.valid_metric_bigger,
min_delta=self.min_delta,
)
valid_end_time = time()
valid_score_output = (
3 changes: 1 addition & 2 deletions recbole/utils/argument_list.py
@@ -23,8 +23,7 @@
training_arguments = [
'epochs', 'train_batch_size',
'learner', 'learning_rate',
'train_neg_sample_args',
'eval_step', 'stopping_step',
'eval_step', 'stopping_step', 'min_delta',
'clip_grad_norm',
'weight_decay',
'loss_decimal_place',
7 changes: 4 additions & 3 deletions recbole/utils/utils.py
@@ -110,9 +110,10 @@ def get_trainer(model_type, model_name):
return getattr(importlib.import_module("recbole.trainer"), "Trainer")


def early_stopping(value, best, cur_step, max_step, bigger=True):
def early_stopping(value, best, cur_step, max_step, bigger=True, min_delta=0.001):
r"""validation-based early stopping


Args:
value (float): current result
best (float): best result
@@ -134,7 +135,7 @@ def early_stopping(value, best, cur_step, max_step, bigger=True):
stop_flag = False
update_flag = False
if bigger:
if value >= best:
if value >= best and abs(value - best) >= min_delta:
cur_step = 0
best = value
update_flag = True
@@ -143,7 +144,7 @@
if cur_step > max_step:
stop_flag = True
else:
if value <= best:
if value <= best and abs(best - value) >= min_delta:
cur_step = 0
best = value
update_flag = True
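
To make the new condition concrete, here is an illustrative trace (not part of this diff) of how min_delta interacts with max_step; it assumes early_stopping is importable from recbole.utils and that the function still returns (best, cur_step, stop_flag, update_flag).

from recbole.utils import early_stopping

# Validation scores that keep creeping up by less than min_delta=0.001.
scores = [0.30, 0.3003, 0.3005, 0.3007]
best, cur_step = -float("inf"), 0

for score in scores:
    best, cur_step, stop_flag, update_flag = early_stopping(
        score, best, cur_step, max_step=2, bigger=True, min_delta=0.001
    )
    print(f"score={score:.4f} best={best:.4f} cur_step={cur_step} stop={stop_flag}")

# Only the first score counts as an improvement (it beats -inf by far more than 0.001),
# so best stays at 0.30; the three sub-threshold gains push cur_step to 3 > max_step and
# stop_flag becomes True, whereas the old `value >= best` test would have kept training.
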
2 changes: 2 additions & 0 deletions tests/config/test_config.py
@@ -26,6 +26,7 @@

class TestConfigClass(unittest.TestCase):
def test_default_settings(self):

config = Config(model="BPR", dataset="ml-100k")

self.assertEqual(config["model"], "BPR")
@@ -44,6 +45,7 @@ def test_default_settings(self):
self.assertIsInstance(config["train_neg_sample_args"], dict)
self.assertIsInstance(config["eval_step"], int)
self.assertIsInstance(config["stopping_step"], int)
self.assertIsInstance(config["min_delta"], float)
self.assertIsInstance(config["checkpoint_dir"], str)

self.assertIsInstance(config["eval_args"], dict)
6 changes: 6 additions & 0 deletions tests/config/test_overall.py
@@ -91,6 +91,12 @@ def test_stopping_step(self):
settings = {"epochs": 100}
self.assertTrue(run_parms({"stopping_step": [0, 1, 2]}))

def test_min_delta(self):
    settings = {"epochs": 100}
    self.assertTrue(run_parms({"min_delta": [0.01, 0.001, 0.0001]}))

def test_checkpoint_dir(self):
self.assertTrue(run_parms({"checkpoint_dir": ["saved_1/", "./saved_2"]}))
