Adding min_delta setting for early stopping in training settings #1328

Open · wants to merge 3 commits into base: master
2 changes: 2 additions & 0 deletions docs/source/user_guide/config/training_settings.rst
@@ -22,6 +22,8 @@ Training settings are designed to set parameters about model training.
evaluated on the valid dataset. Defaults to ``1``.
- ``stopping_step (int)`` : The threshold for validation-based early stopping.
Defaults to ``10``.
- ``min_delta (float)`` : The minimum change in the evaluation measure that counts as an improvement for early stopping; a new result must differ from the best result so far by at least this amount before the stopping counter is reset.
Defaults to ``0.001``.
- ``clip_grad_norm (dict)`` : The args of `clip_grad_norm_ <https://pytorch.org/docs/stable/generated/torch.nn.utils.clip_grad_norm_.html>`_
which will clip gradient norm of model. Defaults to ``None``.
- ``loss_decimal_place(int)``: The decimal place of training loss. Defaults to ``4``.
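
For reference, a minimal usage sketch of the new option (illustrative only, not part of this diff): it assumes the existing quick-start entry point recbole.quick_start.run_recbole and its config_dict argument, and the 0.005 value is an arbitrary example rather than the PR's default of 0.001.

from recbole.quick_start import run_recbole

# Hypothetical config: require a gain of at least 0.005 in the validation metric
# before the early-stopping counter is reset; give up after 10 stalled evaluations.
parameter_dict = {
    "stopping_step": 10,
    "min_delta": 0.005,
}

run_recbole(model="BPR", dataset="ml-100k", config_dict=parameter_dict)
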
2 changes: 2 additions & 0 deletions recbole/properties/overall.yaml
@@ -16,6 +16,7 @@ log_wandb: False # (bool) Whether or not to use Weights & Biases(
wandb_project: 'recbole' # (str) The project to conduct experiments in W&B.
shuffle: True # (bool) Whether or not to shuffle the training data before each epoch.

# Training Settings
epochs: 300 # (int) The number of training epochs.
train_batch_size: 2048 # (int) The training batch size.
@@ -29,6 +30,7 @@ train_neg_sample_args: # (dict) Negative sampling configuration for mod
candidate_num: 0 # (int) The number of candidate negative items when dynamic negative sampling.
eval_step: 1 # (int) The number of training epochs before an evaluation on the valid dataset.
stopping_step: 10 # (int) The threshold for validation-based early stopping.
min_delta: 0.001 # (float) The minimum change in the valid metric required to count as an improvement for early stopping.
clip_grad_norm: ~ # (dict) The args of clip_grad_norm_ which will clip gradient norm of model.
weight_decay: 0.0 # (float) The weight decay value (L2 penalty) for optimizers.
loss_decimal_place: 4 # (int) The decimal place of training loss.
11 changes: 11 additions & 0 deletions recbole/trainer/trainer.py
@@ -114,6 +114,7 @@ def __init__(self, config, model):
self.logger = getLogger()
self.tensorboard = get_tensorboard(self.logger)
self.wandblogger = WandbLogger(config)

self.learner = config["learner"]
self.learning_rate = config["learning_rate"]
self.epochs = config["epochs"]
@@ -122,12 +123,14 @@
self.clip_grad_norm = config["clip_grad_norm"]
self.valid_metric = config["valid_metric"].lower()
self.valid_metric_bigger = config["valid_metric_bigger"]
self.min_delta = config["min_delta"]
self.test_batch_size = config["eval_batch_size"]
self.gpu_available = torch.cuda.is_available() and config["use_gpu"]
self.device = config["device"]
self.checkpoint_dir = config["checkpoint_dir"]
self.enable_amp = config["enable_amp"]
self.enable_scaler = torch.cuda.is_available() and config["enable_scaler"]

ensure_dir(self.checkpoint_dir)
saved_model_file = "{}-{}.pth".format(self.config["model"], get_local_time())
self.saved_model_file = os.path.join(self.checkpoint_dir, saved_model_file)
@@ -474,6 +477,8 @@ def fit(
self.cur_step,
max_step=self.stopping_step,
bigger=self.valid_metric_bigger,
min_delta=self.min_delta,
)
valid_end_time = time()
valid_score_output = (
@@ -910,6 +915,8 @@ def __init__(self, config, model):

self.stopping_step = config["stopping_step"]
self.valid_metric_bigger = config["valid_metric_bigger"]
self.min_delta = config["min_delta"]

self.cur_step = 0
self.best_valid_score = -np.inf if self.valid_metric_bigger else np.inf
self.best_valid_result = None
@@ -1018,6 +1025,8 @@ def fit(
self.cur_step,
max_step=self.stopping_step,
bigger=self.valid_metric_bigger,
min_delta=self.min_delta,
)

valid_end_time = time()
@@ -1383,6 +1392,8 @@ def fit(
self.cur_step,
max_step=self.stopping_step,
bigger=self.valid_metric_bigger,
min_delta=self.min_delta,
)
valid_end_time = time()
valid_score_output = (
3 changes: 1 addition & 2 deletions recbole/utils/argument_list.py
@@ -23,8 +23,7 @@
training_arguments = [
'epochs', 'train_batch_size',
'learner', 'learning_rate',
'train_neg_sample_args',
'eval_step', 'stopping_step',
'eval_step', 'stopping_step', 'min_delta',
'clip_grad_norm',
'weight_decay',
'loss_decimal_place',
7 changes: 4 additions & 3 deletions recbole/utils/utils.py
@@ -110,9 +110,10 @@ def get_trainer(model_type, model_name):
return getattr(importlib.import_module("recbole.trainer"), "Trainer")


def early_stopping(value, best, cur_step, max_step, bigger=True):
def early_stopping(value, best, cur_step, max_step, bigger=True, min_delta=0.001):
r"""validation-based early stopping


Args:
value (float): current result
best (float): best result
@@ -134,7 +135,7 @@ def early_stopping(value, best, cur_step, max_step, bigger=True):
stop_flag = False
update_flag = False
if bigger:
if value >= best:
if value >= best and abs(value - best) >= min_delta:
cur_step = 0
best = value
update_flag = True
@@ -143,7 +144,7 @@
if cur_step > max_step:
stop_flag = True
else:
if value <= best:
if value <= best and abs(best - value) >= min_delta:
cur_step = 0
best = value
update_flag = True
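
To make the new condition concrete, here is an illustrative trace (not part of this diff) of how min_delta interacts with max_step; it assumes early_stopping is importable from recbole.utils and that the function still returns (best, cur_step, stop_flag, update_flag).

from recbole.utils import early_stopping

# Validation scores that keep creeping up by less than min_delta=0.001.
scores = [0.30, 0.3003, 0.3005, 0.3007]
best, cur_step = -float("inf"), 0

for score in scores:
    best, cur_step, stop_flag, update_flag = early_stopping(
        score, best, cur_step, max_step=2, bigger=True, min_delta=0.001
    )
    print(f"score={score:.4f} best={best:.4f} cur_step={cur_step} stop={stop_flag}")

# Only the first score counts as an improvement (it beats -inf by far more than 0.001),
# so best stays at 0.30; the three sub-threshold gains push cur_step to 3 > max_step and
# stop_flag becomes True, whereas the old `value >= best` test would have kept training.
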
2 changes: 2 additions & 0 deletions tests/config/test_config.py
@@ -26,6 +26,7 @@

class TestConfigClass(unittest.TestCase):
def test_default_settings(self):

config = Config(model="BPR", dataset="ml-100k")

self.assertEqual(config["model"], "BPR")
@@ -44,6 +45,7 @@ def test_default_settings(self):
self.assertIsInstance(config["train_neg_sample_args"], dict)
self.assertIsInstance(config["eval_step"], int)
self.assertIsInstance(config["stopping_step"], int)
self.assertIsInstance(config["min_delta"], float)
self.assertIsInstance(config["checkpoint_dir"], str)

self.assertIsInstance(config["eval_args"], dict)
6 changes: 6 additions & 0 deletions tests/config/test_overall.py
@@ -91,6 +91,12 @@ def test_stopping_step(self):
settings = {"epochs": 100}
self.assertTrue(run_parms({"stopping_step": [0, 1, 2]}))

def test_min_delta(self):
    settings = {"epochs": 100}
    self.assertTrue(run_parms({"min_delta": [0.01, 0.001, 0.0001]}))

def test_checkpoint_dir(self):
self.assertTrue(run_parms({"checkpoint_dir": ["saved_1/", "./saved_2"]}))
