
Commit

[Infra] Disable bias weight decay and log params per optimizer group
albertfgu committed Jul 8, 2023
1 parent 070384a commit fffbeee
Showing 2 changed files with 10 additions and 0 deletions.
8 changes: 8 additions & 0 deletions configs/config.yaml
@@ -39,6 +39,10 @@ train:

  ckpt: null # Resume training

  optimizer_param_grouping:
    bias_weight_decay: False
    normalization_weight_decay: False

  disable_dataset: False # Disable dataset loading
  validate_at_start: false
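The new optimizer_param_grouping flags control whether bias and normalization parameters receive weight decay. The grouping logic itself is not part of this diff; as a rough sketch of what such flags typically drive (the build_param_groups helper below is hypothetical, not the repository's code), parameters can be split into a decay group and a zero-decay group before the optimizer is built:

import torch
import torch.nn as nn

# Hypothetical helper (not the repository's code) showing what the
# optimizer_param_grouping flags typically control: bias and normalization
# parameters go into a separate group with weight_decay=0 unless the
# corresponding flag re-enables decay for them.
def build_param_groups(model, weight_decay,
                       bias_weight_decay=False, normalization_weight_decay=False):
    norm_types = (nn.LayerNorm, nn.BatchNorm1d, nn.BatchNorm2d, nn.GroupNorm)
    decay, no_decay = [], []
    for module in model.modules():
        for name, param in module.named_parameters(recurse=False):
            if not param.requires_grad:
                continue
            skip_bias = name.endswith("bias") and not bias_weight_decay
            skip_norm = isinstance(module, norm_types) and not normalization_weight_decay
            (no_decay if skip_bias or skip_norm else decay).append(param)
    return [
        {"params": decay, "weight_decay": weight_decay},
        {"params": no_decay, "weight_decay": 0.0},
    ]

# Example: AdamW applies weight decay only to the first group.
model = nn.Sequential(nn.Linear(16, 16), nn.LayerNorm(16), nn.Linear(16, 4))
optimizer = torch.optim.AdamW(build_param_groups(model, weight_decay=0.01), lr=1e-3)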

@@ -53,6 +57,10 @@ train:
    _name_: null
    decay: 0.7

  # PL 2.0 seems to have gotten rid of the trainer.track_grad_norm flag
  # We have a custom Callback (TrackNorms) that implements something similar
  track_grad_norms: False

tolerance: # fault tolerance for training on preemptible machines
  logdir: ./resume
  id: null # must be set to resume training on preemption
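track_grad_norms enables the custom TrackNorms callback mentioned in the comment above; that callback's implementation is not shown in this diff. A minimal sketch of what such a callback might look like, assuming PyTorch Lightning 2.x Callback hooks (the class body is illustrative, and the config access path via pl_module.hparams.train is an assumption):

from lightning.pytorch.callbacks import Callback

class TrackNormsSketch(Callback):
    """Illustrative stand-in for the TrackNorms callback referenced above:
    logs per-parameter gradient L2 norms just before each optimizer step."""

    def on_before_optimizer_step(self, trainer, pl_module, optimizer):
        # Assumes the training config is reachable at pl_module.hparams.train;
        # adjust the lookup to match the actual LightningModule.
        if not getattr(pl_module.hparams.train, "track_grad_norms", False):
            return
        norms = {
            f"grad_norm/{name}": param.grad.detach().norm(2)
            for name, param in pl_module.named_parameters()
            if param.grad is not None
        }
        pl_module.log_dict(norms, on_step=True, on_epoch=False)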
2 changes: 2 additions & 0 deletions src/utils/train.py
@@ -145,9 +145,11 @@ def log_optimizer(logger, optimizer, keys):
    keys = sorted(keys)
    for i, g in enumerate(optimizer.param_groups):
        group_hps = {k: g.get(k, None) for k in keys}
        n_params = sum(p.numel() for p in g['params'])
        logger.info(' | '.join([
            f"Optimizer group {i}",
            f"{len(g['params'])} tensors",
            f"{n_params} parameters",
        ] + [f"{k} {v}" for k, v in group_hps.items()]))
        # print(f"Optimizer group {i} | {len(g['params'])} tensors | lr {g['lr']} | wd {g.get('weight_decay', None)}")
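The two added lines make each group's log entry report its total parameter count alongside the tensor count. A hypothetical usage (assuming log_optimizer is importable from src.utils.train; the model and numbers below are illustrative):

import logging

import torch
import torch.nn as nn

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Two groups: weights with weight decay, biases without, mirroring the config above.
model = nn.Linear(16, 4)
optimizer = torch.optim.AdamW(
    [
        {"params": [model.weight], "weight_decay": 0.01},
        {"params": [model.bias], "weight_decay": 0.0},
    ],
    lr=1e-3,
)

log_optimizer(logger, optimizer, keys=["lr", "weight_decay"])
# Expected message portion of the log lines (counts from the model above):
#   Optimizer group 0 | 1 tensors | 64 parameters | lr 0.001 | weight_decay 0.01
#   Optimizer group 1 | 1 tensors | 4 parameters | lr 0.001 | weight_decay 0.0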

