Commit 15fc6a2

Merge branch 'huggingface:main' into main
ischlag authored Jul 1, 2024
2 parents adde82a + ee785d6
Showing 4 changed files with 25 additions and 3 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/trufflehog.yml
@@ -0,0 +1,15 @@
on:
  push:

name: Secret Leaks

jobs:
  trufflehog:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Secret Scanning
        uses: trufflesecurity/trufflehog@main
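
Because fetch-depth: 0 checks out the full git history, TruffleHog scans every commit reachable from the push for leaked credentials, not just the files in the latest tree.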
1 change: 1 addition & 0 deletions examples/llama/convert_weights.py
@@ -71,6 +71,7 @@ def get_config_mapping(nt_to_hf: bool = True) -> dict[str, str]:
"pretraining_tp": "pretraining_tp",
"rms_norm_eps": "rms_norm_eps",
"rope_scaling": "rope_scaling",
"rope_theta": "rope_theta",
"tie_word_embeddings": "tie_word_embeddings",
"use_cache": "use_cache",
"vocab_size": "vocab_size",
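
The new rope_theta entry keeps the RoPE base frequency from being dropped in conversion; models such as Llama 3 ship rope_theta = 500000 rather than the common default of 10000, so silently losing the key would break long-context behavior in the converted checkpoint. A minimal sketch of how a key mapping like this is typically applied (apply_mapping and the sample values are hypothetical; only get_config_mapping appears in this diff):

def apply_mapping(config: dict, mapping: dict[str, str]) -> dict:
    # Rename keys per the mapping; keys absent from the mapping are dropped.
    return {dst: config[src] for src, dst in mapping.items() if src in config}

hf_config = {"rope_theta": 500000.0, "rms_norm_eps": 1e-5, "vocab_size": 128256}
hf_to_nt = {"rope_theta": "rope_theta", "rms_norm_eps": "rms_norm_eps", "vocab_size": "vocab_size"}
print(apply_mapping(hf_config, hf_to_nt))
# -> {'rope_theta': 500000.0, 'rms_norm_eps': 1e-05, 'vocab_size': 128256}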
6 changes: 3 additions & 3 deletions src/nanotron/config/config.py
@@ -249,7 +249,7 @@ class LRSchedulerArgs:
     lr_warmup_steps: number of steps to warmup the learning rate
     lr_warmup_style: linear or constant
-    lr_decay_style: linear or cosine
+    lr_decay_style: linear, cosine or 1-sqrt
     min_decay_lr: minimum learning rate after decay
     lr_decay_steps: optional number of steps to decay the learning rate otherwise will default to train_steps - lr_warmup_steps
     lr_decay_starting_step: optional number of steps to decay the learning rate otherwise will default to train_steps - lr_warmup_steps
@@ -272,9 +272,9 @@ def __post_init__(self):
             self.lr_warmup_style = "linear"
         if self.lr_decay_style is None:
             self.lr_decay_style = "linear"
-        if self.lr_decay_style not in ["linear", "cosine"]:
+        if self.lr_decay_style not in ["linear", "cosine", "1-sqrt"]:
             raise ValueError(
-                f"lr_decay_style should be a string selected in ['linear', 'cosine'] and not {self.lr_decay_style}"
+                f"lr_decay_style should be a string selected in ['linear', 'cosine', '1-sqrt'] and not {self.lr_decay_style}"
             )
         if self.min_decay_lr is None:
             self.min_decay_lr = self.learning_rate
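
A sketch of selecting the new style when building a config; the field names come from the docstring above, but treating LRSchedulerArgs as a plain dataclass constructor is an assumption of this sketch:

from nanotron.config import LRSchedulerArgs

# "1-sqrt" would have raised ValueError in __post_init__ before this change.
scheduler_args = LRSchedulerArgs(
    learning_rate=3e-4,
    lr_warmup_steps=1_000,
    lr_warmup_style="linear",
    lr_decay_style="1-sqrt",
    min_decay_lr=0.0,
)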
6 changes: 6 additions & 0 deletions src/nanotron/helpers.py
@@ -146,6 +146,12 @@ def lr_lambda(current_step: int, initial_lr: float):
                 * (lr_decay_steps - (current_step - lr_decay_starting_step))
                 / lr_decay_steps
             )
+        elif lr_scheduler_args.lr_decay_style == "1-sqrt":
+            lmbda = (
+                lr_scheduler_args.min_decay_lr
+                + (initial_lr - lr_scheduler_args.min_decay_lr)
+                * (1 - math.sqrt((current_step - lr_decay_starting_step) / lr_decay_steps))
+            )
         else:
             raise ValueError(f"Unknown decay style {lr_scheduler_args.lr_decay_style}")

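
The 1-sqrt shape, used as a cooldown in recent warmup-stable-decay recipes, drops steeply at the start of the decay window and flattens near the end, since sqrt(t) grows fastest near t = 0; halfway through the window the learning rate has already covered about 71% of the gap down to min_decay_lr. A self-contained restatement of the formula above (one_sqrt_lr and the sample numbers are illustrative; warmup and edge handling are omitted):

import math

def one_sqrt_lr(current_step: int, initial_lr: float, min_decay_lr: float,
                lr_decay_starting_step: int, lr_decay_steps: int) -> float:
    # Fraction of the decay window elapsed, in [0, 1].
    progress = (current_step - lr_decay_starting_step) / lr_decay_steps
    return min_decay_lr + (initial_lr - min_decay_lr) * (1 - math.sqrt(progress))

for step in (0, 2_500, 5_000, 10_000):
    print(step, f"{one_sqrt_lr(step, 3e-4, 0.0, 0, 10_000):.2e}")
# 0 -> 3.00e-04, 2500 -> 1.50e-04, 5000 -> 8.79e-05, 10000 -> 0.00e+00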
