Update neox_args.py
Changed some default values to match the values we generally recommend people use.
StellaAthena authored Dec 26, 2023
1 parent 1148a0f commit 31cb364
Showing 1 changed file with 5 additions and 5 deletions: megatron/neox_arguments/neox_args.py
@@ -774,7 +774,7 @@ class NeoXArgsTraining(NeoXArgsTemplate):
     See https://arxiv.org/abs/1911.02116 for more details
     """
 
-    weighted_sampler_alpha: float = 0.3
+    weighted_sampler_alpha: float = 1.0
     """
     Alpha value for `weight_by_num_documents`. Only has an effect if `weight_by_num_documents` = True.
@@ -923,17 +923,17 @@ class NeoXArgsTraining(NeoXArgsTemplate):
     Exit the program after the iteration is divisible by this value.
     """
 
-    attention_dropout: float = 0.1
+    attention_dropout: float = 0.0
     """
     Post attention dropout probability.
     """
 
-    hidden_dropout: float = 0.1
+    hidden_dropout: float = 0.0
     """
     Dropout probability for hidden state transformer.
     """
 
-    weight_decay: float = 0.01
+    weight_decay: float = 0.1
     """
     Weight decay coefficient for L2 regularization.
     """
@@ -982,7 +982,7 @@ class NeoXArgsTraining(NeoXArgsTemplate):
     gas: int = None
     """gradient_accumulation_steps"""  # TODO this is a duplicate, remove?
 
-    clip_grad: float = None
+    clip_grad: float = 1.0
     """
     Gradient clipping based on global L2 norm.
     """
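
For context on the `weighted_sampler_alpha` change above: a minimal sketch of how alpha-weighted sampling by document count works, assuming the standard formulation from https://arxiv.org/abs/1911.02116 rather than the repository's exact code; the function name `weights_by_num_docs` is illustrative. With the new default of alpha = 1.0 the sampling weights reduce to the raw per-dataset document fractions, while alpha < 1.0 (e.g. the old default of 0.3) upsamples smaller datasets.

def weights_by_num_docs(doc_counts, alpha=1.0):
    # p_i: each dataset's share of the total document count
    total = sum(doc_counts)
    probs = [n / total for n in doc_counts]
    # q_i is proportional to p_i ** alpha, renormalized to sum to 1
    tempered = [p ** alpha for p in probs]
    z = sum(tempered)
    return [t / z for t in tempered]

# alpha = 1.0 (new default): weights equal the raw fractions
print(weights_by_num_docs([900, 100], alpha=1.0))  # [0.9, 0.1]
# alpha = 0.3 (old default): the smaller dataset is upsampled
print(weights_by_num_docs([900, 100], alpha=0.3))  # roughly [0.66, 0.34]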
