diff --git a/configs/neox_arguments.md b/configs/neox_arguments.md index 1ef203ecd..985a6b8aa 100644 --- a/configs/neox_arguments.md +++ b/configs/neox_arguments.md @@ -111,7 +111,7 @@ Logging Arguments - **git_hash**: str - Default = 66d0666 + Default = 8ebf7c6 current git hash of repository diff --git a/megatron/neox_arguments/neox_args.py b/megatron/neox_arguments/neox_args.py index e1a58b6d9..8d0953da2 100644 --- a/megatron/neox_arguments/neox_args.py +++ b/megatron/neox_arguments/neox_args.py @@ -390,7 +390,7 @@ class NeoXArgsOptimizer(NeoXArgsTemplate): "adam", "onebitadam", "cpu_adam", "cpu_torch_adam", "sm3", "madgrad_wd", "sgd", "lion" ] = "adam" """ - Type of optimizer to use. Choose from ['adam', 'onebitadam', 'cpu_adam', 'cpu_torch_adam', 'sm3', 'madgrad_wd', 'sgd'] + Type of optimizer to use. Choose from ['adam', 'onebitadam', 'cpu_adam', 'cpu_torch_adam', 'sm3', 'madgrad_wd', 'sgd', 'lion'] NOTE: sgd will use MuSGD from Mup. Mup must be enabled for this optimizer. """