From e277bc71f4aa88bdd213c0c68d7c490f2165c9e0 Mon Sep 17 00:00:00 2001 From: Jacob Hatef <74274091+jahatef@users.noreply.github.com> Date: Mon, 30 Oct 2023 21:48:59 -0400 Subject: [PATCH 1/2] fix lion optimizer documentation (#1067) * fix lion optimizer documentation * Update NeoXArgs docs automatically --------- Co-authored-by: github-actions --- configs/neox_arguments.md | 2 +- megatron/neox_arguments/neox_args.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/neox_arguments.md b/configs/neox_arguments.md index 1f3511456..c88f90e5d 100644 --- a/configs/neox_arguments.md +++ b/configs/neox_arguments.md @@ -111,7 +111,7 @@ Logging Arguments - **git_hash**: str - Default = a97bd1f + Default = 8ebf7c6 current git hash of repository diff --git a/megatron/neox_arguments/neox_args.py b/megatron/neox_arguments/neox_args.py index e1a58b6d9..8d0953da2 100644 --- a/megatron/neox_arguments/neox_args.py +++ b/megatron/neox_arguments/neox_args.py @@ -390,7 +390,7 @@ class NeoXArgsOptimizer(NeoXArgsTemplate): "adam", "onebitadam", "cpu_adam", "cpu_torch_adam", "sm3", "madgrad_wd", "sgd", "lion" ] = "adam" """ - Type of optimizer to use. Choose from ['adam', 'onebitadam', 'cpu_adam', 'cpu_torch_adam', 'sm3', 'madgrad_wd', 'sgd'] + Type of optimizer to use. Choose from ['adam', 'onebitadam', 'cpu_adam', 'cpu_torch_adam', 'sm3', 'madgrad_wd', 'sgd', 'lion'] NOTE: sgd will use MuSGD from Mup. Mup must be enabled for this optimizer. """ From f574f22bbfaa65a81a668e96cbb9a71283460db6 Mon Sep 17 00:00:00 2001 From: Quentin Anthony Date: Mon, 30 Oct 2023 21:50:27 -0400 Subject: [PATCH 2/2] Fix preprocess_data.py link (#1064) * Fix preprocess_data.py link * Update NeoXArgs docs automatically * Update NeoXArgs docs automatically --------- Co-authored-by: github-actions --- configs/neox_arguments.md | 2 +- tools/datasets/corpora.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/neox_arguments.md b/configs/neox_arguments.md index c88f90e5d..2d7794105 100644 --- a/configs/neox_arguments.md +++ b/configs/neox_arguments.md @@ -111,7 +111,7 @@ Logging Arguments - **git_hash**: str - Default = 8ebf7c6 + Default = 2ff807d current git hash of repository diff --git a/tools/datasets/corpora.py b/tools/datasets/corpora.py index 35977b908..9056b8f97 100644 --- a/tools/datasets/corpora.py +++ b/tools/datasets/corpora.py @@ -141,7 +141,7 @@ def tokenize(self): [os.path.join(parent_folder, os.path.basename(url)) for url in self.urls] ) - cmd = f"python tools/preprocess_data.py \ + cmd = f"python tools/datasets/preprocess_data.py \ --input {jsonl_filepath} \ --output-prefix {parent_folder}/{self.name} \ --vocab {self.vocab_file} \