diff --git a/configs/neox_arguments.md b/configs/neox_arguments.md index 8188a79f6..6a9d02c9d 100644 --- a/configs/neox_arguments.md +++ b/configs/neox_arguments.md @@ -111,7 +111,7 @@ Logging Arguments - **git_hash**: str - Default = efaee8d + Default = bb1b145 current git hash of repository diff --git a/megatron/logging.py b/megatron/logging.py index 3a40864b5..afde680a5 100644 --- a/megatron/logging.py +++ b/megatron/logging.py @@ -92,17 +92,15 @@ def get_flops(neox_args, iter_time_s) -> float: hidden_size = neox_args.hidden_size num_layers = neox_args.num_layers ckpt_activations_factor = 4 if neox_args.checkpoint_activations else 3 - flops_calc1 = ( + flops_per_iteration = ( 24 * ckpt_activations_factor * batch_size * seq_len * num_layers * (hidden_size**2) - * (1.0 + (seq_len / (6.0 * hidden_size))) + * (1.0 + (seq_len / (6.0 * hidden_size)) + (vocab_size / (16.0 * num_layers * hidden_size))) ) - flops_calc2 = vocab_size / (16.0 * num_layers * hidden_size) - flops_per_iteration = flops_calc1 + flops_calc2 return flops_per_iteration / (iter_time_s * world_size)