From 01e74f472c8a361f1db4636f01c5c50ba1774bd7 Mon Sep 17 00:00:00 2001 From: Quentin Anthony Date: Sun, 8 Sep 2024 17:28:35 -0700 Subject: [PATCH] hotfix activation typo from https://github.com/EleutherAI/gpt-neox/pull/1212 (#1271) --- megatron/model/transformer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/megatron/model/transformer.py b/megatron/model/transformer.py index d2b93eb06..f14076a17 100644 --- a/megatron/model/transformer.py +++ b/megatron/model/transformer.py @@ -979,6 +979,7 @@ def __init__( self.gpt_j_tied = neox_args.gpt_j_tied self.mlp_type = neox_args.mlp_type self.moe_type = neox_args.moe_type + self.activation = neox_args.activation if self.gpt_j_residual: # GPT-J style layers allow us to defer the reduction of results across TP ranks until the end of the two sublayers.