
Merge pull request #1072 from kyuheejang/Fixing-neox-to-huggingface
Fixing convert neox to huggingface bug
StellaAthena authored Nov 7, 2023
2 parents 90aa131 + 04dc2ba commit f214358
Showing 2 changed files with 6 additions and 2 deletions.
4 changes: 3 additions & 1 deletion tools/ckpts/convert_module_to_hf.py
@@ -225,7 +225,9 @@ def convert(input_checkpoint_path, loaded_config, output_checkpoint_path):
"mlp.dense_4h_to_h.bias",
"attention.dense.bias",
]:
state_dict[key] = sum([t[key] for t in loaded_tp_ranks])
state_dict[key] = sum([t[key] for t in loaded_tp_ranks]) / len(
loaded_tp_ranks
)

# Just take one
state_dict["attention.rotary_emb.inv_freq"] = loaded_tp_ranks[0][
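
Context on the fix: in Megatron-style tensor parallelism, the biases of row-parallel output layers (mlp.dense_4h_to_h, attention.dense) are replicated identically on every TP rank, so summing the copies scales the bias by the TP world size. Dividing by len(loaded_tp_ranks) recovers the original value. A minimal sketch of the merge logic, using hypothetical in-memory shard dicts in place of the real checkpoint files:

import torch

# Hypothetical stand-in for checkpoint shards loaded from each tensor-parallel
# rank; each rank holds an identical copy of the row-parallel layer's bias.
tp_world_size = 4
bias = torch.randn(1024)
loaded_tp_ranks = [
    {"attention.dense.bias": bias.clone()} for _ in range(tp_world_size)
]

key = "attention.dense.bias"

# Old (buggy) merge: summing replicated copies scales the bias by the TP degree.
summed = sum([t[key] for t in loaded_tp_ranks])

# Fixed merge: dividing by the rank count recovers the original bias.
averaged = sum([t[key] for t in loaded_tp_ranks]) / len(loaded_tp_ranks)

assert torch.allclose(summed, bias * tp_world_size)
assert torch.allclose(averaged, bias)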
4 changes: 3 additions & 1 deletion tools/ckpts/convert_sequential_to_hf.py
@@ -238,7 +238,9 @@ def convert(input_checkpoint_path, loaded_config, output_checkpoint_path):
"mlp.dense_4h_to_h.bias",
"attention.dense.bias",
]:
state_dict[key] = sum(get_state(loaded_tp_ranks, key, layer_i + 2))
state_dict[key] = sum(get_state(loaded_tp_ranks, key, layer_i + 2)) / len(
loaded_tp_ranks
)

# Just take one
state_dict["attention.rotary_emb.inv_freq"] = get_state(
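
The sequential-format script applies the same fix through its get_state helper rather than indexing the rank dicts directly. Its implementation is not shown in this diff; from the call site it evidently returns the per-rank tensors for a given key and layer. A hedged sketch of the fixed expression, with an assumed get_state and an assumed key layout used purely for illustration:

import torch

# Hypothetical stand-in for the script's get_state helper; the real key
# layout inside the checkpoint dicts is an assumption here.
def get_state(loaded_tp_ranks, key, layer_idx):
    return [rank[f"sequential.{layer_idx}.{key}"] for rank in loaded_tp_ranks]

layer_i = 0
bias = torch.randn(1024)
loaded_tp_ranks = [
    {f"sequential.{layer_i + 2}.attention.dense.bias": bias.clone()}
    for _ in range(2)
]

# The fixed expression averages the replicated copies back to the
# single-rank value instead of summing them.
merged = sum(get_state(loaded_tp_ranks, "attention.dense.bias", layer_i + 2)) / len(
    loaded_tp_ranks
)
assert torch.allclose(merged, bias)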
