From c4477a577302c6aecd35d5c6cb7aa872e4e75ab9 Mon Sep 17 00:00:00 2001
From: Xu Song
Date: Wed, 20 Sep 2023 11:18:28 +0800
Subject: [PATCH 1/2] Fix register_buffer parameter

---
 tools/convert_module_to_hf.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/convert_module_to_hf.py b/tools/convert_module_to_hf.py
index 905bdfa16..2cbf390b9 100644
--- a/tools/convert_module_to_hf.py
+++ b/tools/convert_module_to_hf.py
@@ -227,10 +227,12 @@ def convert(input_checkpoint_path, loaded_config, output_checkpoint_path):
         state_dict["attention.rotary_emb.inv_freq"] = loaded_tp_ranks[0][
             "attention.rotary_emb.inv_freq"
         ]
-        state_dict["attention.bias"] = hf_layer.state_dict()["attention.bias"]
-        state_dict["attention.masked_bias"] = hf_layer.state_dict()[
-            "attention.masked_bias"
-        ]
+        if "attention.bias" in hf_layer.state_dict():
+            state_dict["attention.bias"] = hf_layer.state_dict()["attention.bias"]
+        if "attention.masked_bias" in hf_layer.state_dict():
+            state_dict["attention.masked_bias"] = hf_layer.state_dict()[
+                "attention.masked_bias"
+            ]
 
         # load state_dict into layer
         hf_layer.load_state_dict(state_dict)

From a157c6e79b4ae621bac82baff26601b6c29e5cc4 Mon Sep 17 00:00:00 2001
From: Xu Song
Date: Wed, 20 Sep 2023 11:22:12 +0800
Subject: [PATCH 2/2] Fix register_buffer parameter

---
 tools/convert_sequential_to_hf.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/convert_sequential_to_hf.py b/tools/convert_sequential_to_hf.py
index 5a66219bf..13b92437d 100644
--- a/tools/convert_sequential_to_hf.py
+++ b/tools/convert_sequential_to_hf.py
@@ -247,10 +247,12 @@ def convert(input_checkpoint_path, loaded_config, output_checkpoint_path):
             loaded_tp_ranks, "attention.rotary_emb.inv_freq", layer_i + 2
         )[0]
 
-        state_dict["attention.bias"] = hf_layer.state_dict()["attention.bias"]
-        state_dict["attention.masked_bias"] = hf_layer.state_dict()[
-            "attention.masked_bias"
-        ]
+        if "attention.bias" in hf_layer.state_dict():
+            state_dict["attention.bias"] = hf_layer.state_dict()["attention.bias"]
+        if "attention.masked_bias" in hf_layer.state_dict():
+            state_dict["attention.masked_bias"] = hf_layer.state_dict()[
+                "attention.masked_bias"
+            ]
 
         # load state_dict into layer
         hf_layer.load_state_dict(state_dict)
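Note: the guards matter because recent `transformers` releases register the GPT-NeoX causal-mask buffers with `register_buffer(..., persistent=False)` (presumably what the commit title refers to), so `attention.bias` and `attention.masked_bias` no longer appear in `hf_layer.state_dict()` and the old unconditional lookups raise `KeyError`. Below is a minimal, self-contained sketch of the behavior the patch relies on; `ToyAttention` is a hypothetical stand-in for the HF attention module, not code from either converted file:

```python
import torch
import torch.nn as nn


class ToyAttention(nn.Module):
    """Hypothetical stand-in for the HF attention layer (not GPTNeoXAttention)."""

    def __init__(self, persistent: bool):
        super().__init__()
        # Causal mask and fill value, mirroring attention.bias / attention.masked_bias.
        self.register_buffer(
            "bias",
            torch.tril(torch.ones(4, 4, dtype=torch.bool)),
            persistent=persistent,
        )
        self.register_buffer("masked_bias", torch.tensor(-1e9), persistent=persistent)


for persistent in (True, False):
    layer = ToyAttention(persistent)
    state_dict = {}
    # The guarded copy from the patch: only carry a buffer over if the
    # layer actually exposes it through state_dict().
    if "bias" in layer.state_dict():
        state_dict["bias"] = layer.state_dict()["bias"]
    if "masked_bias" in layer.state_dict():
        state_dict["masked_bias"] = layer.state_dict()["masked_bias"]
    print(persistent, sorted(state_dict))  # True -> ['bias', 'masked_bias']; False -> []
    # Succeeds in both cases: non-persistent buffers are not expected keys.
    layer.load_state_dict(state_dict)
```

With `persistent=True` both buffers round-trip through `state_dict()` as before; with `persistent=False` the guards simply skip them, and `load_state_dict` still succeeds because non-persistent buffers are never expected in an incoming state dict.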