From 637ed095a0db0cc8195a2cacf32d603a28edb6a2 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Sun, 29 Oct 2023 21:32:37 +0900
Subject: [PATCH] fix(config): Set eos/bos to tokenizer if different (#801)

* fix(config): Set eos/bos to tokenizer if different

* chore: fix lint
---
 src/axolotl/utils/models.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index d0042abc49..5ce7de5e51 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -386,6 +386,20 @@ def load_model(
         )
         model.config.max_position_embeddings = cfg.sequence_len
 
+    if (
+        hasattr(model.config, "bos_token_id")
+        and model.config.bos_token_id
+        and model.config.bos_token_id != tokenizer.bos_token_id
+    ):
+        model.config.bos_token_id = tokenizer.bos_token_id
+
+    if (
+        hasattr(model.config, "eos_token_id")
+        and model.config.eos_token_id
+        and model.config.eos_token_id != tokenizer.eos_token_id
+    ):
+        model.config.eos_token_id = tokenizer.eos_token_id
+
     if model.device.type == "cuda":
         log_gpu_memory_usage(LOG, "after model load", model.device)
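
Note (not part of the patch): the sketch below is a minimal, standalone illustration of the guard pattern this change adds to load_model(), i.e. mirroring the tokenizer's bos/eos ids onto the model config only when the config attribute exists, is set, and differs. The "gpt2" checkpoint and the drifted id value are illustrative assumptions, not anything the patch itself uses.

    # Standalone illustration of the same guard pattern as the patch.
    # "gpt2" is an arbitrary public checkpoint; the drifted id is made up.
    from transformers import AutoConfig, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    config = AutoConfig.from_pretrained("gpt2")

    # Pretend the checkpoint's config carries a stale eos id that no longer
    # matches the tokenizer actually used for training.
    config.eos_token_id = 1  # hypothetical drifted value

    # Sync only when the attribute exists, is set (truthy), and differs
    # from the tokenizer's id -- mirroring the patched load_model() logic.
    if (
        hasattr(config, "eos_token_id")
        and config.eos_token_id
        and config.eos_token_id != tokenizer.eos_token_id
    ):
        config.eos_token_id = tokenizer.eos_token_id

    assert config.eos_token_id == tokenizer.eos_token_id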