From 7548a8b76426fbf64e52343ad1846022793de58d Mon Sep 17 00:00:00 2001 From: AI-WAIFU <67525070+AI-WAIFU@users.noreply.github.com> Date: Thu, 5 Sep 2024 21:16:02 +0100 Subject: [PATCH] add assert for missing tokenizer_type in config (#1267) --- megatron/tokenizer/tokenizer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/megatron/tokenizer/tokenizer.py b/megatron/tokenizer/tokenizer.py index 348c7cefe..e450504c8 100644 --- a/megatron/tokenizer/tokenizer.py +++ b/megatron/tokenizer/tokenizer.py @@ -31,6 +31,8 @@ def build_tokenizer(args): """Initialize tokenizer.""" if args.rank == 0: print("> building {} tokenizer ...".format(args.tokenizer_type), flush=True) + + assert args.tokenizer_type is not None, "tokenizer_type must be specified in the .yml config" # Select and instantiate the tokenizer. if args.tokenizer_type.lower() == "GPT2BPETokenizer".lower():