From c582bf8e0e0df0b6c9f805ca66d9a28fad2f9672 Mon Sep 17 00:00:00 2001 From: AI_WAIFU Date: Wed, 4 Sep 2024 18:27:25 +0100 Subject: [PATCH] add assert for missing tokenizer_type in config --- megatron/tokenizer/tokenizer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/megatron/tokenizer/tokenizer.py b/megatron/tokenizer/tokenizer.py index 348c7cefe..e450504c8 100644 --- a/megatron/tokenizer/tokenizer.py +++ b/megatron/tokenizer/tokenizer.py @@ -31,6 +31,8 @@ def build_tokenizer(args): """Initialize tokenizer.""" if args.rank == 0: print("> building {} tokenizer ...".format(args.tokenizer_type), flush=True) + + assert args.tokenizer_type is not None, "tokenizer_type must be specified in the .yml config" # Select and instantiate the tokenizer. if args.tokenizer_type.lower() == "GPT2BPETokenizer".lower():