diff --git a/megatron/tokenizer/tokenizer.py b/megatron/tokenizer/tokenizer.py index 348c7cefe..e450504c8 100644 --- a/megatron/tokenizer/tokenizer.py +++ b/megatron/tokenizer/tokenizer.py @@ -31,6 +31,8 @@ def build_tokenizer(args): """Initialize tokenizer.""" if args.rank == 0: print("> building {} tokenizer ...".format(args.tokenizer_type), flush=True) + + assert args.tokenizer_type is not None, "tokenizer_type must be specified in the .yml config" # Select and instantiate the tokenizer. if args.tokenizer_type.lower() == "GPT2BPETokenizer".lower():