From 35a4bcb08984f341d0a16a61e35055784bbd099e Mon Sep 17 00:00:00 2001 From: evannorstrand-mp <105453591+evannorstrand-mp@users.noreply.github.com> Date: Thu, 25 May 2023 09:55:10 -0400 Subject: [PATCH] Update build_dataset.py to fix TOKENIZER --- Andromeda/build_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Andromeda/build_dataset.py b/Andromeda/build_dataset.py index 5e0a02c..8902ae1 100644 --- a/Andromeda/build_dataset.py +++ b/Andromeda/build_dataset.py @@ -14,7 +14,7 @@ class CFG: DATASET_NAME: str = "EleutherAI/the_pile_deduplicated" def built_dataset(args): - tokenizer = AutoTokenizer.from_pretrained(CFG.Tokenizer) + tokenizer = AutoTokenizer.from_pretrained(CFG.TOKENIZER) train_dataset = load_dataset(CFG.DATASET_NAME, split="train")