try sequential sampling

huggingface · Dec 22, 2023 · 85b7205 · 85b7205
1 parent 3a1be44
commit 85b7205
Showing 1 changed file with 6 additions and 1 deletion.
diff --git a/scripts/train_jat_tokenized.py b/scripts/train_jat_tokenized.py
@@ -73,6 +73,11 @@ class DataTrainingArguments:
 os.environ["WANDB_PROJECT"] = "jat"
 
 
+class MyTrainer(Trainer):  # Trainer subclass that only overrides the train-sampler hook
+    def _get_train_sampler(self) -> None:  # always reports "no sampler" to the caller
+        return None  # NOTE(review): presumably makes the train DataLoader read the dataset in order ("try sequential sampling") — confirm against the installed Trainer
+
+
 def main():
     parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
 
@@ -148,7 +153,7 @@ def main():
         raise ValueError("Make sure to pass `--dispatch_batches False`.")
 
     # Why does the training continue after exhausting the dataset? https://github.com/huggingface/transformers/issues/26635
-    trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset, tokenizer=processor)
+    trainer = MyTrainer(model=model, args=training_args, train_dataset=train_dataset, tokenizer=processor)
     trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)