diff --git a/megatron/data/gpt2_dataset.py b/megatron/data/gpt2_dataset.py index e37c558d2..c4729cc3e 100644 --- a/megatron/data/gpt2_dataset.py +++ b/megatron/data/gpt2_dataset.py @@ -123,7 +123,7 @@ def __getitem__(self, idx): samples.append( dataset.get( self.doc_idx[doc_index_f], - offset=offset_l, + offset=offset_f, length=offset_l - offset_f + 1, ) )