Skip to content

Commit

Permalink
Link to gia-dataset for oscar (instead of Clément's repo) (#140)
Browse files Browse the repository at this point in the history
  • Loading branch information
qgallouedec authored Nov 22, 2023
1 parent 5b36e2f commit f7ca594
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 8 deletions.
5 changes: 1 addition & 4 deletions scripts/download_all_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,5 @@
print(f"Loading {task}...")
cache_path = f"{HF_DATASETS_CACHE}/gia-project/gia-dataset/{task}"
if not os.path.exists(cache_path):
- if task == "oscar":
- dataset = load_dataset("ClementRomac/cleaned_deduplicated_oscar")
- else:
- dataset = load_dataset("gia-project/gia-dataset", task)
+ dataset = load_dataset("gia-project/gia-dataset", task)
dataset.save_to_disk(cache_path)
5 changes: 1 addition & 4 deletions scripts/train_gia.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,7 @@ def main():
dataset_dict[task] = {s: d.to_iterable_dataset() for s, d in dataset.items()}
else:
for task in tasks:
- if task == "oscar":
- dataset_dict[task] = load_dataset("ClementRomac/cleaned_deduplicated_oscar", streaming=True)
- else:
- dataset_dict[task] = load_dataset("gia-project/gia-dataset", task, streaming=True)
+ dataset_dict[task] = load_dataset("gia-project/gia-dataset", task, streaming=True)

# Preprocess the dataset
for task in dataset_dict.keys():
Expand Down

0 comments on commit f7ca594

Please sign in to comment.