diff --git a/data_juicer/ops/mapper/generate_instruction_mapper.py b/data_juicer/ops/mapper/generate_instruction_mapper.py index e42f697d5..f75c54153 100644 --- a/data_juicer/ops/mapper/generate_instruction_mapper.py +++ b/data_juicer/ops/mapper/generate_instruction_mapper.py @@ -111,6 +111,10 @@ def __init__(self, super().__init__(*args, **kwargs) self.num_proc = 1 + if not seed_file: + raise ValueError('Please provide `seed_file` parameter, a file in chatml format. '\ + 'Reference data: data-juicer/demos/data/demo-dataset-chatml.jsonl ') + self.instruct_num = instruct_num self.similarity_threshold = similarity_threshold self.similarity_type = 'rouge_l'