diff --git a/data_juicer/ops/mapper/nlpaug_en_mapper.py b/data_juicer/ops/mapper/nlpaug_en_mapper.py index 6a5148c7b..8509c1ba0 100644 --- a/data_juicer/ops/mapper/nlpaug_en_mapper.py +++ b/data_juicer/ops/mapper/nlpaug_en_mapper.py @@ -125,7 +125,7 @@ def process(self, samples): if key == self.text_key: res_samples[self.text_key] += aug_texts else: - res_samples[key] += res_samples[key] * self.aug_num + res_samples[key] += res_samples[key] * len(aug_texts) else: # apply each aug method to generate several augmented texts for aug_method in self.aug: @@ -134,6 +134,6 @@ def process(self, samples): # add other replicate fields for key in res_samples: if key != self.text_key: - res_samples[key] += res_samples[key] * self.aug_num \ - * len(self.aug) + res_samples[key] = res_samples[key] * \ + len(res_samples[self.text_key]) return res_samples