Skip to content

Commit

Permalink
* fix bug in nlpaug_en_mapper: nlpaug could generate an indefinite number of augmented samples
Browse files Browse the repository at this point in the history
  • Loading branch information
HYLcool committed Nov 15, 2023
1 parent de984f3 commit baddb14
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions data_juicer/ops/mapper/nlpaug_en_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def process(self, samples):
if key == self.text_key:
res_samples[self.text_key] += aug_texts
else:
- res_samples[key] += res_samples[key] * self.aug_num
+ res_samples[key] += res_samples[key] * len(aug_texts)
else:
# apply each aug method to generate several augmented texts
for aug_method in self.aug:
Expand All @@ -134,6 +134,6 @@ def process(self, samples):
# add other replicate fields
for key in res_samples:
if key != self.text_key:
- res_samples[key] += res_samples[key] * self.aug_num \
- * len(self.aug)
+ res_samples[key] = res_samples[key] * \
+ len(res_samples[self.text_key])
return res_samples

0 comments on commit baddb14

Please sign in to comment.