Skip to content

Commit

Permalink
fix: missing args in load_formatter of Analyzer
Browse files Browse the repository at this point in the history
  • Loading branch information
zhijianma committed Sep 9, 2024
1 parent 387e775 commit b1fe01d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
9 changes: 6 additions & 3 deletions data_juicer/core/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,12 @@ def __init__(self, cfg=None):

# setup formatter
logger.info('Setting up data formatter...')
- self.formatter = load_formatter(self.cfg.dataset_path,
- self.cfg.text_keys, self.cfg.suffixes,
- self.cfg.add_suffix)
+ self.formatter = load_formatter(
+ dataset_path=self.cfg.dataset_path,
+ generated_dataset_config=self.cfg.generated_dataset_config,
+ text_keys=self.cfg.text_keys,
+ suffixes=self.cfg.suffixes,
+ add_suffix=self.cfg.add_suffix)

# prepare exporter and check export path suffix
# NOTICE: no need to export dataset texts for analyzer
Expand Down
10 changes: 6 additions & 4 deletions data_juicer/core/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,12 @@ def __init__(self, cfg=None):

# setup formatter
logger.info('Setting up data formatter...')
- self.formatter = load_formatter(self.cfg.dataset_path,
- self.cfg.generated_dataset_config,
- self.cfg.text_keys, self.cfg.suffixes,
- self.cfg.add_suffix)
+ self.formatter = load_formatter(
+ dataset_path=self.cfg.dataset_path,
+ generated_dataset_config=self.cfg.generated_dataset_config,
+ text_keys=self.cfg.text_keys,
+ suffixes=self.cfg.suffixes,
+ add_suffix=self.cfg.add_suffix)

# whether to use checkpoint mechanism. If it's true, Executor will
# check if there are existing checkpoints first and try to load the
Expand Down

0 comments on commit b1fe01d

Please sign in to comment.