Skip to content

Commit

Permalink
Bugfix for mismatched types (int vs. float) caused by two OPs (#147)
Browse files Browse the repository at this point in the history
* - bugfix for mismatched types (int vs. float) caused by two OPs
- improve the usability of logfile_name when running several experiments in the same work_dir

* - bugfix for mismatched types (int vs. float) caused by two OPs
- improve the usability of logfile_name when running several experiments in the same work_dir

* - bugfix for mismatched types (int vs. float) caused by two OPs
- improve the usability of logfile_name when running several experiments in the same work_dir
  • Loading branch information
yxdyc authored Dec 21, 2023
1 parent c85fd47 commit b53d2dc
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
6 changes: 3 additions & 3 deletions data_juicer/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,14 +324,14 @@ def init_setup_from_cfg(cfg):
"""

cfg.export_path = os.path.abspath(cfg.export_path)
- export_path = cfg.export_path
- cfg.work_dir = os.path.dirname(export_path)
+ cfg.work_dir = os.path.dirname(cfg.export_path)
+ export_rel_path = os.path.relpath(cfg.export_path, start=cfg.work_dir)
log_dir = os.path.join(cfg.work_dir, 'log')
if not os.path.exists(log_dir):
os.makedirs(log_dir, exist_ok=True)
timestamp = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
cfg.timestamp = timestamp
- logfile_name = timestamp + '.txt'
+ logfile_name = f'export_{export_rel_path}_time_{timestamp}.txt'
setup_logger(save_dir=log_dir,
filename=logfile_name,
redirect=cfg.executor_type == 'default')
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/maximum_line_length_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def compute_stats(self, sample, context=False):
sample[Fields.context][context_key] = lines
line_lengths = list(map(len, lines))
sample[Fields.stats][StatsKeys.max_line_length] = max(
- line_lengths) if line_lengths else 0.0
+ line_lengths) if line_lengths else 0
return sample

def process(self, sample):
Expand Down

0 comments on commit b53d2dc

Please sign in to comment.