From bb55332259943c53958e0d1d0fa3e599698403e6 Mon Sep 17 00:00:00 2001
From: "lielin.hyl"
Date: Thu, 16 Nov 2023 12:19:46 +0800
Subject: [PATCH] * opt: convert relative paths only when it's necessary

---
 data_juicer/format/formatter.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/data_juicer/format/formatter.py b/data_juicer/format/formatter.py
index 86ea481db..c47ad2414 100644
--- a/data_juicer/format/formatter.py
+++ b/data_juicer/format/formatter.py
@@ -208,11 +208,16 @@ def non_empty_text(sample, target_keys):
 
     # 3. convert relative paths to absolute paths
     if global_cfg:
+        ds_dir = global_cfg.dataset_dir
+        image_key = global_cfg.image_key
+
+        if image_key not in dataset.features:
+            # no image path list in dataset, no need to convert
+            return dataset
+
         logger.info('Converting relative paths in the dataset to their '
                     'absolute version. (Based on the directory of input '
                     'dataset file)')
-        ds_dir = global_cfg.dataset_dir
-        image_key = global_cfg.image_key
 
         # function to convert relative paths to absolute paths
         def rel2abs(sample, path_keys, dataset_dir):