Skip to content

Commit

Permalink
* Set the multiprocessing (mp) start method for each OP during probing as well
Browse files (browse the repository at this point in the history)
  • Loading branch information
HYLcool committed Sep 25, 2024
1 parent 784d21c commit 06b12c8
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions data_juicer/core/adapter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,8 @@
from datasets.config import DEFAULT_MAX_BATCH_SIZE

from data_juicer.core.monitor import Monitor
from data_juicer.ops import UNFORKABLE
from data_juicer.utils.process_utils import setup_mp


class Adapter:
Expand Down Expand Up @@ -34,7 +36,12 @@ def execute_and_probe(dataset, operators, sample_interval=0.5):
# resource utilization list
resource_util_list = []
# probe for each OP
unforkable_operators = set(UNFORKABLE.modules.keys())
for op in operators:
# select suitable mp method for each OP
mp_context = ['forkserver', 'spawn'] if (
op.use_cuda() or op._name in unforkable_operators) else None
setup_mp(mp_context)
# expand the test dataset according to the runtime number of
# processes to ensure enough data for a batch and probe the true
# resource utilization for each OP
Expand Down

0 comments on commit 06b12c8

Please sign in to comment.