Skip to content

Commit

Permalink
* support OP fusion based on probed speed of each OP
Browse files Browse the repository at this point in the history
  • Loading branch information
HYLcool committed Sep 18, 2024
1 parent 5a6c071 commit f6e017f
Showing 1 changed file with 8 additions and 0 deletions.
8 changes: 8 additions & 0 deletions data_juicer/core/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ def execute_and_probe(dataset, operators, sample_interval=0.5):
resource_util_list = []
# probe for each OP
for op in operators:
# temporarily set num_proc to 1 for each OP to focus on the influence of
# batch size only; the original value is restored after probing.
old_num_proc = op.num_proc
op.num_proc = 1

# number of test samples
sample_num = len(dataset)
# run single op and monitor the resource utilization
Expand All @@ -42,6 +47,9 @@ def execute_and_probe(dataset, operators, sample_interval=0.5):
'speed'] = sample_num / resource_util_per_op['time']
resource_util_list.append(resource_util_per_op)

# restore the original num_proc
op.num_proc = old_num_proc

return resource_util_list

@staticmethod
Expand Down

0 comments on commit f6e017f

Please sign in to comment.