Skip to content

Commit

Permalink
+ add visualization graphs for monitor results
Browse files Browse the repository at this point in the history
  • Loading branch information
HYLcool committed Oct 10, 2024
1 parent 06b12c8 commit f65c02c
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
7 changes: 6 additions & 1 deletion data_juicer/core/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,13 @@ def process(self,
dataset.cleanup_cache_files()
checkpointer.save_ckpt(dataset)
if work_dir:
with open(os.path.join(work_dir, 'monitor.json'), 'w') as out:
monitor_dir = os.path.join(work_dir, 'monitor')
os.makedirs(monitor_dir, exist_ok=True)
with open(os.path.join(monitor_dir, 'monitor.json'),
'w') as out:
json.dump(resource_util_list, out)
Monitor.draw_resource_util_graph(resource_util_list,
monitor_dir)
return dataset

def map(self, *args, **kargs):
Expand Down
24 changes: 24 additions & 0 deletions data_juicer/core/monitor.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import time
from functools import partial
from multiprocessing import get_context
Expand Down Expand Up @@ -28,6 +29,7 @@ class Monitor:
'''python
{
'time': 10,
'sampling interval': 0.5,
'resource': [
{
'timestamp': xxx,
Expand All @@ -50,6 +52,7 @@ class Monitor:
'''python
{
'time': 10,
'sampling interval': 0.5,
'resource': [...],
'resource_analysis': {
'GPU free mem.': {
Expand Down Expand Up @@ -118,6 +121,24 @@ def monitor_current_resources():

return resource_dict

@staticmethod
def draw_resource_util_graph(resource_util_list, store_dir):
    """
    Draw one line chart per monitored function and per dynamic metric.

    :param resource_util_list: list of resource utilization dicts, one
        per monitored function. Each dict must provide the 'resource'
        list of samples and the 'sampling interval' between two samples.
    :param store_dir: directory the graphs are written to. Each graph is
        stored as "func_{idx}_{metric}.jpg", where spaces in the metric
        name are replaced by underscores. Created if it does not exist.
    """
    # local import: matplotlib is loaded only when this method is called
    import matplotlib.pyplot as plt

    os.makedirs(store_dir, exist_ok=True)
    for idx, resource_util_dict in enumerate(resource_util_list):
        resource_list = resource_util_dict['resource']
        interval = resource_util_dict['sampling interval']
        # The x axis is the same for every metric of this function:
        # sample i is placed at i * interval.
        # NOTE(review): the axis label assumes the interval is in seconds.
        timestamps = [i * interval for i in range(len(resource_list))]
        for focus_metric in Monitor.DYNAMIC_FIELDS:
            fn = f'func_{idx}_{focus_metric.replace(" ", "_")}.jpg'
            ylbl = '%' if focus_metric.endswith('util.') else 'MB'
            metric_list = [item[focus_metric] for item in resource_list]
            # Use a dedicated figure instead of pyplot's implicit current
            # figure, so a figure the caller has open is never drawn on,
            # and close it afterwards so no figure is left registered in
            # pyplot's global state (even if plotting or saving fails).
            fig, ax = plt.subplots()
            try:
                ax.plot(timestamps, metric_list)
                ax.set_title(focus_metric)
                ax.set_xlabel('Time (s)')
                ax.set_ylabel(ylbl)
                fig.savefig(os.path.join(store_dir, fn),
                            bbox_inches='tight')
            finally:
                plt.close(fig)

@staticmethod
def analyze_resource_util_list(resource_util_list):
"""
Expand Down Expand Up @@ -209,6 +230,9 @@ def monitor_func(func, args=None, sample_interval=0.5):

resource_util_dict['resource'] = mdict['resource']

# record interval
resource_util_dict['sampling interval'] = sample_interval

# calculate speed
resource_util_dict['time'] = end - start

Expand Down

0 comments on commit f65c02c

Please sign in to comment.