# create_plot_files.py
import glob
import re
import argparse
import bisect
import sys
import numpy
acceptable_queries = ['Pass', 'Target', 'Queue', 'Bytes_Xfered', 'Ops',
                      'Elapsed', 'Bandwidth', 'IOPS', 'Latency', 'Pct_CPU',
                      'Xfer_Size']
def get_total_num_passes(basedir):
num_passes = 0
# Just grabbing the first file to find total number of passes
file_names = glob.glob(basedir + '/' + '*.txt')
    with open(file_names[0], 'r') as fp:
xdd_input = fp.readlines()
    # Now find the number of passes. If xdd was not run with verbose
    # output, there is only one combined pass line; otherwise there is
    # one line per pass to account for.
for line in xdd_input:
if 'TARGET_PASS' in line:
num_passes += 1
    # If there was only one combined pass, report a single pass
if num_passes == 0:
num_passes = 1
return num_passes
def is_passes_xdd_data(basedir):
    # Just grabbing the first file to figure out if it is a pass or run file
    file_names = glob.glob(basedir + '/' + '*.txt')
    first_file_name = file_names[0] if file_names else ''
    # Checking to see if the file name has 'passes' in it
    if 'passes' in first_file_name:
        return True
    else:
        # Assumed here that the file name contains 'runs'
        return False
# Getting the threads used for each test
def get_threads(threads, base_dir):
all_threads = []
file_names = glob.glob(base_dir + '/' + '*.txt')
for name in file_names:
m = re.search('threads_[0-9]+', name)
if m:
thread_str = m.group(0)
m = re.search('[0-9]+', thread_str)
bisect.insort(all_threads, int(m.group(0)))
reduced_threads = list(set(all_threads))
for val in reduced_threads:
bisect.insort(threads, val)
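# For illustration: get_threads pulls the thread count out of each file
# name via the 'threads_[0-9]+' pattern, so a hypothetical file named
# 'mytest_run_3_threads_16.txt' would contribute 16 to the sorted,
# de-duplicated thread list.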
# Getting the total number of runs in input files and threads run
def get_total_num_runs(base_dir):
    # Just grab the first file as all files should contain
    # the same number of total runs
    file_names = glob.glob(base_dir + '/' + '*.txt')
with open(file_names[0]) as fp:
line = fp.readline()
run_and_runs = [int(s) for s in line.split() if s.isdigit()]
# Total Number of Runs
total_runs = run_and_runs[1]
return total_runs
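# Note: the code above assumes the first line of each data file carries
# two integers, the current run number and the total number of runs;
# the total is the value extracted as run_and_runs[1].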
def parse_input_file(input_file, queries, chart_titles, y_labels):
with open(input_file, 'r') as fp:
line = fp.readline()
# Getting rid of comments at top of file
while '#' in line:
line = fp.readline()
line_split = line.split()
for i in line_split:
queries.append(i)
    for _ in range(len(queries)):
line = fp.readline()
# Getting rid of possible comments
while '#' in line:
line = fp.readline()
chart_titles.append(line)
line = fp.readline()
# Getting rid of possible comments
while '#' in line:
line = fp.readline()
y_labels.append(line)
    # Making sure the queries are valid
    for val in queries:
        if val not in acceptable_queries:
            print(val + ' is not a valid query')
            print('Exiting program...')
            sys.exit(1)
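# Sketch of the .plot layout implied by parse_input_file (inferred from
# the parsing code above, not an official format description): one line
# of whitespace-separated queries, then a chart-title line followed by a
# y-label line for each query, with '#' comment lines allowed between
# entries. A hypothetical two-query file:
#
#   Bandwidth IOPS
#   # chart title for the first query
#   Write Bandwidth vs. Threads
#   Bandwidth (MB/s)
#   Write IOPS vs. Threads
#   IOPS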
def parse_test_case_header(fp, queries):
    # Looking for the XDD command output header, so skip everything else
line = fp.readline()
while line.find(queries[0]) == -1:
line = fp.readline()
fp.readline()
def write_output_file(output_file_name, queries, threads, data_points,
                      std_devs, std_errs, chart_titles, y_labels):
    with open(output_file_name, 'w') as fp:
        # Writing out how many data sets there are along with the
        # number of value rows (data, stddev, stderr) per data set
        fp.write(str(len(data_points)) + ' ' + '3\n')
        # Writing out the x values
        fp.write('# x-title,Number of I/O Threads\n')
        for x in threads:
            fp.write(str(x) + ' ')
        fp.write('\n')
        for x in range(len(data_points)):
            # Writing out chart titles
            fp.write('# ')
            fp.write(chart_titles[x].rstrip() + ',' + queries[x] + '\n')
            # Writing out the y labels
            fp.write('# y-title,')
            fp.write(y_labels[x].rstrip() + ',' + queries[x] + '\n')
            # Writing out y data values
            for y in range(len(data_points[x])):
                fp.write(str('%.3f' % data_points[x][y]) + ' ')
            fp.write('\n')
            # Writing out standard deviations
            fp.write('# STDDEV,' + queries[x] + '\n')
            for y in range(len(std_devs[x])):
                fp.write(str('%.3f' % std_devs[x][y]) + ' ')
            fp.write('\n')
            # Writing out standard errors
            fp.write('# STDERR,' + queries[x] + '\n')
            for y in range(len(std_errs[x])):
                fp.write(str('%.3f' % std_errs[x][y]) + ' ')
            fp.write('\n')
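# With two data sets and four thread counts, the file written above
# would look roughly like this (illustrative values only):
#
#   2 3
#   # x-title,Number of I/O Threads
#   1 2 4 8
#   # Write Bandwidth vs. Threads,Bandwidth
#   # y-title,Bandwidth (MB/s),Bandwidth
#   812.000 1520.375 2610.500 3105.250
#   # STDDEV,Bandwidth
#   12.031 18.220 25.104 30.559
#   # STDERR,Bandwidth
#   5.381 8.148 11.227 13.667
#   ... (the same three rows repeat for the second data set)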
def get_offsets(base_dir, offsets, queries):
file_names = glob.glob(base_dir + '/' + '*.txt')
    # Just grabbing the first file; it doesn't really matter which
    # file it is, because the offsets will be the same in all of them
with open(file_names[0]) as fp:
line = fp.readline()
while line.find(queries[0]) == -1:
line = fp.readline()
line_list = line.split()
for x in queries:
offsets.append(line_list.index(x))
def getting_data(base_dir, queries, chart_titles, y_labels, threads, grab):
    offsets = []
    num_passes = 0
    # If we are using XDD pass data, we need to find the total number
    # of passes
    if is_passes_xdd_data(base_dir):
        num_passes = get_total_num_passes(base_dir)
    else:
        runs = get_total_num_runs(base_dir)
    # Getting offsets of points of interest
    get_offsets(base_dir, offsets, queries)
    # Initializing data points, std_devs, std_errs
    data_points = [[] for _ in range(len(queries))]
    std_devs = [[] for _ in range(len(queries))]
    std_errs = [[] for _ in range(len(queries))]
for t in threads:
# For each thread we will get the median, standard
# deviations, and standard error for all queries
input_file = base_dir + '/'
path_split = base_dir.split('/')
input_file += path_split[-1]
if is_passes_xdd_data(base_dir):
input_file += '_passes'
else:
input_file += '_run_'
        # Initializing the current run's data sets
        run_datapoints = [[] for _ in range(len(queries))]
if is_passes_xdd_data(base_dir):
current_input = input_file
current_input += '_threads_' + str(t) + '.txt'
input_fp = open(current_input, 'r')
# Parsing out XDD normal header info
parse_test_case_header(input_fp, queries)
# Getting all datapoints
            for p in range(num_passes):
                line = input_fp.readline()
                line_list = line.split()
                # Getting the data points of interest for this pass
                for x in range(len(queries)):
                    # Hacky workaround for an extra tab inserted in the
                    # XDD output for certain passes. Our only hope of
                    # fixing this error is if the tab appears in a
                    # floating point number...
                    if len(line_list) > 13:
                        new_line_list = []
                        while len(line_list) != 0:
                            curr_val = line_list.pop(0)
                            float_missing_dec = re.search(r'^\d+\.\D?$', curr_val)
                            if float_missing_dec:
                                # Found a number cut off at the decimal
                                # point, so combine it with the next
                                # token to rebuild the value
                                new_line_list.append(curr_val + line_list.pop(0))
                            else:
                                new_line_list.append(curr_val)
                        line_list = new_line_list
                    run_datapoints[x].append(float(line_list[offsets[x]]))
input_fp.close()
else:
for r in range(1,runs+1):
# Getting current input based on run #
current_input = input_file
current_input += str(r) + '_threads_' + str(t) + '.txt'
input_fp = open(current_input, 'r')
# Parsing out XDD normal header info
parse_test_case_header(input_fp, queries)
line = input_fp.readline()
line_list = line.split()
# Getting data points of interest for this run
                for x in range(len(queries)):
run_datapoints[x].append(float(line_list[offsets[x]]))
input_fp.close()
        for x in range(len(queries)):
            if grab == 'mean':
                # Getting the mean of the run values
                data_points[x].append(numpy.mean(run_datapoints[x]))
            elif grab == 'median':
                # Getting the median of the run values
                data_points[x].append(numpy.median(run_datapoints[x]))
            elif grab == 'min':
                # Getting the min of the run values
                data_points[x].append(numpy.min(run_datapoints[x]))
            elif grab == 'max':
                # Getting the max of the run values
                data_points[x].append(numpy.max(run_datapoints[x]))
            # Getting the standard deviation for the data points
            std_devs[x].append(numpy.std(run_datapoints[x]))
            # Getting the standard error for the data points
            if is_passes_xdd_data(base_dir):
                std_errs[x].append(numpy.std(run_datapoints[x]) / (num_passes ** 0.5))
            else:
                std_errs[x].append(numpy.std(run_datapoints[x]) / (runs ** 0.5))
    # Writing out all output to the output file
    base_dir_list = base_dir.split('/')
    output_file_name = base_dir_list[-1]
    output_file_name += '_data.txt'
write_output_file(output_file_name, queries, threads, data_points, \
std_devs, std_errs, chart_titles, y_labels)
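# getting_data assembles its input paths from the directory name itself,
# so it expects files laid out as <base_dir>/<base_dir>_passes_threads_<t>.txt
# for pass data, or <base_dir>/<base_dir>_run_<r>_threads_<t>.txt for run
# data (a naming convention inferred from the path construction above).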
def create_bulk_plot_files(plot_dir, xdd_dir, grab):
    threads = []
    chart_titles = []
    y_labels = []
    queries = []
    xdd_data_dirs = []
    # Grabbing all plot files from the plot directory
    plot_file_names = glob.glob(plot_dir + '/' + '*.plot')
    # Generating all xdd data directory paths
    for plot_file_name in plot_file_names:
        base_dir = plot_file_name.split('/')
        base_dir_str = base_dir[-1]
        # Slice off the '.plot' extension; str.strip() would remove
        # matching characters from both ends, not the suffix
        xdd_data_dirs.append(xdd_dir + '/' + base_dir_str[:-len('.plot')])
    # Getting all the threads used, which should be the same for every
    # test. Because of this, just use the first directory
    get_threads(threads, xdd_data_dirs[0])
# Now just generating all plot files
for plot_file, xdd_curr_dir in zip(plot_file_names, xdd_data_dirs):
parse_input_file(plot_file, queries, chart_titles, y_labels)
getting_data(xdd_curr_dir,
queries,
chart_titles,
y_labels,
threads,
grab)
queries = []
chart_titles = []
y_labels = []
def main():
    threads = []
    queries = []
    chart_titles = []
    y_labels = []
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--directory', type=str,
help='The base directory to pull input files from, either this and -i are required unless -b is used')
    parser.add_argument('-i', '--input_file', type=str,
                        help='Plot configuration input file, either this and -d are required unless -b is used')
parser.add_argument('-b', '--bulk', type=str,
help='Do bulk plot operation, either this or -d and -i are required (-b plot_file_dir,xdd_data_dir)')
parser.add_argument('-g', '--grab', type=str,
help='Value to grab can be (mean, median, max, min), median by default')
    # Getting command line arguments
args = parser.parse_args()
    # Checking to see if the user passed in a grab value
    grab = args.grab
    if grab not in ('mean', 'median', 'max', 'min'):
        grab = 'median'
        print('Grabbing median value')
    else:
        print('Grabbing ' + grab + ' value')
    # Checking to make sure the requirements are met
if args.directory is None and args.input_file is None and args.bulk is None:
        # Error: either -d and -i must be set, or -b must be set
parser.print_help()
sys.exit(0)
elif args.bulk is None:
if args.directory is None or args.input_file is None:
            # If not using bulk, then -d and -i must both be set
parser.print_help()
sys.exit(0)
if args.bulk is not None:
bulk_list = args.bulk.split(',')
plot_dir = bulk_list[0].rstrip('/')
xdd_dir = bulk_list[1].rstrip('/')
create_bulk_plot_files(plot_dir, xdd_dir, grab)
else:
dir_path = args.directory.rstrip('/')
get_threads(threads, dir_path)
parse_input_file(args.input_file, queries, chart_titles, y_labels)
getting_data(dir_path,
queries,
chart_titles,
y_labels,
threads,
grab)
if __name__ == "__main__":
main()
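# Example invocations (hypothetical paths, for illustration only):
#   python create_plot_files.py -d xdd_results/seq_write -i seq_write.plot -g mean
#   python create_plot_files.py -b plot_configs,xdd_results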