Updating the scripts for plotting #130

Open
wants to merge 10 commits into base: dev
15 changes: 8 additions & 7 deletions tools/report_df/BenchmarkData.py
@@ -1,13 +1,14 @@
import pandas as pd
from pandas import DataFrame
import numpy as np
import sys
import json
import numpy as np
import pandas as pd
from pandas import DataFrame
from collections import Mapping, Iterable
import Constants

INDEX_NAMES = ['Fuzzer', 'Target','Program','Campaign','Metric','BugID']

#TODO add retrival of experiment infomation (Campaign duration)
# TODO add retrieval of experiment information (Campaign duration)
class BenchmarkData:

def __init__(self,filename, **kwargs):
@@ -62,7 +63,7 @@ def update_dict(d, u):
df.rename(columns={0: 'Time'}, inplace=True)
# change index names
df.rename_axis(index=INDEX_NAMES, inplace=True)
#Sorting for later performance gain
# sorting for later performance gain
self._df = df.sort_index()

# save configuration parameters
@@ -75,11 +76,11 @@ def frame(self):

@property
def duration(self):
return self._config.get('duration', 24 * 60 * 60)
return self._config.get('duration', Constants.DEFAULT_DURATION)

@property
def trials(self):
return self._config.get('trials', 10)
return self._config.get('trials', Constants.DEFAULT_TRIALS)

@property
def version(self):
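A note on the import block added above: `from collections import Mapping, Iterable` stops working on Python 3.10+, where these ABCs are only importable from `collections.abc`. A minimal sketch of a version-tolerant import, assuming the scripts may run on newer interpreters (this is a suggestion, not part of the diff):

    # Prefer collections.abc (required on Python 3.10+); fall back only for very old interpreters.
    try:
        from collections.abc import Mapping, Iterable
    except ImportError:
        from collections import Mapping, Iterable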
4 changes: 4 additions & 0 deletions tools/report_df/Constants.py
@@ -0,0 +1,4 @@
# Time used to run the fuzzing campaigns (in seconds)
DEFAULT_DURATION = 7 * 24 * 60 * 60
# Number of fuzzing campaigns run per program
DEFAULT_TRIALS = 10
33 changes: 17 additions & 16 deletions tools/report_df/DataProcessing.py
@@ -11,7 +11,7 @@

def average_time_to_metric_data(bd,metric) :
"""
Reshapes the intial dataframe in a way to obtain the mean and
Reshapes the initial dataframe in a way to obtain the mean and
variance of the number of bugs that have satisfied the metric

:param bd: { A BenchmarkData object loaded from experiment summary file }
@@ -176,7 +176,7 @@ def number_of_unique_bugs_found_data(bd):
df = bd.frame
#Extracting all found bugs
df_triggered = df.iloc[df.index.get_level_values('Metric') == Metric.TRIGGERED.value]
#Reseting the index is necessary to get the number of unique bugs triggered by each fuzzer
#Resetting the index is necessary to get the number of unique bugs triggered by each fuzzer
num_trigg = df_triggered.reset_index().groupby(['Fuzzer'])['BugID'].nunique().to_frame()
num_trigg.columns = ['Bugs']
return num_trigg
@@ -207,13 +207,13 @@ def bug_list(bd,fuzzer,target,metric):
df_bugs = df_bugs.loc[fuzzer,target].groupby('BugID')['Time'].apply(list)
#Preparing the new index to be the bugs
index = df_bugs.index.tolist()
#Reseting the index and converting the data in the column Time into a new Dataframe
#Resetting the index and converting the data in the column Time into a new Dataframe
d = pd.DataFrame(df_bugs.reset_index()['Time'].to_list(),index = index)
return d

def line_plot_data(bd,target,metric) :
"""
Returns a Dataframe that has a row for every fuzzer and 3 columns (x,y,ci) representing repectively
Returns a Dataframe that has a row for every fuzzer and 3 columns (x,y,ci) representing respectively
the datapoints to place on x and y axis alongside with the error margin

:param bd: { A BenchmarkData object loaded from experiment summary file }
@@ -261,7 +261,7 @@ def step_val(series,x):
x_plot.columns = ['x']
y_plot = x_plot
index = x_plot.index
#Reseting index to be able to pass index values as argument
#Resetting index to be able to pass index values as argument
y_plot = y_plot.reset_index()
y_plot = y_plot.apply(lambda f : get_step_value(f['Fuzzer'],f['x'],df_lib),axis=1).to_frame()
y_plot.index = index
@@ -302,15 +302,15 @@ def fillmissing(group, supergroup_name):
metrics = set(['reached', 'triggered'])
group_metrics = set(group['Metric'].unique())
for metric in metrics.difference(group_metrics):
new_row = pd.Series({
new_row = pd.DataFrame({
'Fuzzer': fuzzer,
'Target': target,
'Program': program,
'Campaign': 0,
'Metric': metric,
'BugID': bug
})
group = group.append(new_row, ignore_index=True)
}, index=[metric])
group = pd.concat([group, new_row], ignore_index=True)
return group

name = group.name
@@ -319,19 +319,20 @@ def fillmissing(group, supergroup_name):
for fuzzer in fuzzers:
if fuzzer in fuzzers_in_group:
continue
new_rows = [
pd.Series({
# reached bugs
new_row = pd.DataFrame({
'Fuzzer': fuzzer,
'Metric': 'reached'
}),
pd.Series({
}, index=['Metric'])
group = pd.concat([group, new_row], ignore_index=True)
# triggered bugs
new_row = pd.DataFrame({
'Fuzzer': fuzzer,
'Metric': 'triggered'
}),
]
group = group.append(new_rows, ignore_index=True)
}, index=['Metric'])
group = pd.concat([group, new_row], ignore_index=True)

group = group.groupby('Fuzzer').apply(fillmissing, name).reset_index(drop=True)
group = group.groupby('Fuzzer', group_keys=False).apply(fillmissing, name).reset_index(drop=True)

subgroups = group.groupby(['Fuzzer','Metric']).apply(fit_kmf_one, name, N)
return subgroups
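For context on the changes in this file: `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0, so missing rows are now built as one-row DataFrames and added with `pd.concat`; passing `group_keys=False` to `groupby` keeps the group key out of the apply result's index, preserving the pre-1.5 behaviour. A minimal sketch of the concat pattern (the fuzzer and metric values are illustrative, not taken from real data):

    import pandas as pd

    group = pd.DataFrame({'Fuzzer': ['afl'], 'Metric': ['reached']})
    # Build the missing row as a one-row DataFrame and concatenate it;
    # ignore_index=True renumbers the rows like append(..., ignore_index=True) did.
    new_row = pd.DataFrame({'Fuzzer': 'afl', 'Metric': 'triggered'}, index=[0])
    group = pd.concat([group, new_row], ignore_index=True)
    # group now holds two rows: (afl, reached) and (afl, triggered)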
14 changes: 6 additions & 8 deletions tools/report_df/MatplotlibPlotter.py
@@ -70,7 +70,7 @@ def expected_time_to_trigger(bd, outdir):
def unique_bugs_per_target(bd, outdir, metric, libraries=None, symmetric=False, ncols=3):
"""
Creates a 2D array plot representing the statistical significance
between every pair of fuzzers on a target libary
between every pair of fuzzers on a target library

:param bd: { A BenchmarkData object loaded from experiment summary file }
:type bd: { BenchmarkData }
@@ -109,7 +109,6 @@ def unique_bugs_per_target(bd, outdir, metric, libraries=None, symmetric=False,

for ax in axs.flat[len(libraries):]:
fig.delaxes(ax)
fig.tight_layout(pad=2.0)

sigmatrix, path = output(outdir, 'plot', 'summary_signplot.svg')
fig.savefig(path, bbox_inches='tight')
@@ -135,7 +134,7 @@ def unique_bugs_per_target(bd, outdir, metric, libraries=None, symmetric=False,
def bug_metric_boxplot(bd, outdir):
"""
Create box plot graph showing the time distribution
of bugs who satisfid the metric
of bugs who satisfied the metric

:param bd: { A BenchmarkData object loaded from experiment summary file }
:type bd: { BenchmarkData }
@@ -179,7 +178,7 @@ def plot_boxes(df):

return outfiles

def line_plot_unqiue_bugs(bd, outdir, fuzzers, target, metric) :
def line_plot_unique_bugs(bd, outdir, fuzzers, target, metric) :
"""
Creates a line plot for each fuzzer,target pair
If fuzzers is empty then a plot for every known fuzzer will be computed
@@ -226,7 +225,6 @@ def line_plot_unqiue_bugs(bd, outdir, fuzzers, target, metric) :
axes.set_title(fuzzer)
axes.set_ylim((0, y_max + 5))
axes.set_xlim((x_min, x_max + 5))
plt.tight_layout(pad=2.0)

name, path = output(outdir, 'plot', 'lineplot.svg')
fig.savefig(path, bbox_inches='tight')
@@ -390,17 +388,17 @@ def series_to_mask(series, df):
hiliter.template = style_tpl
heatmap.template = style_tpl

table_html = re.sub(r'colspan=(\d+)', r'colspan="\1"', styler.render())
table_html = re.sub(r'colspan=(\d+)', r'colspan="\1"', styler.to_html())
table_name, path = output(outdir, 'data', 'mean_survival.html')
with open(path, 'w') as f:
f.write(table_html)

hiliter_css = '\n'.join(hiliter.render().split('\n')[1:-1]) + '}'
hiliter_css = '\n'.join(hiliter.to_html().split('\n')[1:-1]) + '}'
hiliter_name, path = output(outdir, 'css', 'survival_hiliter.css')
with open(path, 'w') as f:
f.write(hiliter_css)

heatmap_css = '\n'.join(heatmap.render().split('\n')[1:-1]) + '}'
heatmap_css = '\n'.join(heatmap.to_html().split('\n')[1:-1]) + '}'
heatmap_name, path = output(outdir, 'css', 'survival_heatmap.css')
with open(path, 'w') as f:
f.write(heatmap_css)
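On the `render()` to `to_html()` switch above: `Styler.render()` was deprecated in pandas 1.3 and removed in 2.0, and `Styler.to_html()` is the replacement for producing the HTML string. A minimal standalone sketch of the pattern, using a hypothetical frame instead of the real survival table:

    import pandas as pd

    df = pd.DataFrame({'Fuzzer': ['afl', 'honggfuzz'], 'Bugs': [3, 5]})
    styler = df.style.highlight_max(subset=['Bugs'])
    # to_html() returns the styled table as an HTML string; render() no longer exists in pandas >= 2.0.
    html = styler.to_html()
    with open('example_table.html', 'w') as f:
        f.write(html)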
2 changes: 1 addition & 1 deletion tools/report_df/ReportGeneration.py
@@ -63,7 +63,7 @@ def ensure_dir(path):
ppool = locals()

env = jinja2.Environment(loader=jinja2.ChoiceLoader(
[jinja2.FileSystemLoader('templates'),
[jinja2.FileSystemLoader(os.path.join(os.path.dirname(__file__), 'templates')),
jinja2.FileSystemLoader(outdir)])
)
base_template = env.get_template('base.md')
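The loader change above resolves the templates directory relative to the module file instead of the current working directory, so the report can be generated from any location. A minimal sketch of the pattern, assuming templates/ lives next to ReportGeneration.py:

    import os
    import jinja2

    # Anchor the template directory to this file's location rather than the CWD.
    template_dir = os.path.join(os.path.dirname(__file__), 'templates')
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
    template = env.get_template('base.md')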
33 changes: 19 additions & 14 deletions tools/report_df/main.py
100644 → 100755
@@ -1,28 +1,33 @@
#!/usr/bin/env python3

import sys
import json
import logging
import argparse
import jinja2
from Metric import Metric
import Constants
import DataProcessing
import MatplotlibPlotter
from Metric import Metric
from BenchmarkData import BenchmarkData
import DataProcessing
from ReportGeneration import generate_report
import argparse
import logging


def parse_args():
parser = argparse.ArgumentParser(description=(
"Creates detailed plots from experiment summary and generates a report "
"for the Magma website."
))
parser = argparse.ArgumentParser(description="Creates detailed plots from"
" experiment summary and generates a report for the Magma website.")
parser.add_argument("json",
help="The experiment summary JSON file generated by the benchd tool.")
parser.add_argument("outdir",
help="The path to the directory where webpage output and hierarchy "
"will be stored.")
help="The path to the directory where webpage output and hierarchy"
" will be stored.")
parser.add_argument('-d', '--duration', default=Constants.DEFAULT_DURATION,
type=int, help="Time used to run the fuzzing campaigns (in seconds).")
parser.add_argument('-t', '--trials', default=Constants.DEFAULT_TRIALS,
type=int, help="Number of fuzzing campaigns run per program.")
parser.add_argument('-v', '--verbose', action='count', default=0,
help=("Controls the verbosity of messages. "
"-v prints info. -vv prints debug. Default: warnings and higher.")
)
help="Controls the verbosity of messages. -v prints info."
" -vv prints debug. Default: warnings and higher.")
return parser.parse_args()

def configure_verbosity(level):
@@ -38,7 +43,7 @@ def configure_verbosity(level):
def main():
args = parse_args()
configure_verbosity(args.verbose)
bd = BenchmarkData(args.json, config={'duration': 7 * 24 * 60 * 60, 'trials': 10})
bd = BenchmarkData(args.json, config={'duration': args.duration, 'trials': args.trials})
generate_report(bd, args.outdir)

if __name__ == '__main__':
11 changes: 6 additions & 5 deletions tools/report_df/requirements.txt
@@ -1,5 +1,6 @@
pandas>=1.1.0
lifelines>=0.25.2
scipy>=1.4.1
seaborn>=0.11.0
scikit-posthocs>=0.6.4
pandas>=1.5.2
lifelines>=0.27.4
scipy>=1.9.3
seaborn>=0.12.1
scikit-posthocs>=0.7.0
jinja2>=3.1.2