Updating the scripts for plotting #130

Open
wants to merge 10 commits into base: dev
15 changes: 8 additions & 7 deletions tools/report_df/BenchmarkData.py
@@ -1,13 +1,14 @@
import pandas as pd
from pandas import DataFrame
import numpy as np
import sys
import json
import numpy as np
import pandas as pd
from pandas import DataFrame
from collections import Mapping, Iterable
import Constants

INDEX_NAMES = ['Fuzzer', 'Target','Program','Campaign','Metric','BugID']

#TODO add retrival of experiment infomation (Campaign duration)
# TODO add retrieval of experiment information (Campaign duration)
class BenchmarkData:

def __init__(self,filename, **kwargs):
@@ -62,7 +63,7 @@ def update_dict(d, u):
df.rename(columns={0: 'Time'}, inplace=True)
# change index names
df.rename_axis(index=INDEX_NAMES, inplace=True)
#Sorting for later performance gain
# sorting for later performance gain
self._df = df.sort_index()

# save configuration parameters
@@ -75,11 +76,11 @@ def frame(self):

@property
def duration(self):
return self._config.get('duration', 24 * 60 * 60)
return self._config.get('duration', Constants.DEFAULT_DURATION)

@property
def trials(self):
return self._config.get('trials', 10)
return self._config.get('trials', Constants.DEFAULT_TRIALS)

@property
def version(self):
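A note on the import block added above: `from collections import Mapping, Iterable` stops working on Python 3.10+, where these ABCs are only importable from `collections.abc`. A minimal sketch of a version-tolerant import, assuming the scripts may run on newer interpreters (this is a suggestion, not part of the diff):

    # Prefer collections.abc (required on Python 3.10+); fall back only for very old interpreters.
    try:
        from collections.abc import Mapping, Iterable
    except ImportError:
        from collections import Mapping, Iterable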
4 changes: 4 additions & 0 deletions tools/report_df/Constants.py
@@ -0,0 +1,4 @@
# Time used to run the fuzzing campaigns (in seconds)
DEFAULT_DURATION = 7 * 24 * 60 * 60
# Number of fuzzing campaigns run per program
DEFAULT_TRIALS = 10
33 changes: 17 additions & 16 deletions tools/report_df/DataProcessing.py
@@ -11,7 +11,7 @@

def average_time_to_metric_data(bd,metric) :
"""
Reshapes the intial dataframe in a way to obtain the mean and
Reshapes the initial dataframe in a way to obtain the mean and
variance of the number of bugs that have satisfied the metric

:param bd: { A BenchmarkData object loaded from experiment summary file }
@@ -176,7 +176,7 @@ def number_of_unique_bugs_found_data(bd):
df = bd.frame
#Extracting all found bugs
df_triggered = df.iloc[df.index.get_level_values('Metric') == Metric.TRIGGERED.value]
#Reseting the index is necessary to get the number of unique bugs triggered by each fuzzer
#Resetting the index is necessary to get the number of unique bugs triggered by each fuzzer
num_trigg = df_triggered.reset_index().groupby(['Fuzzer'])['BugID'].nunique().to_frame()
num_trigg.columns = ['Bugs']
return num_trigg
@@ -207,13 +207,13 @@ def bug_list(bd,fuzzer,target,metric):
df_bugs = df_bugs.loc[fuzzer,target].groupby('BugID')['Time'].apply(list)
#Preparing the new index to be the bugs
index = df_bugs.index.tolist()
#Reseting the index and converting the data in the column Time into a new Dataframe
#Resetting the index and converting the data in the column Time into a new Dataframe
d = pd.DataFrame(df_bugs.reset_index()['Time'].to_list(),index = index)
return d

def line_plot_data(bd,target,metric) :
"""
Returns a Dataframe that has a row for every fuzzer and 3 columns (x,y,ci) representing repectively
Returns a Dataframe that has a row for every fuzzer and 3 columns (x,y,ci) representing respectively
the datapoints to place on x and y axis alongside with the error margin

:param bd: { A BenchmarkData object loaded from experiment summary file }
@@ -261,7 +261,7 @@ def step_val(series,x):
x_plot.columns = ['x']
y_plot = x_plot
index = x_plot.index
#Reseting index to be able to pass index values as argument
#Resetting index to be able to pass index values as argument
y_plot = y_plot.reset_index()
y_plot = y_plot.apply(lambda f : get_step_value(f['Fuzzer'],f['x'],df_lib),axis=1).to_frame()
y_plot.index = index
@@ -302,15 +302,15 @@ def fillmissing(group, supergroup_name):
metrics = set(['reached', 'triggered'])
group_metrics = set(group['Metric'].unique())
for metric in metrics.difference(group_metrics):
new_row = pd.Series({
new_row = pd.DataFrame({
'Fuzzer': fuzzer,
'Target': target,
'Program': program,
'Campaign': 0,
'Metric': metric,
'BugID': bug
})
group = group.append(new_row, ignore_index=True)
}, index=[metric])
group = pd.concat([group, new_row], ignore_index=True)
return group

name = group.name
@@ -319,19 +319,20 @@ def fillmissing(group, supergroup_name):
for fuzzer in fuzzers:
if fuzzer in fuzzers_in_group:
continue
new_rows = [
pd.Series({
# reached bugs
new_row = pd.DataFrame({
'Fuzzer': fuzzer,
'Metric': 'reached'
}),
pd.Series({
}, index=['Metric'])
group = pd.concat([group, new_row], ignore_index=True)
# triggered bugs
new_row = pd.DataFrame({
'Fuzzer': fuzzer,
'Metric': 'triggered'
}),
]
group = group.append(new_rows, ignore_index=True)
}, index=['Metric'])
group = pd.concat([group, new_row], ignore_index=True)

group = group.groupby('Fuzzer').apply(fillmissing, name).reset_index(drop=True)
group = group.groupby('Fuzzer', group_keys=False).apply(fillmissing, name).reset_index(drop=True)

subgroups = group.groupby(['Fuzzer','Metric']).apply(fit_kmf_one, name, N)
return subgroups
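For context on the changes in this file: `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0, so missing rows are now built as one-row DataFrames and added with `pd.concat`; passing `group_keys=False` to `groupby` keeps the group key out of the apply result's index, preserving the pre-1.5 behaviour. A minimal sketch of the concat pattern (the fuzzer and metric values are illustrative, not taken from real data):

    import pandas as pd

    group = pd.DataFrame({'Fuzzer': ['afl'], 'Metric': ['reached']})
    # Build the missing row as a one-row DataFrame and concatenate it;
    # ignore_index=True renumbers the rows like append(..., ignore_index=True) did.
    new_row = pd.DataFrame({'Fuzzer': 'afl', 'Metric': 'triggered'}, index=[0])
    group = pd.concat([group, new_row], ignore_index=True)
    # group now holds two rows: (afl, reached) and (afl, triggered)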
14 changes: 6 additions & 8 deletions tools/report_df/MatplotlibPlotter.py
@@ -70,7 +70,7 @@ def expected_time_to_trigger(bd, outdir):
def unique_bugs_per_target(bd, outdir, metric, libraries=None, symmetric=False, ncols=3):
"""
Creates a 2D array plot representing the statistical significance
between every pair of fuzzers on a target libary
between every pair of fuzzers on a target library

:param bd: { A BenchmarkData object loaded from experiment summary file }
:type bd: { BenchmarkData }
@@ -109,7 +109,6 @@ def unique_bugs_per_target(bd, outdir, metric, libraries=None, symmetric=False,

for ax in axs.flat[len(libraries):]:
fig.delaxes(ax)
fig.tight_layout(pad=2.0)

sigmatrix, path = output(outdir, 'plot', 'summary_signplot.svg')
fig.savefig(path, bbox_inches='tight')
@@ -135,7 +134,7 @@ def unique_bugs_per_target(bd, outdir, metric, libraries=None, symmetric=False,
def bug_metric_boxplot(bd, outdir):
"""
Create box plot graph showing the time distribution
of bugs who satisfid the metric
of bugs who satisfied the metric

:param bd: { A BenchmarkData object loaded from experiment summary file }
:type bd: { BenchmarkData }
@@ -179,7 +178,7 @@ def plot_boxes(df):

return outfiles

def line_plot_unqiue_bugs(bd, outdir, fuzzers, target, metric) :
def line_plot_unique_bugs(bd, outdir, fuzzers, target, metric) :
"""
Creates a line plot for each fuzzer,target pair
If fuzzers is empty then a plot for every known fuzzer will be computed
@@ -226,7 +225,6 @@ def line_plot_unqiue_bugs(bd, outdir, fuzzers, target, metric) :
axes.set_title(fuzzer)
axes.set_ylim((0, y_max + 5))
axes.set_xlim((x_min, x_max + 5))
plt.tight_layout(pad=2.0)

name, path = output(outdir, 'plot', 'lineplot.svg')
fig.savefig(path, bbox_inches='tight')
@@ -390,17 +388,17 @@ def series_to_mask(series, df):
hiliter.template = style_tpl
heatmap.template = style_tpl

table_html = re.sub(r'colspan=(\d+)', r'colspan="\1"', styler.render())
table_html = re.sub(r'colspan=(\d+)', r'colspan="\1"', styler.to_html())
table_name, path = output(outdir, 'data', 'mean_survival.html')
with open(path, 'w') as f:
f.write(table_html)

hiliter_css = '\n'.join(hiliter.render().split('\n')[1:-1]) + '}'
hiliter_css = '\n'.join(hiliter.to_html().split('\n')[1:-1]) + '}'
hiliter_name, path = output(outdir, 'css', 'survival_hiliter.css')
with open(path, 'w') as f:
f.write(hiliter_css)

heatmap_css = '\n'.join(heatmap.render().split('\n')[1:-1]) + '}'
heatmap_css = '\n'.join(heatmap.to_html().split('\n')[1:-1]) + '}'
heatmap_name, path = output(outdir, 'css', 'survival_heatmap.css')
with open(path, 'w') as f:
f.write(heatmap_css)
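On the `render()` to `to_html()` switch above: `Styler.render()` was deprecated in pandas 1.3 and removed in 2.0, and `Styler.to_html()` is the replacement for producing the HTML string. A minimal standalone sketch of the pattern, using a hypothetical frame instead of the real survival table:

    import pandas as pd

    df = pd.DataFrame({'Fuzzer': ['afl', 'honggfuzz'], 'Bugs': [3, 5]})
    styler = df.style.highlight_max(subset=['Bugs'])
    # to_html() returns the styled table as an HTML string; render() no longer exists in pandas >= 2.0.
    html = styler.to_html()
    with open('example_table.html', 'w') as f:
        f.write(html)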
2 changes: 1 addition & 1 deletion tools/report_df/ReportGeneration.py
@@ -63,7 +63,7 @@ def ensure_dir(path):
ppool = locals()

env = jinja2.Environment(loader=jinja2.ChoiceLoader(
[jinja2.FileSystemLoader('templates'),
[jinja2.FileSystemLoader(os.path.join(os.path.dirname(__file__), 'templates')),
jinja2.FileSystemLoader(outdir)])
)
base_template = env.get_template('base.md')
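The loader change above resolves the templates directory relative to the module file instead of the current working directory, so the report can be generated from any location. A minimal sketch of the pattern, assuming templates/ lives next to ReportGeneration.py:

    import os
    import jinja2

    # Anchor the template directory to this file's location rather than the CWD.
    template_dir = os.path.join(os.path.dirname(__file__), 'templates')
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
    template = env.get_template('base.md')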
33 changes: 19 additions & 14 deletions tools/report_df/main.py
100644 → 100755
@@ -1,28 +1,33 @@
#!/usr/bin/env python3

import sys
import json
import logging
import argparse
import jinja2
from Metric import Metric
import Constants
import DataProcessing
import MatplotlibPlotter
from Metric import Metric
from BenchmarkData import BenchmarkData
import DataProcessing
from ReportGeneration import generate_report
import argparse
import logging


def parse_args():
parser = argparse.ArgumentParser(description=(
"Creates detailed plots from experiment summary and generates a report "
"for the Magma website."
))
parser = argparse.ArgumentParser(description="Creates detailed plots from"
" experiment summary and generates a report for the Magma website.")
parser.add_argument("json",
help="The experiment summary JSON file generated by the benchd tool.")
parser.add_argument("outdir",
help="The path to the directory where webpage output and hierarchy "
"will be stored.")
help="The path to the directory where webpage output and hierarchy"
" will be stored.")
parser.add_argument('-d', '--duration', default=Constants.DEFAULT_DURATION,
type=int, help="Time used to run the fuzzing campaigns (in seconds).")
parser.add_argument('-t', '--trials', default=Constants.DEFAULT_TRIALS,
type=int, help="Number of fuzzing campaigns run per program.")
parser.add_argument('-v', '--verbose', action='count', default=0,
help=("Controls the verbosity of messages. "
"-v prints info. -vv prints debug. Default: warnings and higher.")
)
help="Controls the verbosity of messages. -v prints info."
" -vv prints debug. Default: warnings and higher.")
return parser.parse_args()

def configure_verbosity(level):
@@ -38,7 +43,7 @@ def configure_verbosity(level):
def main():
args = parse_args()
configure_verbosity(args.verbose)
bd = BenchmarkData(args.json, config={'duration': 7 * 24 * 60 * 60, 'trials': 10})
bd = BenchmarkData(args.json, config={'duration': args.duration, 'trials': args.trials})
generate_report(bd, args.outdir)

if __name__ == '__main__':
11 changes: 6 additions & 5 deletions tools/report_df/requirements.txt
@@ -1,5 +1,6 @@
pandas>=1.1.0
lifelines>=0.25.2
scipy>=1.4.1
seaborn>=0.11.0
scikit-posthocs>=0.6.4
pandas>=1.5.2
lifelines>=0.27.4
scipy>=1.9.3
seaborn>=0.12.1
scikit-posthocs>=0.7.0
jinja2>=3.1.2