Merge pull request #63 from mkyl/alley-and-cleanup

Final Cleanup
uwdb · May 9, 2024 · 684fedb · 684fedb
2 parents b8f1c86 + df019d0
commit 684fedb
Show file tree

Hide file tree

Showing 21 changed files with 128 additions and 90,730 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,9 +7,9 @@
 /docs/Manifest.toml
 /test/coverage/Manifest.toml
 /.ipynb_checkpoints/
-/queryset
-/dataset
-/TrueCardinalities
+/queryset/
+/dataset/
+/TrueCardinalities/
 /Experiments/Results/*
 /Experiments/SerializedSummaries/*
 /Experiments/ConvertedGraphs/*

diff --git a/Experiments/Scripts/coloring_strategies.jl b/Experiments/Scripts/coloring_strategies.jl
@@ -87,7 +87,7 @@ graph_grouped_box_plot(experiment_params;
                         dimensions = (600, 400),
                         legend_pos=:topleft,
                         y_label="Relative Error log\$_{10}\$",
-                        filename="fig_7") # colorings error
+                        filename="fig_9") # colorings error
 
 
 graph_grouped_bar_plot(experiment_params;

diff --git a/Experiments/Scripts/combine_parquet.py b/Experiments/Scripts/combine_parquet.py
@@ -9,7 +9,7 @@
 path2 = "Experiments/alley_results.parquet" # this should represent the path to just the alleyTPI results
 
 
-df1 = pq.read_table(source=path1, filters = [('Estimator', '!=', 'alleyTPI')]).to_pandas() # filter out the old alley results
+df1 = pq.read_table(source=path1, filters = [('Estimator', '!=', 'alleyTPI'), ('Estimator', '!=', 'alley')]).to_pandas() # filter out the old alley results
 df2 = pq.read_table(source=path2).to_pandas() # collect all the new alley results
 
 df_result = pd.concat([df1, df2]) # combine :)

diff --git a/Experiments/Scripts/comparison_exps.jl b/Experiments/Scripts/comparison_exps.jl
@@ -7,6 +7,7 @@ include("../Experiments.jl")
 datasets = [human, aids, lubm80, yeast, dblp, youtube, eu2005, patents]
 bounds_datasets = [human, aids, lubm80]
 
+bounds_mix_scheme = [(Degree, 8), (QuasiStable, 8), (NeighborNodeLabels, 8), (NodeLabels, 8)]
 mix_scheme = [(Degree, 8), (QuasiStable, 8), (NeighborNodeLabels, 8), (NodeLabels, 8)]
 
 experiment_params = Vector{ExperimentParams}()
@@ -16,7 +17,7 @@ smaller_experiment_params = Vector{ExperimentParams}()
 for dataset in bounds_datasets
     push!(max_bounds_experiment_params, ExperimentParams(deg_stats_type=MaxDegStats,
                                                 dataset=dataset,
-                                                partitioning_scheme=mix_scheme,
+                                                partitioning_scheme=bounds_mix_scheme,
                                                 max_cycle_size = -1,
                                                 description = "COLOR (MaxMix32)"))
 
@@ -33,7 +34,7 @@ for dataset in datasets
     push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
                                                 dataset=dataset,
                                                 partitioning_scheme=mix_scheme,
-                                                description = "COLOR (AvgMix32)"))
+                                                description = "COLOR \n(AvgMix32)"))
 
     push!(smaller_experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
                                                 dataset=dataset,
@@ -46,47 +47,61 @@ for dataset in datasets
                                                 max_cycle_size = -1,
                                                 description = "TradEst"))
 
-    push!(min_bounds_experiment_params, ExperimentParams(deg_stats_type=MinDegStats,
-                                                dataset=dataset,
-                                                partitioning_scheme=mix_scheme,
-                                                max_cycle_size = -1,
-                                                description = "COLOR (MinMix32)"))
+
 end
 
 println("Building...")
 
-#build_experiments(experiment_params)
+build_experiments(experiment_params)
+build_experiments(max_bounds_experiment_params)
 
 println("Estimating...")
 
-#run_estimation_experiments(experiment_params; timeout=TIMEOUT_SEC)
+run_estimation_experiments(experiment_params; timeout=TIMEOUT_SEC)
+run_estimation_experiments(max_bounds_experiment_params; timeout=TIMEOUT_SEC)
 
 comparison_methods =  ["alley", "alleyTPI", "wj", "impr", "jsub", "cs", "cset", "sumrdf"]
 x_order = [string(data) for data in datasets]
 bounds_x_order = [string(data) for data in bounds_datasets]
 legend_order = [params.description for params in experiment_params][1:Int(length(experiment_params)/ length(datasets))]
 max_bounds_legend_order = [params.description for params in max_bounds_experiment_params][1:Int(length(max_bounds_experiment_params)/ length(bounds_datasets))]
-min_bounds_legend_order = [params.description for params in min_bounds_experiment_params][1:Int(length(min_bounds_experiment_params)/length(datasets))]
 legend_order = vcat(legend_order, comparison_methods)
-min_bounds_legend_order = vcat(min_bounds_legend_order, "Minimum Estimate")
 
-# max_bounds_legend_order = vcat(max_bounds_legend_order, comparison_methods)
+colors = [:red :yellow :maroon3 :palevioletred1 :dodgerblue :coral :palegreen :mediumpurple2 :darkgreen :cadetblue1]
 
-colors = [:red :yellow :maroon3 :fuchsia :darkblue :navajowhite :lime :cornflowerblue :darkgreen :aqua]
-println("Graphing figures 2 and 3...")
+println("Graphing figures 3 and 4...")
 
-graph_grouped_box_plot(max_bounds_experiment_params;
+graph_grouped_boxplot_with_comparison_methods(experiment_params;
+                                                ylims=[10^-21, 10^21],
+                                                y_ticks=[10^-20, 10^-15, 10^-10, 10^-5, 10^-2, 10^0, 10^2, 10^5, 10^10, 10^15, 10^20],
+                                                y_type = estimate_error,
+                                                x_type = dataset,
+                                                x_order = x_order,
+                                                legend_order = legend_order,
+                                                grouping=description,
+                                                dimensions = (1550, 650),
+                                                legend_pos=:outerright,
+                                                legend_columns = 1,
+                                                y_label="Relative Error log\$_{10}\$",
+                                                group_colors = colors,
+                                                filename="fig_3") # overall error
+
+graph_grouped_boxplot_with_comparison_methods(experiment_params;
                                                 ylims=[10^-5, 10^4],
                                                 y_ticks=[10^-5, 10^-4, 10^-3, 10^-2, 10^-1, 10^0, 10^1, 10^2, 10^3, 10^4],
                                                 y_type = runtime,
                                                 x_type = dataset,
-                                                x_order = bounds_x_order,
-                                                legend_order = max_bounds_legend_order,
+                                                x_order = x_order,
+                                                legend_order = legend_order,
                                                 grouping=description,
-                                                dimensions = (600, 400),
-                                                legend_pos=:topright,
+                                                dimensions = (1550, 650),
+                                                legend_pos=:outerright,
+                                                legend_columns = 1,
                                                 y_label="Inference Latency log\$_{10}\$ (s)",
-                                                filename="fig_3_bounds") # overall runtime
+                                                group_colors = colors,
+                                                filename="fig_4") # overall runtime
+
+println("Graphing figures 5 and 6...")
 
 graph_grouped_box_plot(max_bounds_experiment_params;
                                                 ylims=[10^0, 10^30],
@@ -101,85 +116,56 @@ graph_grouped_box_plot(max_bounds_experiment_params;
                                                 legend_columns=1,
                                                 # include_hline = false,
                                                 y_label="Relative Error log\$_{10}\$",
-                                                filename="fig_2_bounds") # overall error
-
-graph_grouped_box_plot(min_bounds_experiment_params;
-                                                ylims=[10^-20, 10^5],
-                                                y_ticks=[10^-15, 10^-10, 10^-5, 10^0, 10^5],
-                                                y_type = estimate_error,
-                                                x_type = dataset,
-                                                x_order = x_order,
-                                                legend_order = min_bounds_legend_order,
-                                                grouping=description,
-                                                dimensions = (800, 400),
-                                                legend_pos=:bottomleft,
-                                                legend_columns=1,
-                                                compare_min=true,
-                                                y_label="Relative Error log\$_{10}\$",
-                                                filename="fig_2_min") # overall error
+                                                filename="fig_5") # bounds error
 
-# want to graph just the estimator work
-
-graph_grouped_boxplot_with_comparison_methods(experiment_params;
+graph_grouped_box_plot(max_bounds_experiment_params;
                                                 ylims=[10^-5, 10^4],
                                                 y_ticks=[10^-5, 10^-4, 10^-3, 10^-2, 10^-1, 10^0, 10^1, 10^2, 10^3, 10^4],
                                                 y_type = runtime,
                                                 x_type = dataset,
-                                                x_order = x_order,
-                                                legend_order = legend_order,
+                                                x_order = bounds_x_order,
+                                                legend_order = max_bounds_legend_order,
                                                 grouping=description,
                                                 dimensions = (600, 400),
                                                 legend_pos=:topright,
                                                 y_label="Inference Latency log\$_{10}\$ (s)",
-                                                group_colors = colors,
-                                                filename="fig_3_estimates") # overall runtime
-
-graph_grouped_boxplot_with_comparison_methods(experiment_params;
-                                                ylims=[10^-21, 10^21],
-                                                y_ticks=[10^-20, 10^-15, 10^-10, 10^-5, 10^-2, 10^0, 10^2, 10^5, 10^10, 10^15, 10^20],
-                                                y_type = estimate_error,
-                                                x_type = dataset,
-                                                x_order = x_order,
-                                                legend_order = legend_order,
-                                                grouping=description,
-                                                dimensions = (1550, 650),
-                                                legend_pos=:topright,
-                                                y_label="Relative Error log\$_{10}\$",
-                                                group_colors = colors,
-                                                filename="fig_2_estimates") # overall error
+                                                filename="fig_6") # bounds runtime
 
 comparison_methods =  ["alleyTPI", "sumrdf"]
 x_order = [string(data) for data in datasets]
 bar_legend_order = [params.description for params in smaller_experiment_params][1:Int(length(smaller_experiment_params)/ length(datasets))]
 bar_legend_order = vcat(bar_legend_order, comparison_methods)
 println("bar legend order: ", bar_legend_order)
-bar_plot_colors = [:red :fuchsia :aqua]
-println("Graphing figures 5 and 6")
+bar_plot_colors = [:red :palevioletred1 :cadetblue1]
+
+println("Graphing figures 7 and 8")
 
 graph_grouped_bar_plot(smaller_experiment_params;
                         grouping=description,
                         y_type=memory_footprint,
                         x_order = x_order,
                         legend_order = bar_legend_order,
-                        ylims=[0, 6],
-                        y_ticks = [1, 2, 3, 4, 5],#[20, 40, 60, 80, 100],
-                        legend_pos=:topright,
-                        dimensions = (1000, 550),
+                        ylims=[0, 10],
+                        y_ticks = [1, 2, 3, 4, 5, 6, 7, 8, 9],#[20, 40, 60, 80, 100],
+                        legend_pos=:topleft,
+                        dimensions = (850, 400),
+                        scale_factor = 1000,
                         log_scale = true,
                         group_colors = bar_plot_colors,
-                        y_label="Memory log\$_{10}\$ (MBs)",
-                        filename="fig_5") # overall memory
+                        y_label="Memory log\$_{10}\$ (KB)",
+                        filename="fig_7") # overall memory
 
 graph_grouped_bar_plot(smaller_experiment_params;
                         grouping=description,
                         y_type=build_time,
                         x_order = x_order,
                         legend_order = bar_legend_order,
                         legend_pos=:topleft,
-                        ylims=[0, 6],
-                        y_ticks = [1, 2, 3, 4, 5], #[100, 200, 300, 400, 500, 600, 700, 800],
-                        dimensions = (1000, 550),
+                        ylims=[0, 10],
+                        y_ticks = [1, 2, 3, 4, 5, 6, 7, 8, 9], #[100, 200, 300, 400, 500, 600, 700, 800],
+                        dimensions = (850, 400),
+                        scale_factor = 1000,
                         log_scale = true,
                         group_colors = bar_plot_colors,
-                        y_label="Build Time log\$_{10}\$ (s)",
-                        filename="fig_6") # overall build time
+                        y_label="Build Time log\$_{10}\$ (ms)",
+                        filename="fig_8") # overall build time
diff --git a/Experiments/Scripts/construction_scaling.jl b/Experiments/Scripts/construction_scaling.jl
@@ -55,6 +55,5 @@ p = bar(graph_sizes,
     legend = false)
 xlabel!("Graph Size (V+E)")
 ylabel!("Build Time (s)")
-# title!("Graph Size vs Average Build Time")
-savefig(p, "Experiments/Results/Figures/fig_8")
+savefig(p, "Experiments/Results/Figures/fig_10")
 
diff --git a/Experiments/Scripts/degree_variance_exps.jl b/Experiments/Scripts/degree_variance_exps.jl
@@ -1,7 +1,7 @@
 using QuasiStableColors
 include("../Experiments.jl")
 
-# want to demonstrate the different variances as num colors increase
+# The goal of this file is to demonstrate the differences in degree as the number of colors increases
 
 datasets = [human, aids, yeast, dblp]
 partitioner = QuasiStable
@@ -35,7 +35,6 @@ for dataset in datasets
         end
         # go through each color in the mapping and figure out the standard degree_deviations
         current_std_devs = []
-        # TODO: change to do it by color instead of by node...
         # iterate through each color used to partition the graph
         for color in keys(color_nodes_mapping)
             # find all the nodes that belong to the color
@@ -72,7 +71,7 @@ println("Degree Deviations: ", string(degree_deviations))
 
 # at this point, we have processed everything.
 
-# save the resulting lists... not a csv because it's just a list of data points, consider changing it in the future
+# save the resulting lists... not a csv because it's just a list of data points
 filename = "degree_variance_results.txt"
 destination = "Experiments/Results/"
 results_file = open(destination * filename, "w")
@@ -87,16 +86,13 @@ println(results_file, "Degree Deviations: ")
 println(results_file, string(degree_deviations))
 
 close(results_file)
-#=
-datapoint_datasets = [yeast, yeast, yeast, yeast, yeast, yeast, yeast, yeast, human, human, human, human, human, human, human, human, aids, aids, aids, aids, aids, aids, aids, aids, lubm80, lubm80, lubm80, lubm80, lubm80, lubm80, lubm80, lubm80]
-num_colors = [1, 4, 16, 32, 64, 128, 256, 512, 1, 4, 16, 32, 64, 128, 256, 512, 1, 4, 16, 32, 64, 128, 256, 512, 1, 4, 16, 32, 64, 128, 256, 512]
-degree_deviations = [6.880726758945922, 4.989643770088716, 1.3337394493305714, 0.5722709934061954, 0.29276791901184857, 0.1576596246976241, 0.04501312177993107, 0.008963394836190203, 26.087460965548715, 15.064273420577308, 2.5033955872900266, 2.011004083809858, 0.9549079332306909, 0.24716697193598255, 0.04852883518585504, 0.01006048907235267, 0.7785905698253888, 0.29769545667568575, 0.1325487056264329, 0.07776684144988345, 0.03704532690058064, 0.01758307485593726, 0.008193695393059712, 0.0035230466131828345, 12.918023635006346, 13.103759670112357, 1.1393851794427807, 1.0811658329775096, 0.3945170971118752, 0.22551298336436013, 0.11471444564342485, 0.03493673703138958]
- =#
+
 log_deviations = [deviation == 0 ? 0 : log10(deviation) for deviation in degree_deviations]
 
 ENV["GKSwstype"]="100"
 # now graph a scatter plot with lines connecting data points from the same dataset
 p = plot(num_colors, log_deviations, group = datapoint_datasets, legend = :topright, size=(600, 400), linewidth=4, left_margin = 10mm, guidefont=14,xtickfont=12,ytickfont=12,legendfont=10,)
 xlabel!(p, "Number of Colors")
 ylabel!(p, "Degree Range log\$_{10}\$")
-savefig("Experiments/Results/Figures/degree_deviations_$(partitioner).pdf")
+# savefig("Experiments/Results/Figures/degree_deviations_$(partitioner).pdf")
+savefig("Experiments/Results/Figures/fig_2.pdf")