Skip to content

Commit

Permalink
Merge pull request #63 from mkyl/alley-and-cleanup
Browse files Browse the repository at this point in the history
Final Cleanup
  • Loading branch information
diandremiguels authored May 9, 2024
2 parents b8f1c86 + df019d0 commit 684fedb
Show file tree
Hide file tree
Showing 21 changed files with 128 additions and 90,730 deletions.
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
/docs/Manifest.toml
/test/coverage/Manifest.toml
/.ipynb_checkpoints/
/queryset
/dataset
/TrueCardinalities
/queryset/
/dataset/
/TrueCardinalities/
/Experiments/Results/*
/Experiments/SerializedSummaries/*
/Experiments/ConvertedGraphs/*
Expand Down
2 changes: 1 addition & 1 deletion Experiments/Scripts/coloring_strategies.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ graph_grouped_box_plot(experiment_params;
dimensions = (600, 400),
legend_pos=:topleft,
y_label="Relative Error log\$_{10}\$",
filename="fig_7") # colorings error
filename="fig_9") # colorings error


graph_grouped_bar_plot(experiment_params;
Expand Down
2 changes: 1 addition & 1 deletion Experiments/Scripts/combine_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
path2 = "Experiments/alley_results.parquet" # this should represent the path to just the alleyTPI results


df1 = pq.read_table(source=path1, filters = [('Estimator', '!=', 'alleyTPI')]).to_pandas() # filter out the old alley results
df1 = pq.read_table(source=path1, filters = [('Estimator', '!=', 'alleyTPI'), ('Estimator', '!=', 'alley')]).to_pandas() # filter out the old alley results
df2 = pq.read_table(source=path2).to_pandas() # collect all the new alley results

df_result = pd.concat([df1, df2]) # combine :)
Expand Down
126 changes: 56 additions & 70 deletions Experiments/Scripts/comparison_exps.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ include("../Experiments.jl")
datasets = [human, aids, lubm80, yeast, dblp, youtube, eu2005, patents]
bounds_datasets = [human, aids, lubm80]

bounds_mix_scheme = [(Degree, 8), (QuasiStable, 8), (NeighborNodeLabels, 8), (NodeLabels, 8)]
mix_scheme = [(Degree, 8), (QuasiStable, 8), (NeighborNodeLabels, 8), (NodeLabels, 8)]

experiment_params = Vector{ExperimentParams}()
Expand All @@ -16,7 +17,7 @@ smaller_experiment_params = Vector{ExperimentParams}()
for dataset in bounds_datasets
push!(max_bounds_experiment_params, ExperimentParams(deg_stats_type=MaxDegStats,
dataset=dataset,
partitioning_scheme=mix_scheme,
partitioning_scheme=bounds_mix_scheme,
max_cycle_size = -1,
description = "COLOR (MaxMix32)"))

Expand All @@ -33,7 +34,7 @@ for dataset in datasets
push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
dataset=dataset,
partitioning_scheme=mix_scheme,
description = "COLOR (AvgMix32)"))
description = "COLOR \n(AvgMix32)"))

push!(smaller_experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
dataset=dataset,
Expand All @@ -46,47 +47,61 @@ for dataset in datasets
max_cycle_size = -1,
description = "TradEst"))

push!(min_bounds_experiment_params, ExperimentParams(deg_stats_type=MinDegStats,
dataset=dataset,
partitioning_scheme=mix_scheme,
max_cycle_size = -1,
description = "COLOR (MinMix32)"))

end

println("Building...")

#build_experiments(experiment_params)
build_experiments(experiment_params)
build_experiments(max_bounds_experiment_params)

println("Estimating...")

#run_estimation_experiments(experiment_params; timeout=TIMEOUT_SEC)
run_estimation_experiments(experiment_params; timeout=TIMEOUT_SEC)
run_estimation_experiments(max_bounds_experiment_params; timeout=TIMEOUT_SEC)

comparison_methods = ["alley", "alleyTPI", "wj", "impr", "jsub", "cs", "cset", "sumrdf"]
x_order = [string(data) for data in datasets]
bounds_x_order = [string(data) for data in bounds_datasets]
legend_order = [params.description for params in experiment_params][1:Int(length(experiment_params)/ length(datasets))]
max_bounds_legend_order = [params.description for params in max_bounds_experiment_params][1:Int(length(max_bounds_experiment_params)/ length(bounds_datasets))]
min_bounds_legend_order = [params.description for params in min_bounds_experiment_params][1:Int(length(min_bounds_experiment_params)/length(datasets))]
legend_order = vcat(legend_order, comparison_methods)
min_bounds_legend_order = vcat(min_bounds_legend_order, "Minimum Estimate")

# max_bounds_legend_order = vcat(max_bounds_legend_order, comparison_methods)
colors = [:red :yellow :maroon3 :palevioletred1 :dodgerblue :coral :palegreen :mediumpurple2 :darkgreen :cadetblue1]

colors = [:red :yellow :maroon3 :fuchsia :darkblue :navajowhite :lime :cornflowerblue :darkgreen :aqua]
println("Graphing figures 2 and 3...")
println("Graphing figures 3 and 4...")

graph_grouped_box_plot(max_bounds_experiment_params;
graph_grouped_boxplot_with_comparison_methods(experiment_params;
ylims=[10^-21, 10^21],
y_ticks=[10^-20, 10^-15, 10^-10, 10^-5, 10^-2, 10^0, 10^2, 10^5, 10^10, 10^15, 10^20],
y_type = estimate_error,
x_type = dataset,
x_order = x_order,
legend_order = legend_order,
grouping=description,
dimensions = (1550, 650),
legend_pos=:outerright,
legend_columns = 1,
y_label="Relative Error log\$_{10}\$",
group_colors = colors,
filename="fig_3") # overall error

graph_grouped_boxplot_with_comparison_methods(experiment_params;
ylims=[10^-5, 10^4],
y_ticks=[10^-5, 10^-4, 10^-3, 10^-2, 10^-1, 10^0, 10^1, 10^2, 10^3, 10^4],
y_type = runtime,
x_type = dataset,
x_order = bounds_x_order,
legend_order = max_bounds_legend_order,
x_order = x_order,
legend_order = legend_order,
grouping=description,
dimensions = (600, 400),
legend_pos=:topright,
dimensions = (1550, 650),
legend_pos=:outerright,
legend_columns = 1,
y_label="Inference Latency log\$_{10}\$ (s)",
filename="fig_3_bounds") # overall runtime
group_colors = colors,
filename="fig_4") # overall runtime

println("Graphing figures 5 and 6...")

graph_grouped_box_plot(max_bounds_experiment_params;
ylims=[10^0, 10^30],
Expand All @@ -101,85 +116,56 @@ graph_grouped_box_plot(max_bounds_experiment_params;
legend_columns=1,
# include_hline = false,
y_label="Relative Error log\$_{10}\$",
filename="fig_2_bounds") # overall error

graph_grouped_box_plot(min_bounds_experiment_params;
ylims=[10^-20, 10^5],
y_ticks=[10^-15, 10^-10, 10^-5, 10^0, 10^5],
y_type = estimate_error,
x_type = dataset,
x_order = x_order,
legend_order = min_bounds_legend_order,
grouping=description,
dimensions = (800, 400),
legend_pos=:bottomleft,
legend_columns=1,
compare_min=true,
y_label="Relative Error log\$_{10}\$",
filename="fig_2_min") # overall error
filename="fig_5") # bounds error

# want to graph just the estimator work

graph_grouped_boxplot_with_comparison_methods(experiment_params;
graph_grouped_box_plot(max_bounds_experiment_params;
ylims=[10^-5, 10^4],
y_ticks=[10^-5, 10^-4, 10^-3, 10^-2, 10^-1, 10^0, 10^1, 10^2, 10^3, 10^4],
y_type = runtime,
x_type = dataset,
x_order = x_order,
legend_order = legend_order,
x_order = bounds_x_order,
legend_order = max_bounds_legend_order,
grouping=description,
dimensions = (600, 400),
legend_pos=:topright,
y_label="Inference Latency log\$_{10}\$ (s)",
group_colors = colors,
filename="fig_3_estimates") # overall runtime

graph_grouped_boxplot_with_comparison_methods(experiment_params;
ylims=[10^-21, 10^21],
y_ticks=[10^-20, 10^-15, 10^-10, 10^-5, 10^-2, 10^0, 10^2, 10^5, 10^10, 10^15, 10^20],
y_type = estimate_error,
x_type = dataset,
x_order = x_order,
legend_order = legend_order,
grouping=description,
dimensions = (1550, 650),
legend_pos=:topright,
y_label="Relative Error log\$_{10}\$",
group_colors = colors,
filename="fig_2_estimates") # overall error
filename="fig_6") # bounds runtime

comparison_methods = ["alleyTPI", "sumrdf"]
x_order = [string(data) for data in datasets]
bar_legend_order = [params.description for params in smaller_experiment_params][1:Int(length(smaller_experiment_params)/ length(datasets))]
bar_legend_order = vcat(bar_legend_order, comparison_methods)
println("bar legend order: ", bar_legend_order)
bar_plot_colors = [:red :fuchsia :aqua]
println("Graphing figures 5 and 6")
bar_plot_colors = [:red :palevioletred1 :cadetblue1]

println("Graphing figures 7 and 8")

graph_grouped_bar_plot(smaller_experiment_params;
grouping=description,
y_type=memory_footprint,
x_order = x_order,
legend_order = bar_legend_order,
ylims=[0, 6],
y_ticks = [1, 2, 3, 4, 5],#[20, 40, 60, 80, 100],
legend_pos=:topright,
dimensions = (1000, 550),
ylims=[0, 10],
y_ticks = [1, 2, 3, 4, 5, 6, 7, 8, 9],#[20, 40, 60, 80, 100],
legend_pos=:topleft,
dimensions = (850, 400),
scale_factor = 1000,
log_scale = true,
group_colors = bar_plot_colors,
y_label="Memory log\$_{10}\$ (MBs)",
filename="fig_5") # overall memory
y_label="Memory log\$_{10}\$ (KB)",
filename="fig_7") # overall memory

graph_grouped_bar_plot(smaller_experiment_params;
grouping=description,
y_type=build_time,
x_order = x_order,
legend_order = bar_legend_order,
legend_pos=:topleft,
ylims=[0, 6],
y_ticks = [1, 2, 3, 4, 5], #[100, 200, 300, 400, 500, 600, 700, 800],
dimensions = (1000, 550),
ylims=[0, 10],
y_ticks = [1, 2, 3, 4, 5, 6, 7, 8, 9], #[100, 200, 300, 400, 500, 600, 700, 800],
dimensions = (850, 400),
scale_factor = 1000,
log_scale = true,
group_colors = bar_plot_colors,
y_label="Build Time log\$_{10}\$ (s)",
filename="fig_6") # overall build time
y_label="Build Time log\$_{10}\$ (ms)",
filename="fig_8") # overall build time
3 changes: 1 addition & 2 deletions Experiments/Scripts/construction_scaling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,5 @@ p = bar(graph_sizes,
legend = false)
xlabel!("Graph Size (V+E)")
ylabel!("Build Time (s)")
# title!("Graph Size vs Average Build Time")
savefig(p, "Experiments/Results/Figures/fig_8")
savefig(p, "Experiments/Results/Figures/fig_10")

14 changes: 5 additions & 9 deletions Experiments/Scripts/degree_variance_exps.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using QuasiStableColors
include("../Experiments.jl")

# want to demonstrate the different variances as num colors increase
# The goal of this file is to demonstrate how the per-color degree deviation changes as the number of colors increases

datasets = [human, aids, yeast, dblp]
partitioner = QuasiStable
Expand Down Expand Up @@ -35,7 +35,6 @@ for dataset in datasets
end
# go through each color in the mapping and compute the standard deviation of the degrees of its nodes
current_std_devs = []
# TODO: change to do it by color instead of by node...
# iterate through each color used to partition the graph
for color in keys(color_nodes_mapping)
# find all the nodes that belong to the color
Expand Down Expand Up @@ -72,7 +71,7 @@ println("Degree Deviations: ", string(degree_deviations))

# at this point, we have processed everything.

# save the resulting lists... not a csv because it's just a list of data points, consider changing it in the future
# save the resulting lists... not a csv because it's just a list of data points
filename = "degree_variance_results.txt"
destination = "Experiments/Results/"
results_file = open(destination * filename, "w")
Expand All @@ -87,16 +86,13 @@ println(results_file, "Degree Deviations: ")
println(results_file, string(degree_deviations))

close(results_file)
#=
datapoint_datasets = [yeast, yeast, yeast, yeast, yeast, yeast, yeast, yeast, human, human, human, human, human, human, human, human, aids, aids, aids, aids, aids, aids, aids, aids, lubm80, lubm80, lubm80, lubm80, lubm80, lubm80, lubm80, lubm80]
num_colors = [1, 4, 16, 32, 64, 128, 256, 512, 1, 4, 16, 32, 64, 128, 256, 512, 1, 4, 16, 32, 64, 128, 256, 512, 1, 4, 16, 32, 64, 128, 256, 512]
degree_deviations = [6.880726758945922, 4.989643770088716, 1.3337394493305714, 0.5722709934061954, 0.29276791901184857, 0.1576596246976241, 0.04501312177993107, 0.008963394836190203, 26.087460965548715, 15.064273420577308, 2.5033955872900266, 2.011004083809858, 0.9549079332306909, 0.24716697193598255, 0.04852883518585504, 0.01006048907235267, 0.7785905698253888, 0.29769545667568575, 0.1325487056264329, 0.07776684144988345, 0.03704532690058064, 0.01758307485593726, 0.008193695393059712, 0.0035230466131828345, 12.918023635006346, 13.103759670112357, 1.1393851794427807, 1.0811658329775096, 0.3945170971118752, 0.22551298336436013, 0.11471444564342485, 0.03493673703138958]
=#

log_deviations = [deviation == 0 ? 0 : log10(deviation) for deviation in degree_deviations]

ENV["GKSwstype"]="100"
# now graph a scatter plot with lines connecting data points from the same dataset
p = plot(num_colors, log_deviations, group = datapoint_datasets, legend = :topright, size=(600, 400), linewidth=4, left_margin = 10mm, guidefont=14,xtickfont=12,ytickfont=12,legendfont=10,)
xlabel!(p, "Number of Colors")
ylabel!(p, "Degree Range log\$_{10}\$")
savefig("Experiments/Results/Figures/degree_deviations_$(partitioner).pdf")
# savefig("Experiments/Results/Figures/degree_deviations_$(partitioner).pdf")
savefig("Experiments/Results/Figures/fig_2.pdf")
Loading

0 comments on commit 684fedb

Please sign in to comment.