Skip to content

Commit

Permalink
update graph functions
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebd99 committed Dec 15, 2023
1 parent 7f6227b commit 4c8ab86
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 55 deletions.
16 changes: 4 additions & 12 deletions Experiments/Scripts/comparison_exps.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,6 @@ datasets = [human, aids, lubm80, yeast, dblp, youtube, eu2005, patents]

experiment_params = Vector{ExperimentParams}()
for dataset in datasets
#= push!(experiment_params, ExperimentParams(deg_stats_type=CorrDegStats,
dataset=dataset,
partitioning_scheme=[(QuasiStable, 64)],
description = "CorrQ64"))
push!(experiment_params, ExperimentParams(deg_stats_type=CorrDegStats,
dataset=dataset,
partitioning_scheme=[(QuasiStable, 32), (NeighborNodeLabels, 32),(QuasiStable, 32), (NeighborNodeLabels, 32)],
description = "CorrQ64N64")) =#
push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
dataset=dataset,
partitioning_scheme=[(QuasiStable, 64)],
Expand Down Expand Up @@ -54,7 +46,7 @@ graph_grouped_boxplot_with_comparison_methods(experiment_params;
grouping=description,
dimensions = (1450, 550),
legend_pos=:top,
y_label="Runtime (s)",
y_label="Runtime (10^ s)",
filename="runtime")

graph_grouped_boxplot_with_comparison_methods(experiment_params;
Expand All @@ -64,15 +56,15 @@ graph_grouped_boxplot_with_comparison_methods(experiment_params;
grouping=description,
dimensions = (1450, 550),
legend_pos=:bottomleft,
y_label="Relative Error",
y_label="Relative Error (10^)",
filename="error")

graph_grouped_bar_plot(experiment_params;
grouping=description,
y_type=memory_footprint,
y_lims=[0, 30],
y_ticks = [5, 10, 15, 20, 25, 30],
dimensions = (775, 550),
dimensions = (900, 550),
y_label="Memory (MBs)",
filename="memory")

Expand All @@ -81,6 +73,6 @@ graph_grouped_bar_plot(experiment_params;
y_type=build_time,
y_lims=[0, 720],
y_ticks = [0, 100, 200, 300, 400, 500, 600, 700],
dimensions = (775, 550),
dimensions = (900, 550),
y_label="Build Time (s)",
filename="build_time")
16 changes: 8 additions & 8 deletions Experiments/Scripts/just-edge-updates-experiments.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ using Graphs
using Random
include("../Experiments.jl")

datasets::Vector{DATASET} = [aids, human, wordnet, dblp]
datasets::Vector{DATASET} = [yeast]
# datasets::Vector{DATASET} = [aids, human, yeast, wordnet, youtube, dblp, patents]
# datasets::Vector{DATASET} = [aids, human, lubm80, yeast, hprd, wordnet, dblp, youtube, eu2005, patents]
max_cycles = 6
Expand All @@ -15,7 +15,7 @@ experiment_params_list::Vector{ExperimentParams} = [ExperimentParams(dataset=cur
for current_dataset in datasets for current_proportion in proportions_updated]
println("started building")
shuffled_edges = Dict()
for experiment_params in experiment_params_list
#= for experiment_params in experiment_params_list
build_times = [("Dataset", "Partitioner", "NumColors", "BuildPhase", "BuildTime", "MemoryFootprint")]
dataset = experiment_params.dataset
summary_params = experiment_params.summary_params
Expand Down Expand Up @@ -85,16 +85,16 @@ for experiment_params in experiment_params_list
results_filename = params_to_results_filename(experiment_params)
result_file_location = "Experiments/Results/Build_" * results_filename
writedlm(result_file_location, build_times, ",")
end
end =#

println("started estimating")
run_estimation_experiments(experiment_params_list)
#run_estimation_experiments(experiment_params_list)
println("started graphing")
# compare how overall accuracy is affected by summary updates
# graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error, grouping=proportion_not_updated, filename="overall-accuracy-and-updates")
# compare how cycle stat accuracies are affected by summary updates
# graph_grouped_box_plot(experiment_params_list, x_type=proportion_deleted, y_type=estimate_error, x_label="proportion added then deleted", y_label="accuracy", grouping=cycle_size, filename="deletion-experiment")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=build_time, y_lims=[0, 30], x_label="Proportion Updated", y_label="Build Time (S)", grouping=proportion_updated, filename="just-edge-updates-build")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error, x_label="Proportion Updated", y_label="Estimate Error", grouping=proportion_updated, filename="just-edge-updates-error")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=runtime, y_lims=[0, 0.6], x_label="Proportion Updated", y_label="Runtime (S)", grouping=proportion_updated, filename="just-edge-updates-runtime")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=memory_footprint, y_lims=[0, 20], x_label="Proportion Updated", y_label="Memory Footprint (B)", grouping=proportion_updated, filename="just-edge-updates-memory")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=build_time, y_lims=[0, 1], y_ticks = [0,.2, .4, .6, .8, 1], x_label="Proportion Updated", y_label="Build Time (S)", grouping=proportion_updated, filename="just-edge-updates-build")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error,y_lims=[10^-20, 10^15], x_label="Proportion Updated", y_label="Estimate Error", grouping=proportion_updated, filename="just-edge-updates-error")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=runtime, y_lims=[10^-5, 10], y_ticks = [10^-5, 10^-4, 10^-3, 10^-2, 10^-1, 1, 10], x_label="Proportion Updated", y_label="Runtime (S)", grouping=proportion_updated, filename="just-edge-updates-runtime")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=memory_footprint, y_lims=[0, 20], y_ticks = [0, 5, 10, 15, 20], x_label="Proportion Updated", y_label="Memory (MB)", grouping=proportion_updated, filename="just-edge-updates-memory")
84 changes: 49 additions & 35 deletions Experiments/graph_results.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@enum GROUP dataset technique cycle_size summary_paths inference_paths query_type sampling_type cycle_stats number_of_colors build_phase proportion_not_updated proportion_deleted deg_stat_type description
@enum GROUP dataset technique cycle_size summary_paths inference_paths query_type sampling_type cycle_stats number_of_colors build_phase proportion_updated proportion_deleted deg_stat_type description

@enum VALUE estimate_error runtime build_time memory_footprint

Expand All @@ -10,7 +10,9 @@ function graph_grouped_box_plot(experiment_params_list::Vector{ExperimentParams}
x_label=nothing,
y_label=nothing,
filename=nothing,
dimensions = (1000, 600))
y_lims=[0, 10^2.5],
y_ticks=[10^-10, 10^-5, 10^-2, 1, 10^2, 10^5, 10^10],
dimensions = (1000, 800))
# for now let's just use the dataset as the x-values and the cycle size as the groups
x_values = []
y_values = []
Expand Down Expand Up @@ -50,12 +52,12 @@ function graph_grouped_box_plot(experiment_params_list::Vector{ExperimentParams}
y_values,
group = groups,
yscale =:log10,
ylims=[10^-13, 10^11],
yticks=[10^-10, 10^-5, 10^-2, 1, 10^2, 10^5, 10^10],
ylims=y_lims,
y_ticks=y_ticks,
legend = legend_pos,
legend_column = 2,
thickness_scaling=1.25,
bottom_margin = 20px,
bottom_margin = 40px,
top_margin = 20px,
left_margin = 10mm,
titlefont = (12, :black),
Expand Down Expand Up @@ -83,7 +85,17 @@ function comparison_dataset()
comparison_results[i, :QueryType] = match(r".*/query_(.*)_.*", query_path).captures[1]
end
end
return comparison_results
results_dict = Dict()
for i in 1:nrow(comparison_results)
dataset = comparison_results[i, :Dataset]
estimator = comparison_results[i, :Estimator]
query_path = comparison_results[i, :Query]
results_dict[(dataset, estimator, query_path)] = (Estimate=comparison_results[i, :Value],
Runtime=comparison_results[i, :Runtime],
QueryType=comparison_results[i,:QueryType])
end
estimators = unique(comparison_results[:, :Estimator])
return estimators, results_dict
end

function get_query_id(dataset, query_path)
Expand Down Expand Up @@ -120,7 +132,7 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V

# keep track of the data points
for i in 1:nrow(results_df)
current_x = string(get_value_from_param(experiment_params, dataset))
data = string(get_value_from_param(experiment_params, dataset))

current_group = string(grouping == query_type ? results_df[i, :QueryType] : get_value_from_param(experiment_params, grouping))
current_y = 0
Expand All @@ -129,43 +141,48 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
else # y_type == runtime
current_y = results_df[i, :EstimationTime]
end
true_card[(current_x, get_query_id(string(experiment_params.dataset), results_df[i, :QueryPath]))] = results_df[i, :TrueCard]
true_card[(data, get_query_id(string(experiment_params.dataset), results_df[i, :QueryPath]))] = results_df[i, :TrueCard]
# push the errors and their groupings into the correct vector
push!(x_values, current_x)
push!(x_values, data)
push!(y_values, current_y)
push!(estimators, current_group)
end
end
results_filename = params_to_results_filename(experiment_params_list[1])
comparison_results = comparison_dataset()
for i in 1:nrow(comparison_results)
current_x = comparison_results[i, :Dataset]
estimator = comparison_results[i, :Estimator]
current_y = 0
if !haskey(true_card, (current_x, comparison_results[i, :Query]))
continue
end
if y_type == estimate_error
current_y = min(10^30, max(1, comparison_results[i, :Value])) / true_card[(current_x, comparison_results[i, :Query])]
else # y_type == runtime
current_y = comparison_results[i, :Runtime] / 1000.0
estimator_types, comparison_results = comparison_dataset()
for (query_key, card) in true_card
data = query_key[1]
query_path = query_key[2]
for estimator in estimator_types
comp_key = (data, estimator, query_path)
(estimate, runtime) = 1, 10 # TODO: We shouldn't use an arbitrary number for runtime here
if haskey(comparison_results, comp_key)
result = comparison_results[comp_key]
estimate = result.Estimate
runtime = result.Runtime
end

if y_type == estimate_error
current_y = min(10^30, max(1, estimate)) / card
else # y_type == runtime
current_y = runtime / 1000.0
end
# push the errors and their groupings into the correct vector
push!(x_values, data)
push!(y_values, current_y)
push!(estimators, estimator)
end
# push the errors and their groupings into the correct vector
push!(x_values, current_x)
push!(y_values, current_y)
push!(estimators, estimator)
end
println("starting graphs")

# This seems to be necessary for using Plots.jl outside of the ipynb framework.
# See this: https://discourse.julialang.org/t/deactivate-plot-display-to-avoid-need-for-x-server/19359/15
ENV["GKSwstype"]="100"
gbplot = groupedboxplot(x_values,
y_values,
[log10(y) for y in y_values],
group = estimators,
yscale =:log10,
ylims = ylims,
y_ticks = y_ticks,
ylims = (log10(ylims[1]),log10(ylims[2])),
y_ticks = [log10(y) for y in y_ticks],
legend = legend_pos,
size = dimensions,
bottom_margin = 20px,
Expand All @@ -176,7 +193,7 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
legendfont = (11, :black),
tickfont = (12, :black),
guidefont = (15, :black),
whisker_range=0)
whisker_range=2)
x_label !== nothing && xlabel!(gbplot, x_label)
y_label !== nothing && ylabel!(gbplot, y_label)
plotname = (isnothing(filename)) ? results_filename * ".png" : filename * ".png"
Expand Down Expand Up @@ -268,9 +285,6 @@ function graph_grouped_bar_plot(experiment_params_list::Vector{ExperimentParams}
savefig(gbplot, "Experiments/Results/Figures/" * plotname)
end




# default to grouping by dataset
function get_value_from_param(experiment_param::ExperimentParams, value_type::GROUP)
if value_type == dataset
Expand All @@ -287,8 +301,8 @@ function get_value_from_param(experiment_param::ExperimentParams, value_type::GR
return experiment_param.only_shortest_path_cycle
elseif value_type == number_of_colors
return experiment_param.summary_params.num_colors
elseif value_type == proportion_not_updated
return experiment_param.summary_params.proportion_not_updated
elseif value_type == proportion_updated
return experiment_param.summary_params.proportion_updated
elseif value_type == proportion_deleted
return experiment_param.summary_params.proportion_deleted
elseif value_type == deg_stat_type
Expand Down

0 comments on commit 4c8ab86

Please sign in to comment.