Use multiprocessing (MP) instead of multithreading (MT) in experiments code

uwdb · Dec 20, 2023 · 440a764
1 parent 62bcf89
commit 440a764
Show file tree

Hide file tree

Showing 5 changed files with 41 additions and 32 deletions.
diff --git a/Experiments/Experiments.jl b/Experiments/Experiments.jl
@@ -9,6 +9,9 @@ using DelimitedFiles: writedlm
 using BenchmarkTools
 using Random
 using Printf
+using SharedArrays
+using WeakRefStrings
+using Distributed
 
 include("../Source/CardinalityWithColors.jl")
 include("utils.jl")
@@ -18,3 +21,15 @@ include("build_color_summaries.jl")
 include("get_true_cardinalities.jl")
 include("run_estimators.jl")
 include("graph_results.jl")
+@everywhere include("../Source/CardinalityWithColors.jl")
+@everywhere include("utils.jl")
+@everywhere include("load_datasets.jl")
+@everywhere include("build_color_summaries.jl")
+@everywhere include("run_estimators.jl")
+@everywhere using SharedArrays
+@everywhere using WeakRefStrings
+@everywhere using DelimitedFiles: writedlm
+@everywhere using Parquet2: Dataset
+@everywhere using Random
+@everywhere using CSV, DataFrames
+@everywhere using Serialization: serialize, deserialize
diff --git a/Experiments/Scripts/comparison_exps.jl b/Experiments/Scripts/comparison_exps.jl
@@ -3,8 +3,8 @@ using Profile
 include("../Experiments.jl")
 
 #datasets = [human, aids, lubm80, yeast, hprd, dblp, youtube, eu2005, patents, wordnet]
-datasets = [human, aids, lubm80, yeast, dblp, youtube, eu2005, patents]
-#datasets = [aids]
+#datasets = [human, aids, lubm80, yeast, dblp, youtube, eu2005, patents]
+datasets = [human]
 
 experiment_params = Vector{ExperimentParams}()
 for dataset in datasets
@@ -16,17 +16,22 @@ for dataset in datasets
                                                 dataset=dataset,
                                                 partitioning_scheme=[(QuasiStable, 32), (NeighborNodeLabels, 32),(QuasiStable, 32), (NeighborNodeLabels, 32)],
                                                 description = "AvgQ64N64"))
+
     push!(experiment_params, ExperimentParams(deg_stats_type=MinDegStats,
                                                 dataset=dataset,
                                                 partitioning_scheme=[(QuasiStable, 64)],
+                                                max_cycle_size = -1,
                                                 description = "MinQ64"))
     push!(experiment_params, ExperimentParams(deg_stats_type=MaxDegStats,
                                                 dataset=dataset,
                                                 partitioning_scheme=[(QuasiStable, 64)],
+                                                max_cycle_size = -1,
                                                 description = "MaxQ64"))
+
     push!(experiment_params, ExperimentParams(deg_stats_type=MaxDegStats,
                                                 dataset=dataset,
                                                 partitioning_scheme=[(Hash, 64)],
+                                                max_cycle_size = -1,
                                                 description = "BSK"))
 
     push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
@@ -36,9 +41,9 @@ for dataset in datasets
                                                 description = "IndEst"))
 end
 
-build_experiments(experiment_params)
+#build_experiments(experiment_params)
 
-#run_estimation_experiments(experiment_params)
+run_estimation_experiments(experiment_params)
 
 graph_grouped_boxplot_with_comparison_methods(experiment_params;
                                                 ylims=[10^-5, 10^4],
@@ -47,30 +52,19 @@ graph_grouped_boxplot_with_comparison_methods(experiment_params;
                                                 grouping=description,
                                                 dimensions = (1450, 550),
                                                 legend_pos=:top,
-                                                y_label="Runtime (10^ s)",
-                                                filename="runtime")
+                                                y_label="Inference Latency (10^ s)",
+                                                filename="overall_runtime")
 
 graph_grouped_boxplot_with_comparison_methods(experiment_params;
                                                 ylims=[10^-21, 10^21],
                                                 y_ticks=[10^-20, 10^-15, 10^-10, 10^-5, 10^-2, 10^0, 10^2, 10^5, 10^10, 10^15, 10^20],
                                                 y_type = estimate_error,
                                                 grouping=description,
                                                 dimensions = (1450, 550),
-                                                legend_pos=:bottomleft,
+                                                legend_pos=:top,
                                                 y_label="Relative Error (10^)",
-                                                filename="error")
-
+                                                filename="overall_error")
 
-graph_grouped_boxplot_with_comparison_methods(experiment_params;
-                                                ylims=[10^-21, 10^21],
-                                                x_type = query_size,
-                                                y_ticks=[10^-20, 10^-15, 10^-10, 10^-5, 10^-2, 10^0, 10^2, 10^5, 10^10, 10^15, 10^20],
-                                                y_type = estimate_error,
-                                                grouping=description,
-                                                dimensions = (1450, 550),
-                                                legend_pos=:bottomleft,
-                                                y_label="Relative Error (10^)",
-                                                filename="error-query-size")
 
 graph_grouped_bar_plot(experiment_params;
                         grouping=description,
@@ -79,13 +73,13 @@ graph_grouped_bar_plot(experiment_params;
                         y_ticks = [5, 10, 15, 20, 25, 30],
                         dimensions = (1000, 550),
                         y_label="Memory (MBs)",
-                        filename="memory")
+                        filename="overall_memory")
 
 graph_grouped_bar_plot(experiment_params;
                         grouping=description,
                         y_type=build_time,
-                        ylims=[0, 100],
-                        y_ticks = [20, 40, 60, 80, 100],
+                        ylims=[0, 1600],
+                        y_ticks = [200, 400, 600, 800, 1000, 1200, 1400, 1600],
                         dimensions = (1000, 550),
                         y_label="Build Time (s)",
-                        filename="build_time")
+                        filename="overall_build_time")
diff --git a/Experiments/build_color_summaries.jl b/Experiments/build_color_summaries.jl
@@ -1,5 +1,5 @@
 function build_experiments(experiment_params_list::Vector{ExperimentParams})
-    for experiment_params in experiment_params_list
+    @sync @distributed for experiment_params in experiment_params_list
         build_times = [("Dataset", "Partitioner", "NumColors",  "BuildPhase", "BuildTime", "MemoryFootprint")]
         dataset = experiment_params.dataset
         summary_params = experiment_params.summary_params

diff --git a/Experiments/run_estimators.jl b/Experiments/run_estimators.jl
@@ -5,10 +5,8 @@ function run_estimation_experiments(experiment_params_list::Vector{ExperimentPar
         summary_file_location = "Experiments/SerializedSummaries/" * params_to_summary_filename(experiment_params)
         !isfile(summary_file_location) && error("The summary has not been built yet! \n Attempted File Location: $(summary_file_location)")
         summary::ColorSummary = deserialize(summary_file_location)
-        experiment_results = []
-        push!(experiment_results, ("Estimate", "TrueCard", "EstimationTime", "QueryType", "QueryPath", "QuerySize"))
-        lk = ReentrantLock()
-        Threads.@threads for i in shuffle(collect(1:length(all_queries[dataset])))
+        experiment_results = SharedArray{Tuple{Float64, Float64, Float64, String255, String255, Float64}}(length(all_queries[dataset]))
+        @sync @distributed for i in 1:length(experiment_results)
             query::QueryGraph = all_queries[dataset][i].query
             query_path = all_queries[dataset][i].query_path
             exact_size = all_queries[dataset][i].exact_size
@@ -20,11 +18,11 @@ function run_estimation_experiments(experiment_params_list::Vector{ExperimentPar
             estimate_time = median([x.time for x in  estimate_results]) # Convert back to seconds from nano seconds
             estimate = max(1, estimate_results[1].value)
             query_type = all_queries[dataset][i].query_type
-            lock(lk)
-            push!(experiment_results, (estimate, exact_size, estimate_time, query_type, query_path, nv(query.graph)))
-            unlock(lk)
+            experiment_results[i] = (estimate, exact_size, estimate_time, query_type, query_path, nv(query.graph))
         end
+        final_results = [(x[1], x[2], x[3], String(x[4]), String(x[5]), x[6]) for x in experiment_results]
+        final_results = [("Estimate", "TrueCard", "EstimationTime", "QueryType", "QueryPath", "QuerySize"); final_results]
         results_file_location = "Experiments/Results/Estimation_"  * params_to_results_filename(experiment_params)
-        writedlm(results_file_location, experiment_results, ",")
+        writedlm(results_file_location, final_results, ",")
     end
 end
diff --git a/Project.toml b/Project.toml
@@ -18,8 +18,10 @@ Probably = "2172800d-0309-5a57-a84f-d50c94757422"
 QuasiStableColors = "9c3856af-3e7c-4d34-a6af-a406867b22e4"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
+SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
+WeakRefStrings = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
 
 [compat]
 julia = "^1.6"