diff --git a/general-testing.ipynb b/general-testing.ipynb deleted file mode 100644 index 3099788..0000000 --- a/general-testing.ipynb +++ /dev/null @@ -1,385 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "include(\"src/QuasiStableCardinalityEstimator.jl\")\n", - "using Distributions\n", - "using DataStructures: counter, Dict, Set, Vector, inc!\n", - "using Plots" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, we generate a zipfian graph of moderate size." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "n = 200000\n", - "numEdgeLabels = 100\n", - "numVertexLabels = 3\n", - "numVertices = 1000\n", - "d = DiscreteNonParametric(1:numVertices, [1.0/(i^.5) for i in 1:numVertices]/sum([1.0/(i^.5) for i in 1:numVertices]))\n", - "dEdgeLabels = DiscreteNonParametric(1:numEdgeLabels, [1.0/(i^.75) for i in 1:numEdgeLabels]/sum([1.0/(i^.75) for i in 1:numEdgeLabels]))\n", - "dVertexLabels = DiscreteNonParametric(1:numVertexLabels, [1.0/(i^.75) for i in 1:numVertexLabels]/sum([1.0/(i^.75) for i in 1:numVertexLabels]))\n", - "x1 = rand(d, n) .% numVertices .+ 1\n", - "x2 = rand(d, n) .% numVertices .+ 1\n", - "g = PropertyGraph(numVertices)\n", - "for x in hcat(x1, x2)\n", - " add_labeled_node!(g, x, [only(rand(dVertexLabels, 1))])\n", - "end\n", - "for i in range(1, length(x1))\n", - " add_labeled_edge!(g, (x1[i], x2[i]), only(rand(dEdgeLabels, 1)))\n", - "end" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then, we generate our lifted graph summary which includes cardinality and min/avg/max degree information about every edge between colors." - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "summary = generate_color_summary(g, 64)\n", - "nothing" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1521180" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "get_color_summary_size(summary)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Lastly, we create a simple acyclic query graph composed of a star with a small leg and see how our bounds compare with the exact result." - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1-element Vector{Int64}:\n", - " 6" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query = PropertyGraph(4)\n", - "add_labeled_node!(query, 1, [1])\n", - "add_labeled_node!(query, 2, [1])\n", - "add_labeled_node!(query, 3, [1])\n", - "add_labeled_node!(query, 4, [1])\n", - "add_labeled_edge!(query, (1,2), 10)\n", - "add_labeled_edge!(query, (2,3), 5)\n", - "add_labeled_edge!(query, (3,4), 1)\n", - "add_labeled_edge!(query, (4,1), 6)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3-element Vector{Float64}:\n", - " 0.0\n", - " 31.444869513359386\n", - " 2.072297e7" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bounds_without_partial_agg = get_cardinality_bounds(query, summary; use_partial_sums = false, try_all_starting_nodes=false, verbose=false)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3-element Vector{Float64}:\n", - " 0.0\n", - " 31.444869513359432\n", - " 2.072297e7" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bounds_with_partial_agg = get_cardinality_bounds(query, summary; use_partial_sums = true, try_all_starting_nodes=false, verbose=false)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3-element Vector{Float64}:\n", - " NaN\n", - " 0.9999999999999986\n", - " 1.0" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bounds_without_partial_agg ./ bounds_with_partial_agg # Check that the partial summation doesn't affect the output (beyond floating point issues)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "685" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "exact_size = only(get_exact_size(query, g; verbose=false))" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Relative Error (Lower): 0.0\n", - "Relative Error (Avg): 0.04590491899760501\n", - "Relative Error (Upper): 30252.51094890511\n" - ] - } - ], - "source": [ - "println(\"Relative Error (Lower): \", only(bounds_with_partial_agg[1])/exact_size)\n", - "println(\"Relative Error (Avg): \", only(bounds_with_partial_agg[2])/exact_size)\n", - "println(\"Relative Error (Upper): \", only(bounds_with_partial_agg[3])/exact_size)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we plot the error for our bounds on a variety of graph sizes." - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "ename": "LoadError", - "evalue": "MethodError: no method matching generate_color_summary(::SimpleDiGraph{Int64}, ::Int64)\n\u001b[0mClosest candidates are:\n\u001b[0m generate_color_summary(\u001b[91m::PropertyGraph\u001b[39m, ::Int64; weighting) at ~/Cardinality-with-Colors/Source/QuasiStableCardinalityEstimator.jl:21", - "output_type": "error", - "traceback": [ - "MethodError: no method matching generate_color_summary(::SimpleDiGraph{Int64}, ::Int64)\n\u001b[0mClosest candidates are:\n\u001b[0m generate_color_summary(\u001b[91m::PropertyGraph\u001b[39m, ::Int64; weighting) at ~/Cardinality-with-Colors/Source/QuasiStableCardinalityEstimator.jl:21", - "", - "Stacktrace:", - " [1] top-level scope", - " @ In[34]:24", - " [2] eval", - " @ ./boot.jl:368 [inlined]", - " [3] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)", - " @ Base ./loading.jl:1428" - ] - } - ], - "source": [ - "query_graph = DiGraph(6)\n", - "add_edge!(query_graph, (1,2))\n", - "add_edge!(query_graph, (2,3))\n", - "add_edge!(query_graph, (2,6))\n", - "add_edge!(query_graph, (3,4))\n", - "add_edge!(query_graph, (3,5))\n", - "\n", - "bounds = []\n", - "exact_sizes = []\n", - "graph_sizes = []\n", - "for i in range(1, 12)\n", - " numVertices = 5*2^i\n", - " push!(graph_sizes, numVertices)\n", - " n = numVertices*5\n", - " zipf = [1.0/(i^.5) for i in 1:numVertices]\n", - " zipf = zipf ./ sum(zipf)\n", - " d = DiscreteNonParametric(1:numVertices, zipf)\n", - " x1 = rand(d, n) .% numVertices\n", - " x2 = rand(d, n) .% numVertices\n", - " g = DiGraph(numVertices)\n", - " for i in range(1, length(x1))\n", - " add_edge!(g, x1[i], x2[i])\n", - " end\n", - " summary = generate_color_summary(g, 64)\n", - " bounds_with_partial_agg = get_cardinality_bounds(query_graph, summary; use_partial_sums = true, try_all_starting_nodes=true, verbose=false)\n", - " push!(bounds, bounds_with_partial_agg)\n", - " exact_size = only(get_exact_size(query_graph, g; verbose=false))\n", - " push!(exact_sizes, exact_size)\n", - "end" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "lower_bounds = max.(.1, [x[1] for x in bounds]./exact_sizes)\n", - "avg_bounds = [x[2] for x in bounds]./exact_sizes\n", - "upper_bounds = [x[3] for x in bounds]./exact_sizes\n", - "plot(graph_sizes, [lower_bounds avg_bounds upper_bounds], title=\"4 Chain Query\", \n", - " label = [\"Lower\" \"Avg\" \"Upper\"], xlabel=\"Graph Size\", ylabel = \"Relative Error\", \n", - " xscale=:log10, yscale=:log10, minorgrid=true)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "using Graphs\n", - "\n", - "function get_min_width_node_order(g::DiGraph) \n", - " partial_orders = [x for x in vertices(g)]\n", - " while length(partial_orders[0]) < nv(g)\n", - " \n", - " end\n", - " \n", - "\n", - "\n", - "\n", - " min_width = nv(g)\n", - " min_order = []\n", - " for starting_node in vertices(g)\n", - " max_width = 0\n", - " visited_nodes = [starting_node]\n", - " while length(visited_nodes) < nv(g)\n", - " new_width = nv(g)\n", - " next_node = -1\n", - " for potential_node in vertices(g)\n", - " if potential_node in visited_nodes || !any([x in all_neighbors(g, potential_node) for x in visited_nodes])\n", - " continue\n", - " end\n", - " potential_visited_nodes = []\n", - " copy!(potential_visited_nodes, visited_nodes)\n", - " push!(potential_visited_nodes, potential_node)\n", - " potential_num_active_nodes = 0\n", - " for v in potential_visited_nodes\n", - " if ! all([x in potential_visited_nodes for x in all_neighbors(g, v)])\n", - " potential_num_active_nodes += 1\n", - " end\n", - " end\n", - " if potential_num_active_nodes <= new_width\n", - " next_node = potential_node\n", - " new_width = potential_num_active_nodes\n", - " end\n", - " end\n", - " push!(visited_nodes, next_node)\n", - " max_width = max(max_width, new_width)\n", - " end\n", - " if max_width <= min_width\n", - " min_order = visited_nodes\n", - " min_width = max_width\n", - " end\n", - " end\n", - " return min_order\n", - "end" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Julia 1.8.5", - "language": "julia", - "name": "julia-1.8" - }, - "language_info": { - "file_extension": ".jl", - "mimetype": "application/julia", - "name": "julia", - "version": "1.8.5" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}