diff --git a/examples/afd_multiple_error_thresholds.py b/examples/afd_multiple_error_thresholds.py
new file mode 100644
index 0000000000..c5d89deef0
--- /dev/null
+++ b/examples/afd_multiple_error_thresholds.py
@@ -0,0 +1,22 @@
+"""Run Pyro on the iris dataset with several AFD error thresholds.
+
+The table is loaded once; each run changes only the ``error`` option and
+prints the dependencies reported for that threshold.
+"""
+import desbordante
+import pandas as pd
+
+TABLE = '../examples/datasets/iris.csv'
+ERRORS = (0.0, 0.1, 0.2, 0.3)
+
+pyro = desbordante.Pyro()
+# iris.csv has no header row: header=None keeps the first sample as data
+# (header=0 would consume it as column names and drop it from the table).
+df = pd.read_csv(TABLE, sep=',', header=None)
+pyro.load_data(df)
+for error in ERRORS:
+    pyro.execute(error=error)
+    # A bare get_fds() call displays nothing in a script, so print the result.
+    print(f'AFDs (error={error}):')
+    for fd in pyro.get_fds():
+        print(fd)
diff --git a/examples/datasets/inventory_afd.csv b/examples/datasets/inventory_afd.csv
new file mode 100644
index 0000000000..f18e74da38
--- /dev/null
+++ b/examples/datasets/inventory_afd.csv
@@ -0,0 +1,13 @@
+Id,ProductName,Price
+1,Laptop,3000
+2,Laptop,3000
+3,Laptop,300
+4,Laptop,3000
+5,Smartwatch,600
+6,Headphones,500
+7,Tablet,300
+8,Tablet,500
+9,Smartphone,1000
+10,Headphones,500
+11,Laptop,3000
+12,Notebook,3000
\ No newline at end of file
diff --git a/examples/datasets/iris.csv b/examples/datasets/iris.csv
new file mode 100644
index 0000000000..a3490e0e07
--- /dev/null
+++ b/examples/datasets/iris.csv
@@ -0,0 +1,150 @@
+5.1,3.5,1.4,0.2,Iris-setosa
+4.9,3.0,1.4,0.2,Iris-setosa
+4.7,3.2,1.3,0.2,Iris-setosa
+4.6,3.1,1.5,0.2,Iris-setosa
+5.0,3.6,1.4,0.2,Iris-setosa
+5.4,3.9,1.7,0.4,Iris-setosa
+4.6,3.4,1.4,0.3,Iris-setosa
+5.0,3.4,1.5,0.2,Iris-setosa
+4.4,2.9,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.4,3.7,1.5,0.2,Iris-setosa
+4.8,3.4,1.6,0.2,Iris-setosa
+4.8,3.0,1.4,0.1,Iris-setosa
+4.3,3.0,1.1,0.1,Iris-setosa
+5.8,4.0,1.2,0.2,Iris-setosa
+5.7,4.4,1.5,0.4,Iris-setosa
+5.4,3.9,1.3,0.4,Iris-setosa
+5.1,3.5,1.4,0.3,Iris-setosa
+5.7,3.8,1.7,0.3,Iris-setosa
+5.1,3.8,1.5,0.3,Iris-setosa
+5.4,3.4,1.7,0.2,Iris-setosa
+5.1,3.7,1.5,0.4,Iris-setosa
+4.6,3.6,1.0,0.2,Iris-setosa
+5.1,3.3,1.7,0.5,Iris-setosa
+4.8,3.4,1.9,0.2,Iris-setosa
+5.0,3.0,1.6,0.2,Iris-setosa
+5.0,3.4,1.6,0.4,Iris-setosa
+5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor 
+5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica diff --git a/examples/datasets/theatres_mfd.csv 
b/examples/datasets/theatres_mfd.csv
new file mode 100644
index 0000000000..463c765e3f
--- /dev/null
+++ b/examples/datasets/theatres_mfd.csv
@@ -0,0 +1,9 @@
+Title,Theatre,Duration
+Don Quixote,Sydney Opera House,139
+Don Quixote,Teatro alla Scala,135
+Don Quixote,Grand Opera House,140
+Cinderella,Teatro alla Scala,110
+Cinderella,Grand Opera House,112
+Romeo and Juliet,Sydney Opera House,160
+Romeo and Juliet,Teatro alla Scala,163
+Romeo and Juliet,Grand Opera House,165
diff --git a/examples/datasets/university_fd.csv b/examples/datasets/university_fd.csv
new file mode 100644
index 0000000000..c3afc981e6
--- /dev/null
+++ b/examples/datasets/university_fd.csv
@@ -0,0 +1,9 @@
+Course,Classroom,Professor,Semester
+Math,512,Dr. Smith,Fall
+Physics,406,Dr. Green,Fall
+English,208,Prof. Turner,Fall
+History,209,Prof. Davis,Fall
+Math,512,Dr. Smith,Spring
+Physics,503,Dr. Gray,Spring
+English,116,Prof. Turner,Spring
+Biology,209,Prof. Light,Spring
diff --git a/examples/mining_afd.py b/examples/mining_afd.py
new file mode 100644
index 0000000000..0e8f082e90
--- /dev/null
+++ b/examples/mining_afd.py
@@ -0,0 +1,21 @@
+"""Mine AFDs from the inventory table with Pyro and print them."""
+import desbordante
+
+TABLE = '../examples/datasets/inventory_afd.csv'
+ERROR = 0.1
+
+algo = desbordante.Pyro()
+# NOTE(review): the tuple looks like (path, separator, has_header) - confirm.
+algo.set_option('table', (TABLE, ',', True))
+# Options set without a value presumably fall back to library defaults.
+algo.set_option('is_null_equal_null')
+algo.load_data()
+algo.set_option('error', ERROR)
+algo.set_option('threads')
+algo.set_option('max_lhs')
+algo.set_option('seed')
+algo.execute()
+result = algo.get_fds()
+print('AFDs:')
+for fd in result:
+    print(fd)
diff --git a/examples/mining_fd.py b/examples/mining_fd.py
new file mode 100644
index 0000000000..ce71dd6d30
--- /dev/null
+++ b/examples/mining_fd.py
@@ -0,0 +1,15 @@
+"""Mine FDs from the university table with HyFD and print them."""
+import desbordante
+
+TABLE = '../examples/datasets/university_fd.csv'
+
+algo = desbordante.HyFD()
+# NOTE(review): the tuple looks like (path, separator, has_header) - confirm.
+algo.set_option('table', (TABLE, ',', True))
+algo.set_option('is_null_equal_null')
+algo.load_data()
+algo.execute()
+result = algo.get_fds()
+print('FDs:')
+for fd in result:
+    print(fd)
diff --git a/examples/verifying_mfd.py b/examples/verifying_mfd.py
new file mode 100644
index 0000000000..3ec180bd0b
--- /dev/null
+++ b/examples/verifying_mfd.py
@@ -0,0 +1,24 @@
+"""Check whether an MFD holds on the theatres table with MetricVerifier."""
+import desbordante
+
+TABLE = '../examples/datasets/theatres_mfd.csv'
+METRIC = 'euclidean'
+LHS_INDICES = [0]  # Title column, assuming zero-based indices
+RHS_INDICES = [2]  # Duration column, assuming zero-based indices
+PARAMETER = 5
+
+algo = desbordante.MetricVerifier()
+# NOTE(review): the tuple looks like (path, separator, has_header) - confirm.
+algo.set_option('table', (TABLE, ',', True))
+algo.set_option('is_null_equal_null')
+algo.load_data()
+algo.set_option('lhs_indices', LHS_INDICES)
+algo.set_option('metric', METRIC)
+algo.set_option('parameter', PARAMETER)
+algo.set_option('dist_from_null_is_infinity')
+algo.set_option('rhs_indices', RHS_INDICES)
+algo.execute()
+if algo.mfd_holds():
+    print('MFD holds')
+else:
+    print('MFD does not hold')