diff --git a/examples/basic/mining_gfd/figures/gfds/blogs_gfd.png b/examples/basic/mining_gfd/figures/gfds/blogs_gfd.png
new file mode 100644
index 0000000000..06caee82a8
Binary files /dev/null and b/examples/basic/mining_gfd/figures/gfds/blogs_gfd.png differ
diff --git a/examples/basic/mining_gfd/figures/gfds/study_gfd.png b/examples/basic/mining_gfd/figures/gfds/study_gfd.png
new file mode 100644
index 0000000000..c193d9ef0d
Binary files /dev/null and b/examples/basic/mining_gfd/figures/gfds/study_gfd.png differ
diff --git a/examples/basic/mining_gfd/figures/graphs/blogs_graph.png b/examples/basic/mining_gfd/figures/graphs/blogs_graph.png
new file mode 100644
index 0000000000..1f5167572f
Binary files /dev/null and b/examples/basic/mining_gfd/figures/graphs/blogs_graph.png differ
diff --git a/examples/basic/mining_gfd/figures/graphs/study_graph.png b/examples/basic/mining_gfd/figures/graphs/study_graph.png
new file mode 100644
index 0000000000..9b1b89c77b
Binary files /dev/null and b/examples/basic/mining_gfd/figures/graphs/study_graph.png differ
diff --git a/examples/basic/mining_gfd/mining_gfd1.py b/examples/basic/mining_gfd/mining_gfd1.py
new file mode 100644
index 0000000000..e8ed940cb6
--- /dev/null
+++ b/examples/basic/mining_gfd/mining_gfd1.py
@@ -0,0 +1,79 @@
+"""Example: mine GFDs from a small graph of accounts and blogs."""
+
+from pathlib import Path
+
+import desbordante
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+
+
+class bcolors:
+    """ANSI escape sequences used to colour console output."""
+
+    HEADER = '\033[95m'
+    WARNING = '\033[93m'
+    ENDC = '\033[0m'
+
+
+GRAPH_NAME = 'blogs_graph'
+GFD_NAME = 'blogs_gfd'
+
+GRAPHS_DATASETS_FOLDER_PATH = 'examples/datasets/mining_gfd'
+
+GRAPH = Path(f'{GRAPHS_DATASETS_FOLDER_PATH}/{GRAPH_NAME}.dot')
+
+GRAPH_IMAGE = Path(f'examples/basic/mining_gfd/figures/graphs/{GRAPH_NAME}.png')
+GFD_IMAGE = Path(f'examples/basic/mining_gfd/figures/gfds/{GFD_NAME}.png')
+
+GRAPH_INFO = ('The graph is depicted in the figure. The following abbreviations '
+              'were used: A - account, B - blog. Vertices labeled A have a '
+              '"name" attribute showing the nickname; vertices labeled B - '
+              '"author", indicating who wrote the blog. The values of these '
+              'attributes are labeled next to the vertices. The edges are also '
+              'labeled as: "post", which indicates who wrote the blog, and '
+              '"like", which indicates approval by another person. In the '
+              'drawing, the edges are marked "post" in bold.\n')
+
+INFO = ("Let's run the algorithm and look at the result. We will consider "
+        "all dependencies with a pattern of no more than 3 vertices, as well as "
+        "with a frequency of occurrence of at least 3 times.\n")
+
+RESULTS = ("The found dependency indicates that if the author has posted a "
+           "blog, then the authorship of this blog always includes the "
+           "name of the person who posted it.\n")
+
+EXIT = f'{bcolors.WARNING}Close the image window to finish.{bcolors.ENDC}'
+
+
+def execute_algo(algo):
+    """Mine GFDs from GRAPH with ``algo`` and print how many were found.
+
+    Patterns are limited to 3 vertices (gfd_k) and must occur at least
+    3 times (gfd_sigma), as announced in INFO.
+    """
+    algo.load_data(graph=GRAPH, gfd_k=3, gfd_sigma=3)
+    algo.execute()
+    print(f'{bcolors.HEADER}Desbordante > {bcolors.ENDC}', end='')
+    print('Mined GFDs:', len(algo.get_gfds()))
+    print()
+
+
+def show_example():
+    """Show the graph image and the mined GFD image side by side."""
+    _, axarr = plt.subplots(1, 2, figsize=(12, 5), gridspec_kw={'width_ratios': [7, 3], 'wspace': 0.5})
+    axarr[0].set_axis_off()
+    axarr[0].set_title('$Graph$')
+    axarr[0].imshow(mpimg.imread(GRAPH_IMAGE))
+    axarr[1].set_axis_off()
+    axarr[1].set_title('$Mined$ $GFD$')
+    axarr[1].imshow(mpimg.imread(GFD_IMAGE))
+    plt.show()
+
+
+print(GRAPH_INFO)
+print(INFO)
+execute_algo(desbordante.gfd_mining.algorithms.GfdMiner())
+print(RESULTS)
+print(EXIT)
+
+show_example()
diff --git a/examples/basic/mining_gfd/mining_gfd2.py b/examples/basic/mining_gfd/mining_gfd2.py
new file mode 100644
index 0000000000..9d57f5ac00
--- /dev/null
+++ b/examples/basic/mining_gfd/mining_gfd2.py
@@ -0,0 +1,77 @@
+"""Example: mine GFDs from a small graph of students and tasks."""
+
+from pathlib import Path
+
+import desbordante
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+
+
+class bcolors:
+    """ANSI escape sequences used to colour console output."""
+
+    HEADER = '\033[95m'
+    WARNING = '\033[93m'
+    ENDC = '\033[0m'
+
+
+GRAPH_NAME = 'study_graph'
+GFD_NAME = 'study_gfd'
+
+GRAPHS_DATASETS_FOLDER_PATH = 'examples/datasets/mining_gfd'
+
+GRAPH = Path(f'{GRAPHS_DATASETS_FOLDER_PATH}/{GRAPH_NAME}.dot')
+
+GRAPH_IMAGE = Path(f'examples/basic/mining_gfd/figures/graphs/{GRAPH_NAME}.png')
+GFD_IMAGE = Path(f'examples/basic/mining_gfd/figures/gfds/{GFD_NAME}.png')
+
+GRAPH_INFO = ('The figure provides an example of a graph. '
+              'The following abbreviations were used here: T - task, S - student. '
+              'The vertices with the T-label have the "difficulty" attribute'
+              ', the vertices with the S-label have the "name", "degree" and "year" '
+              'attributes, which indicate the student\'s name, level of education and year. '
+              'The values of these attributes are signed next to the vertices, except for '
+              'the name, since it is not informative.\n')
+
+INFO = ("Let's run the algorithm. We'll specify 2 as the k parameter to look for patterns "
+        "with no more than two vertices, and we'll specify 3 as the sigma to exclude "
+        "rare dependencies.\n")
+
+RESULTS = ("The dependency found indicates that only second-year master's students are "
+           "working on the hard task.\n")
+
+EXIT = f'{bcolors.WARNING}Close the image window to finish.{bcolors.ENDC}'
+
+
+def execute_algo(algo):
+    """Mine GFDs from GRAPH with ``algo`` and print how many were found.
+
+    Patterns are limited to 2 vertices (gfd_k); gfd_sigma=3 excludes
+    rare dependencies, as announced in INFO.
+    """
+    algo.load_data(graph=GRAPH, gfd_k=2, gfd_sigma=3)
+    algo.execute()
+    print(f'{bcolors.HEADER}Desbordante > {bcolors.ENDC}', end='')
+    print('Mined GFDs:', len(algo.get_gfds()))
+    print()
+
+
+def show_example():
+    """Show the graph image and the mined GFD image side by side."""
+    _, axarr = plt.subplots(1, 2, figsize=(12, 5), gridspec_kw={'width_ratios': [7, 3], 'wspace': 0.5})
+    axarr[0].set_axis_off()
+    axarr[0].set_title('$Graph$')
+    axarr[0].imshow(mpimg.imread(GRAPH_IMAGE))
+    axarr[1].set_axis_off()
+    axarr[1].set_title('$Mined$ $GFD$')
+    axarr[1].imshow(mpimg.imread(GFD_IMAGE))
+    plt.show()
+
+
+print(GRAPH_INFO)
+print(INFO)
+execute_algo(desbordante.gfd_mining.algorithms.GfdMiner())
+print(RESULTS)
+print(EXIT)
+
+show_example()
diff --git a/examples/datasets/mining_gfd/blogs_graph.dot b/examples/datasets/mining_gfd/blogs_graph.dot
new file mode 100644
index 0000000000..9edd0b7314
--- /dev/null
+++ b/examples/datasets/mining_gfd/blogs_graph.dot
@@ -0,0 +1,27 @@
+graph G {
+0[label=blog author=Leonardo];
+1[label=blog author=Raphael];
+2[label=blog author=Donatello];
+3[label=blog author=Michelangelo];
+4[label=blog author=Donatello];
+5[label=blog author=Michelangelo];
+6[label=blog author=Donatello];
+7[label=account name=Leonardo];
+8[label=account name=Donatello];
+9[label=account name=Raphael];
+10[label=account name=Michelangelo];
+7--0 [label=post];
+7--1 [label=like];
+7--2 [label=like];
+8--0 [label=like];
+8--2 [label=post];
+8--4 [label=post];
+8--5 [label=like];
+8--6 [label=post];
+9--1 [label=post];
+9--3 [label=like];
+10--3 [label=post];
+10--4 [label=like];
+10--5 [label=post];
+10--6 [label=like];
+}
diff --git a/examples/datasets/mining_gfd/study_graph.dot b/examples/datasets/mining_gfd/study_graph.dot
new file mode 100644
index 0000000000..1112601e4b
--- /dev/null
+++ b/examples/datasets/mining_gfd/study_graph.dot
@@ -0,0 +1,34 @@
+graph G {
+0[label=task difficulty=easy];
+1[label=task difficulty=normal];
+2[label=task difficulty=normal];
+3[label=task difficulty=hard];
+4[label=task difficulty=hard];
+5[label=task difficulty=hard];
+6[label=student name=James degree=bachelor year=2];
+7[label=student name=Michael degree=master year=1];
+8[label=student name=Robert degree=bachelor year=3];
+9[label=student name=John degree=master year=2];
+10[label=student name=David degree=bachelor year=4];
+11[label=student name=William degree=master year=2];
+12[label=student name=Richard degree=master year=2];
+13[label=student name=Joseph degree=master year=2];
+14[label=student name=Thomas degree=master year=2];
+15[label=student name=Christopher degree=master year=2];
+0--6 [label=performs];
+1--6 [label=performs];
+1--7 [label=performs];
+1--10 [label=performs];
+2--7 [label=performs];
+2--8 [label=performs];
+2--9 [label=performs];
+3--9 [label=performs];
+3--11 [label=performs];
+3--12 [label=performs];
+4--12 [label=performs];
+4--13 [label=performs];
+4--14 [label=performs];
+5--11 [label=performs];
+5--14 [label=performs];
+5--15 [label=performs];
+}