Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GFD mining #465

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
69 changes: 69 additions & 0 deletions examples/basic/mining_gfd/mining_gfd1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from pathlib import Path

import desbordante
import matplotlib.pyplot as plt
import matplotlib.image as mpimg


class bcolors:
    """ANSI escape sequences used to colour this example's console output."""

    # Prefixes the "Desbordante > " prompt printed by execute_algo().
    HEADER = '\033[95m'
    # Highlights the closing hint stored in EXIT.
    WARNING = '\033[93m'
    # Terminates a coloured span started by HEADER or WARNING.
    ENDC = '\033[0m'


# Base names (without extension) of the dataset graph and of the picture of the mined GFD.
GRAPH_NAME = 'blogs_graph'
GFD_NAME = 'blogs_gfd'

# Folder holding the .dot datasets; the path is relative, so it is resolved
# against the current working directory.
GRAPHS_DATASETS_FOLDER_PATH = 'examples/datasets/mining_gfd'

# DOT file handed to the algorithm as its `graph` option in execute_algo().
GRAPH = Path(f'{GRAPHS_DATASETS_FOLDER_PATH}/{GRAPH_NAME}.dot')

# Pre-rendered PNG figures displayed by show_example().
GRAPH_IMAGE = Path(f'examples/basic/mining_gfd/figures/graphs/{GRAPH_NAME}.png')
GFD_IMAGE = Path(f'examples/basic/mining_gfd/figures/gfds/{GFD_NAME}.png')

# Explanation of the graph figure, printed first.
GRAPH_INFO = ('The graph is depicted in the figure. The following abbreviations '
'were used: A - account, B - blog. Vertices labeled A have a '
'"name" attribute showing the nickname; vertices labeled B - '
'"author", indicating who wrote the blog. The values of these '
'attributes are labeled next to the vertices. The edges are also '
'labeled as: "post", which indicates who wrote the blog, and '
'"like", which indicates approval by another person. In the '
'drawing, the edges are marked "post" in bold.\n')

# Describes the run parameters; must stay in sync with gfd_k / gfd_sigma in execute_algo().
INFO = ("Let's run the algorithm and look at the result. We will consider "
"all dependencies with a pattern of no more than 3 vertices, as well as "
"with a frequency of occurrence of at least 3 times.\n")

# Plain-language reading of the mined dependency, printed after the run.
RESULTS = ("The found dependency indicates that if the author has posted a "
"blog, then the authorship of this blog always includes the "
"name of the person who posted it.\n")

# Closing hint, coloured with the WARNING escape sequence.
EXIT = f'{bcolors.WARNING}Close the image window to finish.{bcolors.ENDC}'


def execute_algo(algo):
    """Run GFD mining on the blogs graph and print how many GFDs were found.

    Args:
        algo: a miner object exposing ``load_data``, ``execute`` and
            ``get_gfds`` (the script passes a ``GfdMiner`` instance).
    """
    # Per the INFO text: gfd_k=3 limits patterns to at most 3 vertices and
    # gfd_sigma=3 requires a dependency to occur at least 3 times.
    algo.load_data(graph=GRAPH, gfd_k=3, gfd_sigma=3)
    algo.execute()

    mined_gfds = algo.get_gfds()
    # Coloured prompt and the result are emitted on the same line.
    print(f'{bcolors.HEADER}Desbordante > {bcolors.ENDC}', end='')
    print('Mined GFDs:', len(mined_gfds))
    print()


def show_example():
    """Display the source graph and the mined GFD side by side in one figure.

    Reads the PNG files named by GRAPH_IMAGE and GFD_IMAGE and hands control
    to ``plt.show()``.
    """
    # The graph picture gets the wider (7:3) column.
    layout = {'width_ratios': [7, 3], 'wspace': 0.5}
    _, axarr = plt.subplots(1, 2, figsize=(12, 5), gridspec_kw=layout)
    graph_ax, gfd_ax = axarr[0], axarr[1]

    graph_ax.set_axis_off()
    graph_ax.set_title('$Graph$')
    graph_ax.imshow(mpimg.imread(GRAPH_IMAGE))

    gfd_ax.set_axis_off()
    gfd_ax.set_title('$Mined$ $GFD$')
    gfd_ax.imshow(mpimg.imread(GFD_IMAGE))

    plt.show()


# Script flow: describe the data and the parameters, run the miner, then
# explain the mined dependency.
print(GRAPH_INFO)
print(INFO)
execute_algo(desbordante.gfd_mining.algorithms.GfdMiner())
print(RESULTS)
# The closing hint is printed before the figure is opened.
# NOTE(review): presumably plt.show() blocks until the window is closed - confirm.
print(EXIT)

show_example()
67 changes: 67 additions & 0 deletions examples/basic/mining_gfd/mining_gfd2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from pathlib import Path

import desbordante
import matplotlib.pyplot as plt
import matplotlib.image as mpimg


class bcolors:
    """ANSI escape sequences used to colour this example's console output."""

    # Prefixes the "Desbordante > " prompt printed by execute_algo().
    HEADER = '\033[95m'
    # Highlights the closing hint stored in EXIT.
    WARNING = '\033[93m'
    # Terminates a coloured span started by HEADER or WARNING.
    ENDC = '\033[0m'


# Base names (without extension) of the dataset graph and of the picture of the mined GFD.
GRAPH_NAME = 'study_graph'
GFD_NAME = 'study_gfd'

# Folder holding the .dot datasets; the path is relative, so it is resolved
# against the current working directory.
GRAPHS_DATASETS_FOLDER_PATH = 'examples/datasets/mining_gfd'

# DOT file handed to the algorithm as its `graph` option in execute_algo().
GRAPH = Path(f'{GRAPHS_DATASETS_FOLDER_PATH}/{GRAPH_NAME}.dot')

# Pre-rendered PNG figures displayed by show_example().
GRAPH_IMAGE = Path(f'examples/basic/mining_gfd/figures/graphs/{GRAPH_NAME}.png')
GFD_IMAGE = Path(f'examples/basic/mining_gfd/figures/gfds/{GFD_NAME}.png')

# Explanation of the graph figure, printed first.
# NOTE(review): the text says T vertices have a "name" attribute, but the task
# vertices in study_graph.dot only carry "difficulty" - confirm the wording.
GRAPH_INFO = ('The figure provides an example of a graph. '
'The following abbreviations were used here: T - task, S - student. '
'The vertices with the T-label have the attributes "name" and "difficulty"'
', the vertices with the S-label have the "name", "degree" and "year" '
'attributes, which indicate the student\'s name, level of education and year. '
'The values of these attributes are signed next to the vertices, except for '
'the name, since it is not informative.\n')

# Describes the run parameters; must stay in sync with gfd_k / gfd_sigma in execute_algo().
INFO = ("Let's run the algorithm. We'll specify 2 as the k parameter to look for patterns "
"with no more than two vertices, and we'll specify 3 as the sigma to exclude "
"rare dependencies.\n")

# Plain-language reading of the mined dependency, printed after the run.
RESULTS = ("The dependency found indicates that only second-year master's students are "
"working on the hard task.\n")

# Closing hint, coloured with the WARNING escape sequence.
EXIT = f'{bcolors.WARNING}Close the image window to finish.{bcolors.ENDC}'


def execute_algo(algo):
    """Run GFD mining on the study graph and print how many GFDs were found.

    Args:
        algo: a miner object exposing ``load_data``, ``execute`` and
            ``get_gfds`` (the script passes a ``GfdMiner`` instance).
    """
    # Per the INFO text: gfd_k=2 limits patterns to at most two vertices and
    # gfd_sigma=3 excludes rare dependencies.
    algo.load_data(graph=GRAPH, gfd_k=2, gfd_sigma=3)
    algo.execute()

    mined_gfds = algo.get_gfds()
    # Coloured prompt and the result are emitted on the same line.
    print(f'{bcolors.HEADER}Desbordante > {bcolors.ENDC}', end='')
    print('Mined GFDs:', len(mined_gfds))
    print()


def show_example():
    """Display the source graph and the mined GFD side by side in one figure.

    Reads the PNG files named by GRAPH_IMAGE and GFD_IMAGE and hands control
    to ``plt.show()``.
    """
    # The graph picture gets the wider (7:3) column.
    layout = {'width_ratios': [7, 3], 'wspace': 0.5}
    _, axarr = plt.subplots(1, 2, figsize=(12, 5), gridspec_kw=layout)
    graph_ax, gfd_ax = axarr[0], axarr[1]

    graph_ax.set_axis_off()
    graph_ax.set_title('$Graph$')
    graph_ax.imshow(mpimg.imread(GRAPH_IMAGE))

    gfd_ax.set_axis_off()
    gfd_ax.set_title('$Mined$ $GFD$')
    gfd_ax.imshow(mpimg.imread(GFD_IMAGE))

    plt.show()


# Script flow: describe the data and the parameters, run the miner, then
# explain the mined dependency.
print(GRAPH_INFO)
print(INFO)
execute_algo(desbordante.gfd_mining.algorithms.GfdMiner())
print(RESULTS)
# The closing hint is printed before the figure is opened.
# NOTE(review): presumably plt.show() blocks until the window is closed - confirm.
print(EXIT)

show_example()
27 changes: 27 additions & 0 deletions examples/datasets/mining_gfd/blogs_graph.dot
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
graph G {
0[label=blog author=Leonardo];
1[label=blog author=Raphael];
2[label=blog author=Donatello];
3[label=blog author=Michelangelo];
4[label=blog author=Donatello];
5[label=blog author=Michelangelo];
6[label=blog author=Donatello];
7[label=account name=Leonardo];
8[label=account name=Donatello];
9[label=account name=Raphael];
10[label=account name=Michelangelo];
7--0 [label=post];
7--1 [label=like];
7--2 [label=like];
8--0 [label=like];
8--2 [label=post];
8--4 [label=post];
8--5 [label=like];
8--6 [label=post];
9--1 [label=post];
9--3 [label=like];
10--3 [label=post];
10--4 [label=like];
10--5 [label=post];
10--6 [label=like];
}
34 changes: 34 additions & 0 deletions examples/datasets/mining_gfd/study_graph.dot
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
graph G {
0[label=task difficulty=easy];
1[label=task difficulty=normal];
2[label=task difficulty=normal];
3[label=task difficulty=hard];
4[label=task difficulty=hard];
5[label=task difficulty=hard];
6[label=student name=James degree=bachelor year=2];
7[label=student name=Michael degree=master year=1];
8[label=student name=Robert degree=bachelor year=3];
9[label=student name=John degree=master year=2];
10[label=student name=David degree=bachelor year=4];
11[label=student name=William degree=master year=2];
12[label=student name=Richard degree=master year=2];
13[label=student name=Joseph degree=master year=2];
14[label=student name=Thomas degree=master year=2];
15[label=student name=Christopher degree=master year=2];
0--6 [label=performs];
1--6 [label=performs];
1--7 [label=performs];
1--10 [label=performs];
2--7 [label=performs];
2--8 [label=performs];
2--9 [label=performs];
3--9 [label=performs];
3--11 [label=performs];
3--12 [label=performs];
4--12 [label=performs];
4--13 [label=performs];
4--14 [label=performs];
5--11 [label=performs];
5--14 [label=performs];
5--15 [label=performs];
}
2 changes: 1 addition & 1 deletion src/core/algorithms/gfd/balancer.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "balancer.h"
#include "algorithms/gfd/balancer.h"

#include <algorithm>
#include <map>
Expand Down
33 changes: 33 additions & 0 deletions src/core/algorithms/gfd/comparator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include "algorithms/gfd/comparator.h"

#include <algorithm>
#include <cstdlib>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include <boost/graph/vf2_sub_graph_iso.hpp>

#include "algorithms/gfd/gfd.h"

namespace comparator {
AntonChern marked this conversation as resolved.
Show resolved Hide resolved

// Treats a literal as an unordered pair: two literals are equal when they
// match side-for-side, or when one equals the other with its sides swapped.
bool CompareLiterals(Literal const& lhs, Literal const& rhs) {
    if (lhs == rhs) {
        return true;
    }
    // Swapped orientation: lhs = (a, b) against rhs = (b, a).
    bool const swapped_match = (lhs.first == rhs.second) && (lhs.second == rhs.first);
    return swapped_match;
}

// Returns true if `literals` holds at least one element that CompareLiterals
// considers equal to `l`, i.e. equal as given or with its two sides swapped.
// An empty vector yields false.
bool ContainsLiteral(std::vector<Literal> const& literals, Literal const& l) {
// Predicate applied to each stored literal in turn.
auto check = [&l](auto const& cur_lit) { return CompareLiterals(cur_lit, l); };
return std::any_of(literals.begin(), literals.end(), check);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return std::any_of(literals.begin(), literals.end(), check);
return std::ranges::any_of(literals, check);

More cleaner this way, gets rid of .begin()/.end() boilerplate

}

// Returns true if `lhs` and `rhs` have the same size and every literal of
// `lhs` has a CompareLiterals-equal counterpart in `rhs`. Element order is
// irrelevant.
// NOTE(review): only the sizes and one direction of containment are checked,
// so inputs with repeated literals (e.g. {a, a, b} vs {a, b, b}) compare
// equal - confirm callers never pass duplicates.
bool CompareLiteralSets(std::vector<Literal> const& lhs, std::vector<Literal> const& rhs) {
if (lhs.size() != rhs.size()) {
return false;
}
// Predicate: is the current literal of `lhs` present in `rhs`?
auto check = [&rhs](auto const& cur_lit) { return ContainsLiteral(rhs, cur_lit); };
return std::all_of(lhs.begin(), lhs.end(), check);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return std::all_of(lhs.begin(), lhs.end(), check);
return std::ranges::all_of(lhs, check);

}

} // namespace comparator
17 changes: 17 additions & 0 deletions src/core/algorithms/gfd/comparator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include <vector>

#include <boost/graph/vf2_sub_graph_iso.hpp>

#include "algorithms/gfd/gfd.h"

namespace comparator {
AntonChern marked this conversation as resolved.
Show resolved Hide resolved

using namespace details;

// Returns true if the two literals are equal as given, or equal once the two
// sides of one of them are swapped.
bool CompareLiterals(Literal const& lhs, Literal const& rhs);
// Returns true if `literals` contains an element equal to `l` under CompareLiterals.
bool ContainsLiteral(std::vector<Literal> const& literals, Literal const& l);
// Returns true if both vectors have the same size and every literal of `lhs`
// has a CompareLiterals-equal counterpart in `rhs`; element order is ignored.
bool CompareLiteralSets(std::vector<Literal> const& lhs, std::vector<Literal> const& rhs);

} // namespace comparator
5 changes: 4 additions & 1 deletion src/core/algorithms/gfd/egfd_validation.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "egfd_validation.h"
#include "algorithms/gfd/egfd_validation.h"

#include <iostream>

Expand All @@ -13,6 +13,7 @@
namespace {

using namespace algos;
using namespace details;
using Match = std::vector<std::pair<std::set<vertex_t>::iterator, std::set<vertex_t>::iterator>>;

void FstStepForest(graph_t const& graph, std::map<vertex_t, std::set<vertex_t>>& rooted_subtree,
Expand Down Expand Up @@ -1035,6 +1036,8 @@ bool Validate(graph_t const& graph, Gfd const& gfd) {

namespace algos {

using namespace details;

std::vector<Gfd> EGfdValidation::GenerateSatisfiedGfds(graph_t const& graph,
std::vector<Gfd> const& gfds) {
for (auto& gfd : gfds) {
Expand Down
3 changes: 2 additions & 1 deletion src/core/algorithms/gfd/egfd_validation.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#pragma once
#include "algorithms/gfd/gfd.h"
#include "algorithms/gfd/gfd_handler.h"
#include "config/names_and_descriptions.h"
#include "gfd.h"

namespace algos {

using namespace details;
using CPI = std::map<std::pair<vertex_t, vertex_t>, std::map<vertex_t, std::set<vertex_t>>>;

class EGfdValidation : public GfdHandler {
Expand Down
78 changes: 76 additions & 2 deletions src/core/algorithms/gfd/gfd.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,85 @@
#include "gfd.h"
#include "algorithms/gfd/gfd.h"

#include <algorithm>
#include <cstdlib>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#include <algorithm> is not used in this file

#include <sstream>
#include <string>

#include <boost/graph/vf2_sub_graph_iso.hpp>
AntonChern marked this conversation as resolved.
Show resolved Hide resolved
AntonChern marked this conversation as resolved.
Show resolved Hide resolved

#include "algorithms/gfd/comparator.h"
#include "parser/graph_parser/graph_parser.h"

std::string Gfd::ToString() {
namespace details {

// Produces the textual form of this GFD: parser::graph_parser::WriteGfd writes
// into an in-memory stream and the accumulated text is returned.
std::string Gfd::ToString() const {
    std::stringstream out;
    parser::graph_parser::WriteGfd(out, *this);
    return out.str();
}

AntonChern marked this conversation as resolved.
Show resolved Hide resolved
namespace {

using namespace comparator;

// Match callback for boost::vf2_subgraph_iso that only records that a match
// was found. It stores a reference to a caller-owned flag, so the flag must
// outlive the callback and every copy of it.
class CmpCallback {
private:
    bool& res_;

public:
    // `explicit` keeps a bool lvalue from converting to a callback implicitly.
    explicit CmpCallback(bool& res) : res_(res) {}

    // Called with the two correspondence maps of a discovered match; both are
    // ignored. Sets the flag and returns false.
    template <typename CorrespondenceMap1To2, typename CorrespondenceMap2To1>
    bool operator()(CorrespondenceMap1To2, CorrespondenceMap2To1) const {
        res_ = true;
        // Per the Boost VF2 documentation, returning false ends the search;
        // one match is all the caller needs.
        return false;
    }
};

Comment on lines +25 to +37
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using references as fields is not a good sign in any code.
Here we pass this FunctionObject to boost::vf2_subgraph_iso as a callback that will be called with operator() and set local variable bool result = false; to true.

Can't we just pass simple lambda to it then?
Like

bool result = false;
auto callback = [&result](auto, auto) { result = true; return false; };
//...
boost::vf2_subgraph_iso(query, graph, callback, //...
//...
return result;

// Vertex predicate for graph matching: a vertex of `lhs` and a vertex of `rhs`
// are equivalent when their "label" attributes are equal.
struct VCmp {
    graph_t const& lhs;
    graph_t const& rhs;

    // NOTE(review): presumably `attributes` is a std::map, in which case at()
    // throws std::out_of_range for a vertex without a "label" - confirm.
    bool operator()(vertex_t const& fr, vertex_t const& to) const {
        auto const& from_label = lhs[fr].attributes.at("label");
        auto const& to_label = rhs[to].attributes.at("label");
        return from_label == to_label;
    }
};

// Edge predicate for graph matching: an edge of `lhs` and an edge of `rhs`
// are equivalent when their `label` fields are equal.
struct ECmp {
    graph_t const& lhs;
    graph_t const& rhs;

    bool operator()(edge_t const& fr, edge_t const& to) const {
        auto const& from_label = lhs[fr].label;
        auto const& to_label = rhs[to].label;
        return from_label == to_label;
    }
};

Comment on lines +39 to +55
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is slightly better because we use const references as fields, but still, why not just use lambdas?

auto vertex_cmp =  [&query, &graph](vertex_t const& fr, vertex_t const& to) {
        return query[fr].attributes.at("label") == graph[to].attributes.at("label");
    };
auto edge_cmp = [&query, &graph](edge_t const& fr, edge_t const& to) {
        return query[fr].label == graph[to].label;
    };

//pass vertex_cmp and edge_cmp to boost::vf2_subgraph_iso

// Reports whether boost::vf2_subgraph_iso finds at least one match of `query`
// inside `graph`. Vertices are matched by their "label" attribute (VCmp) and
// edges by their label (ECmp).
bool IsSub(graph_t const& query, graph_t const& graph) {
    using IndexMap = boost::property_map<graph_t, boost::vertex_index_t>::type;

    IndexMap query_index_map = get(boost::vertex_index, query);
    IndexMap graph_index_map = get(boost::vertex_index, graph);
    std::vector<vertex_t> query_vertex_order = vertex_order_by_mult(query);

    VCmp vertex_cmp = {query, graph};
    ECmp edge_cmp = {query, graph};

    // The callback flips `found` on the first match; the function's own return
    // value is not used.
    bool found = false;
    CmpCallback on_match(found);
    boost::vf2_subgraph_iso(query, graph, on_match, query_index_map, graph_index_map,
                            query_vertex_order, edge_cmp, vertex_cmp);
    return found;
}

} // namespace

Comment on lines +25 to +72
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have not done

// Two GFDs are equal when each pattern can be matched inside the other
// (IsSub in both directions) and their premises and conclusions are equal as
// literal sets under CompareLiteralSets.
// NOTE(review): the literal comparison does not go through the vertex mapping
// found by IsSub - confirm that literals of equal patterns use the same
// vertex numbering.
bool Gfd::operator==(Gfd const& gfd) const {
    // Bind by const reference instead of copying the whole pattern graph; this
    // is valid whether GetPattern() returns a reference or a value (a returned
    // temporary has its lifetime extended by the reference).
    graph_t const& other_pattern = gfd.GetPattern();
    bool const same_pattern = IsSub(pattern_, other_pattern) && IsSub(other_pattern, pattern_);
    return same_pattern && CompareLiteralSets(premises_, gfd.GetPremises()) &&
           CompareLiteralSets(conclusion_, gfd.GetConclusion());
}

// Logical negation of operator==.
bool Gfd::operator!=(Gfd const& gfd) const {
    bool const equal = (*this == gfd);
    return !equal;
}

Comment on lines +81 to +83
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you've defined operator==, this code can easily be generated by the compiler. Put this in the .h file:

    bool operator==(const Gfd& gfd) const;
    bool operator!=(const Gfd& gfd) const = default;

and remove manual implementation

} // namespace details
Loading
Loading