Skip to content

Commit

Permalink
Merge pull request #136 from FormalLanguageConstrainedPathQuerying/ta…
Browse files Browse the repository at this point in the history
…sk8-autotest

Автотесты для 8 и 9 задания.
  • Loading branch information
gsvgit authored Apr 20, 2024
2 parents ebd48cb + 25e7b95 commit f7b909a
Show file tree
Hide file tree
Showing 4 changed files with 473 additions and 1 deletion.
20 changes: 19 additions & 1 deletion tasks/task8.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,24 @@
## Задача

- [ ] Реализовать **функцию**, основанную на тензорном алгоритме, решающую задачу достижимости между всеми парами вершин для заданного графа и заданной КС грамматики.
- Для преобразования грамматики в RSM использовать результаты предыдущих работ.
- Для преобразования грамматики в RSM использовать результаты предыдущих работ. Явно опишите **функции** преобразования CFG -> RSM и EBNF -> RSM
- Для реализации матричных операций использовать [SciPy](https://docs.scipy.org/doc/scipy/reference/sparse.html).
- Необходимые функции:
```python
def cfpq_with_tensor(
    rsm: pyformlang.rsa.RecursiveAutomaton,
    graph: nx.DiGraph,
    start_nodes: set[int] = None,
    final_nodes: set[int] = None,
) -> set[tuple[int, int]]:
    """Solve context-free reachability with the tensor-based algorithm.

    Stub from the task statement; the body is to be written by the student.

    ``start_nodes`` comes before ``final_nodes`` on purpose: the autotests
    call this function positionally as
    ``cfpq_with_tensor(rsm, graph, start_nodes, final_nodes)``, and the
    task 9 stub ``cfpq_with_gll`` uses the same order.

    Args:
        rsm: Recursive state machine describing the query language.
        graph: Edge-labelled graph to query.
        start_nodes: Nodes that paths may start from.
            NOTE(review): presumably ``None`` means "all nodes" - confirm.
        final_nodes: Nodes that paths may end in.
            NOTE(review): presumably ``None`` means "all nodes" - confirm.

    Returns:
        Set of ``(start, final)`` node pairs connected by a path whose label
        word is accepted by ``rsm``.
    """
    pass


def cfg_to_rsm(cfg: pyformlang.cfg.CFG) -> pyformlang.rsa.RecursiveAutomaton:
    """Convert a context-free grammar into a recursive state machine.

    Stub from the task statement; the body is to be written by the student.
    """
    ...


def ebnf_to_rsm(ebnf: str) -> pyformlang.rsa.RecursiveAutomaton:
    """Convert a grammar given as EBNF text into a recursive state machine.

    Stub from the task statement; the body is to be written by the student.
    """
    ...
```
- [ ] Добавить необходимые тесты.
10 changes: 10 additions & 0 deletions tasks/task9.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,14 @@

- [ ] Реализовать **функцию**, основанную на алгоритме Generalized LL (работающем с RSM), решающую задачу достижимости между всеми парами вершин для заданного графа и заданной КС грамматики.
- Для работы с графами и RSM использовать функции из предыдущих задач.
- Требуемая функция:
```python
def cfpq_with_gll(
    rsm: pyformlang.rsa.RecursiveAutomaton,
    graph: nx.DiGraph,
    start_nodes: set[int] = None,
    final_nodes: set[int] = None,
) -> set[tuple[int, int]]:
    """Solve context-free reachability with the Generalized LL algorithm.

    Stub from the task statement; the body is to be written by the student.
    The annotated result is a set of ``(start, final)`` node pairs.
    """
    ...
```
- [ ] Добавить необходимые тесты.
220 changes: 220 additions & 0 deletions tests/autotests/test_task8.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
# This file contains test cases that you need to pass to get a grade
# You MUST NOT touch anything here except ONE block below
# You CAN modify this file IF AND ONLY IF you have found a bug and are willing to fix it
# Otherwise, please report it
import itertools
import random
from copy import deepcopy
import cfpq_data as cd
import networkx as nx
import pytest
from networkx import MultiDiGraph
from pyformlang import cfg, rsa

# Fix import statements in try block to run tests
try:
from project.task2 import graph_to_nfa, regex_to_dfa
from project.task3 import FiniteAutomaton
from project.task4 import reachability_with_constraints
from project.task7 import cfpq_with_matrix
from project.task6 import cfpq_with_hellings
from project.task8 import cfpq_with_tensor, cfg_to_rsm, ebnf_to_rsm
except ImportError:
pytestmark = pytest.mark.skip("Task 8 is not ready to test!")

# Maps a regular expression to several context-free grammars written for the
# same language; the tests below compare CFPQ answers for these grammars with
# the RPQ answer for the regex and with each other.
REGEXP_CFG: dict[str, list[cfg.CFG]] = {
    regex: [cfg.CFG.from_text(grammar_text) for grammar_text in grammar_texts]
    for regex, grammar_texts in (
        ("a", ("S -> a", "S -> N B\nB -> $\nN -> a")),
        (
            "a*",
            (
                "S -> $ | a S",
                "S -> $ | S S | a",
                "S -> S a S | $",
            ),
        ),
        ("a b c", ("S -> a b c", "S -> a B\nB -> b c")),
        (
            "a*b*",
            (
                "S -> S1 S2\nS2 -> $ | b S2\nS1 -> $ | a S1",
                "S -> $ | S1 | a S\nS1 -> $ | b S1",
            ),
        ),
        (
            "(a b)*",
            (
                "S -> $ | a b S",
                "S -> $ | S S1\nS1 -> a b",
            ),
        ),
        (
            "a b*c*",
            (
                "S -> S1 S2 S3\nS1 -> a\nS2 -> $ | S2 b\nS3 -> $ | c S3",
                "S -> a S2 S3\nS2 -> S2 b | $\nS3 -> c | $ | S3 S3",
            ),
        ),
        (
            "(a|b|c|d|e)*",
            (
                "S -> $ | S1 S\nS1 -> a | b | c | d | e",
                "S -> $ | a | b | c | d | e | S S",
                "S -> $ | a S | b S | c S | e S | d S",
            ),
        ),
        (
            "((a | b) * c)*(d | e)",
            (
                "S -> S1 S2\nS1 -> S1 S1 | $ | S3 c\n S2 -> d | e\n S3 -> b S3 | $ | a S3",
                "S -> S1 d | S1 e\nS1 -> S1 S3 c | $\nS3 -> b S3 | $ | a S3",
            ),
        ),
    )
}

# Each inner list holds grammars written for one and the same language;
# test_different_grammars requires identical CFPQ answers within a group.
GRAMMARS = [
    [cfg.CFG.from_text(grammar_text) for grammar_text in equivalent_texts]
    for equivalent_texts in (
        (
            "S -> $ | a S b | S S",
            "S -> $ | a S b S",
            "S -> $ | S a S b",
            "S -> $ | a S b | S S S",
        ),
        (
            "S -> $ | a S b | c S d | S S",
            "S -> $ | a S b S | c S d S",
            "S -> $ | S a S b | S c S d",
            "S -> $ | a S b | c S d S | S S S",
        ),
        (
            "S -> $ | S1 S S2\nS1 -> a | c\n S2 -> b | d\n S -> S S",
            "S -> $ | S1 S S2 S\n S1 -> a | c\nS2 -> b | d",
            "S -> $ | S a S b | S a S d | S c S d | S c S b",
            "S -> $ | S1 S S2 | S S S\nS1 -> a | c\nS2-> b | d",
        ),
        (
            "S -> S S | Se S1 Se\nSe -> $ | Se e\nS1 -> $ | a S1 b",
            "S -> S1 | S S | e\nS1 -> $ | a S1 b",
            "S -> S2 S | $\n S2 -> e | S1\n S1 -> $ | a S1 b",
            "S -> $ | S1 S | e S\n S1 -> $ | a S1 b",
        ),
        (
            "S -> a S | $",
            "S -> S1 | a\nS1 -> a S1 | $",
        ),
    )
]

# Grammars for non-regular languages. They are zipped positionally with
# EBNF_GRAMMARS in test_ebnf_cfg, so entry i here must generate exactly the
# same language as EBNF_GRAMMARS[i].
GRAMMARS_DIFFERENT = [
    # { a^n b^n c^m } | { a^m b^n c^n } with n >= 1, m >= 0.
    # Sab and Sbc need a non-empty base case: the EBNF counterpart is
    # "a ( Sab | $ ) b" / "b ( Sbc | $ ) c", which never derives the empty
    # word, and a rule "Sbc -> b Sbc c" alone derives nothing at all.
    cfg.CFG.from_text(
        "S -> S1 | S2\nS1 -> Sab | S1 c\nSab -> a b | a Sab b\nS2 -> Sbc | a S2\nSbc -> b c | b Sbc c"
    ),
    cfg.CFG.from_text("S -> a | b | S c S | S d S | e S f | g S"),
    # Bracket pairs a/b, b/a, c/d, d/c, e/f, f/e - the same six pairs as the
    # EBNF counterpart (the pair d/c was previously misspelled as f/c).
    cfg.CFG.from_text("S -> $ | a S b | b S a | e S f | S S | c S d | d S c | f S e"),
]

# Grammars in EBNF text form, one rule per line ("Head -> regex over symbols").
# test_ebnf_cfg zips this list with GRAMMARS_DIFFERENT, so the two lists are
# paired by position: entry i here is compared with GRAMMARS_DIFFERENT[i].
# The first entry is a multi-line string; its line breaks separate the rules.
EBNF_GRAMMARS = [
"""S -> ( Sab c* ) | ( a* Sbc )
Sab -> a ( Sab | $ ) b
Sbc -> b ( Sbc | $ ) c""",
"S -> a | b | (S ( c | d ) S ) | ( e S f ) | ( g S )",
"S -> ( ( a S b ) | ( b S a ) | ( c S d ) | ( d S c ) | ( e S f ) | (f S e) )*",
]

# Edge labels handed to the random graph generator in the `graph` fixture.
LABELS = list("abcdefgh")

# Attribute names. IS_START / IS_FINAL are the node-attribute keys set by
# generate_rnd_start_and_final; LABEL is not referenced in this file.
LABEL, IS_FINAL, IS_START = "label", "is_final", "is_start"


def generate_rnd_start_and_final(graph):
    """Pick random start and final node sets and mark them on ``graph``.

    Each set is built from ``random.choices`` over all nodes with a random
    sample size between 1 and the node count; sampling is with replacement,
    so the resulting set is non-empty but may be smaller than the sample size.
    Chosen nodes get the IS_START / IS_FINAL attribute set to ``True`` in
    their node data (the graph is modified in place).

    Returns:
        Tuple ``(start_nodes, final_nodes)`` of node sets.
    """

    def _random_node_subset():
        # Candidates first, then the sample size, then the draw itself: the
        # order of calls into `random` is kept fixed.
        candidates = list(graph.nodes().keys())
        sample_size = random.randint(1, len(graph.nodes))
        return set(random.choices(candidates, k=sample_size))

    start_nodes = _random_node_subset()
    final_nodes = _random_node_subset()

    marks = ((IS_START, start_nodes), (IS_FINAL, final_nodes))
    for node, attrs in graph.nodes(data=True):
        for flag, chosen in marks:
            if node in chosen:
                attrs[flag] = True

    return start_nodes, final_nodes


@pytest.fixture(scope="function", params=range(5))
def graph(request) -> MultiDiGraph:
    """Provide a random labelled scale-free graph with 20 to 40 nodes.

    The fixture is parametrized over ``range(5)``, so each test using it runs
    five times; the parameter value itself is not used.
    """
    node_count = random.randint(20, 40)
    random_graph = cd.graphs.labeled_scale_free_graph(node_count, labels=LABELS)
    return random_graph


class TestReachabilityTensorAlgorithm:
    """Autotests for ``cfpq_with_tensor`` and the grammar-to-RSM converters.

    Every test draws random start/final node sets for the random ``graph``
    fixture and compares answers that are expected to coincide.
    """

    @staticmethod
    def _assert_pairwise_equal(answers):
        """Assert that every two elements of ``answers`` are equal."""
        for left, right in itertools.combinations(answers, 2):
            assert left == right

    @pytest.mark.parametrize(
        "regex_str, cfg_list", REGEXP_CFG.items(), ids=lambda value: value
    )
    def test_rpq_cfpq_tensor(self, graph, regex_str, cfg_list) -> None:
        """CFPQ for a grammar of a regular language must equal the RPQ answer."""
        start_nodes, final_nodes = generate_rnd_start_and_final(graph)

        for grammar in cfg_list:
            cfpq_pairs: set[tuple[int, int]] = cfpq_with_tensor(
                cfg_to_rsm(grammar), deepcopy(graph), start_nodes, final_nodes
            )
            reachable: dict[int, set[int]] = reachability_with_constraints(
                FiniteAutomaton(graph_to_nfa(graph, start_nodes, final_nodes)),
                FiniteAutomaton(regex_to_dfa(regex_str)),
            )
            # Flatten {source: {targets}} into a set of (source, target) pairs.
            rpq_pairs = {
                (source, target)
                for source, targets in reachable.items()
                for target in targets
            }
            assert cfpq_pairs == rpq_pairs

    @pytest.mark.parametrize("eq_grammars", GRAMMARS, ids=lambda value: value)
    def test_different_grammars(self, graph, eq_grammars):
        """All grammars of one group must yield the same CFPQ answer."""
        start_nodes, final_nodes = generate_rnd_start_and_final(graph)

        answers = []
        for grammar in eq_grammars:
            rsm = cfg_to_rsm(deepcopy(grammar))
            answers.append(
                cfpq_with_tensor(rsm, deepcopy(graph), start_nodes, final_nodes)
            )

        self._assert_pairwise_equal(answers)

    @pytest.mark.parametrize("grammar", GRAMMARS_DIFFERENT, ids=lambda value: value)
    def test_hellings_matrix_tensor(self, graph, grammar):
        """Hellings, matrix and tensor algorithms must agree on one grammar."""
        start_nodes, final_nodes = generate_rnd_start_and_final(graph)

        by_hellings = cfpq_with_hellings(
            deepcopy(grammar), deepcopy(graph), start_nodes, final_nodes
        )
        by_matrix = cfpq_with_matrix(
            deepcopy(grammar), deepcopy(graph), start_nodes, final_nodes
        )
        by_tensor = cfpq_with_tensor(
            cfg_to_rsm(deepcopy(grammar)), deepcopy(graph), start_nodes, final_nodes
        )

        assert by_hellings == by_matrix == by_tensor

    @pytest.mark.parametrize(
        "cfg_grammar, ebnf_grammar",
        (zip(GRAMMARS_DIFFERENT, EBNF_GRAMMARS)),
        ids=lambda value: value,
    )
    def test_ebnf_cfg(self, graph, cfg_grammar, ebnf_grammar):
        """A CFG and its EBNF counterpart must yield the same CFPQ answer."""
        start_nodes, final_nodes = generate_rnd_start_and_final(graph)

        from_cfg = cfpq_with_tensor(
            cfg_to_rsm(cfg_grammar), deepcopy(graph), start_nodes, final_nodes
        )
        from_ebnf = cfpq_with_tensor(
            ebnf_to_rsm(ebnf_grammar), deepcopy(graph), start_nodes, final_nodes
        )

        assert from_ebnf == from_cfg

    @pytest.mark.parametrize(
        "regex_str, cfg_list", REGEXP_CFG.items(), ids=lambda value: value
    )
    def test_cfpq_tensor(self, graph, regex_str, cfg_list):
        """CFGs of a regular language and ``S -> <regex>`` as EBNF must agree."""
        start_nodes, final_nodes = generate_rnd_start_and_final(graph)

        answers = []
        for grammar in cfg_list:
            rsm = cfg_to_rsm(deepcopy(grammar))
            answers.append(
                cfpq_with_tensor(rsm, deepcopy(graph), start_nodes, final_nodes)
            )

        # The regex itself, wrapped into a single EBNF rule, joins the group.
        regex_rsm = ebnf_to_rsm(f"S -> {regex_str}")
        answers.append(
            cfpq_with_tensor(regex_rsm, deepcopy(graph), start_nodes, final_nodes)
        )

        self._assert_pairwise_equal(answers)
Loading

0 comments on commit f7b909a

Please sign in to comment.