Skip to content

Commit

Permalink
Support legacy_matrix in cfpq_eval
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyaMuravjov committed May 20, 2024
1 parent 0fb8a32 commit 061f3c6
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 34 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,19 @@ For more details, refer to [docs/cli.md](docs/cli.md).

## Evaluation

The CFPQ_PyAlgo project includes a [CFPQ evaluator](cfpq_eval) tool for evaluating the performance
The CFPQ_PyAlgo project includes a [CFPQ evaluator](cfpq_eval/README.md) tool for evaluating the performance
of various CFPQ solvers.

For more details on [CFPQ evaluator](cfpq_eval) usage, refer to [docs/eval.md](docs/eval.md).
For more details on [CFPQ evaluator](cfpq_eval/README.md) usage, refer to [docs/eval.md](docs/eval.md).

We used the [CFPQ evaluator](cfpq_eval) to compare our solver, FastMatrixCFPQ, with five
We used the [CFPQ evaluator](cfpq_eval) to compare our solver, [FastMatrixCFPQ](cfpq_cli/README.md), with five
state-of-the-art competitors:
[PEARL](https://figshare.com/articles/dataset/ASE_2023_artifact/23702271),
[POCR](https://github.com/kisslune/POCR),
[KotGLL](https://github.com/vadyushkins/kotgll),
[Graspan](https://github.com/Graspan/Graspan-C), and
[Gigascale](https://bitbucket.org/jensdietrich/gigascale-pointsto-oopsla2015/src),
as well as with the previous version of our solver, MatrixCFPQ.
as well as with the previous version of our solver, [MatrixCFPQ](src/README.md).
The input data was provided by the
[CFPQ_Data](https://github.com/FormalLanguageConstrainedPathQuerying/CFPQ_Data),
[CFPQ_JavaGraphMiner](https://github.com/FormalLanguageConstrainedPathQuerying/CFPQ_JavaGraphMiner), and
Expand Down
3 changes: 3 additions & 0 deletions cfpq_eval/runners/all_pairs_cflr_tool_runner_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
GraspanAllPairsCflrToolRunner)
from cfpq_eval.runners.kotgll_all_pairs_cflr_tool_runner import (
KotgllAllPairsCflrToolRunner)
from cfpq_eval.runners.legacy_matrix_all_pairs_cflr_tool_runner import (
LegacyMatrixAllPairsCflrToolRunner)
from cfpq_eval.runners.pearl_algo_all_pairs_cflr_tool_runner import (
PearlAllPairsCflrToolRunner)
from cfpq_eval.runners.pocr_algo_all_pairs_cflr_tool_runner import (
Expand All @@ -28,6 +30,7 @@ def run_appropriate_all_pairs_cflr_tool(
"gigascale": GigascaleAllPairsCflrToolRunner,
"graspan": GraspanAllPairsCflrToolRunner,
"kotgll": KotgllAllPairsCflrToolRunner,
"legacy_matrix": LegacyMatrixAllPairsCflrToolRunner,
}.get(algo_settings, PyAlgoAllPairsCflrToolRunner)(
algo_settings, graph_path, grammar_path, timeout_sec
).run()
93 changes: 93 additions & 0 deletions cfpq_eval/runners/legacy_matrix_all_pairs_cflr_tool_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import os
import re
import subprocess
from pathlib import Path
from typing import Optional

import pandas as pd

from cfpq_eval.runners.all_pairs_cflr_tool_runner import (
AbstractAllPairsCflrToolRunner, CflrToolRunResult
)
from cfpq_model.cnf_grammar_template import CnfGrammarTemplate, Symbol
from cfpq_model.label_decomposed_graph import LabelDecomposedGraph
from cfpq_model.model_utils import explode_indices


class LegacyMatrixAllPairsCflrToolRunner(AbstractAllPairsCflrToolRunner):
    """Tool runner that evaluates the legacy matrix solver (``src.legacy_cflr``).

    The runner converts the graph and grammar into the text formats written by
    ``_write_legacy_graph`` and ``_write_legacy_grammar``, stores them in a
    ``legacy_matrix`` sub-directory next to the original inputs and builds the
    command line that runs the solver on the converted files.
    """

    @property
    def base_command(self) -> Optional[str]:
        """Prepare the converted inputs and return the solver command line.

        NOTE: reading this property has side effects. It reads
        ``self.grammar_path`` and ``self.graph_path``, creates the
        ``legacy_matrix`` directories if they are missing and (re)writes the
        converted graph and grammar files on every access.
        """
        grammar = CnfGrammarTemplate.read_from_pocr_cnf_file(self.grammar_path)
        graph = LabelDecomposedGraph.read_from_pocr_graph_file(self.graph_path)

        # Legacy Matrix doesn't support indexed symbols, so labels and indices
        # have to be concatenated into plain labels first.
        graph, grammar = explode_indices(graph, grammar)

        graph_path = self.graph_path.parent / "legacy_matrix" / self.graph_path.name
        os.makedirs(graph_path.parent, exist_ok=True)
        self._write_legacy_graph(graph, graph_path)

        grammar_path = self.grammar_path.parent / "legacy_matrix" / self.grammar_path.name
        os.makedirs(grammar_path.parent, exist_ok=True)
        self._write_legacy_grammar(grammar, grammar_path)

        return f"python3 -m src.legacy_cflr {graph_path} {grammar_path}"

    def parse_results(self, process: subprocess.CompletedProcess[str]) -> CflrToolRunResult:
        """Extract the edge count, analysis time and RAM usage of a finished run.

        The edge count is the integer following ``#SEdges`` or ``#CountEdges``
        in ``process.stdout``; the time is the number following
        ``AnalysisTime``. RAM usage is delegated to ``self.parse_ram_usage_kb``.

        Raises:
            AttributeError: if either marker is missing from ``process.stdout``
                (``re.search`` returns ``None`` in that case).
        """
        return CflrToolRunResult(
            s_edges=int(re.search(r"#(SEdges|CountEdges)\s+(\d+)", process.stdout).group(2)),
            time_sec=float(re.search(r"AnalysisTime\s+([\d.]+)", process.stdout).group(1)),
            ram_kb=self.parse_ram_usage_kb(process)
        )

    @staticmethod
    def _write_legacy_graph(graph: LabelDecomposedGraph, graph_path: Path) -> None:
        """Write ``graph`` to ``graph_path`` as ``source label destination`` lines.

        One space-separated line is emitted per entry of every matrix in
        ``graph.matrices``; the label is the ``label`` of the symbol the matrix
        is stored under.
        """
        with open(graph_path, 'w', encoding="utf-8") as output_file:
            for symbol, matrix in graph.matrices.items():
                edge_label = symbol.label
                # Only the coordinates are needed, the stored values are ignored.
                (rows, columns, _) = matrix.to_coo()
                edges_df = pd.DataFrame({
                    'source': rows,
                    'label': edge_label,
                    'destination': columns,
                })
                csv_string = edges_df.to_csv(sep=' ', index=False, header=False)
                output_file.write(csv_string)

    @staticmethod
    def _write_legacy_grammar(grammar: CnfGrammarTemplate, grammar_path: Path) -> None:
        """Write ``grammar`` to ``grammar_path`` in the legacy solver's format.

        Layout of the produced file: the start non-terminal label, an empty
        line, then one rule per line as ``LHS -> RHS...`` (an empty right-hand
        side denotes an epsilon rule). Auxiliary non-terminals prefixed with
        ``NON_TERMINAL#`` are introduced where the legacy format is more
        restrictive than ``CnfGrammarTemplate``.
        """
        non_term_prefix = "NON_TERMINAL#"
        eps = f"{non_term_prefix}EPS"

        with open(grammar_path, 'w', encoding="utf-8") as output_file:
            output_file.write(f"{grammar.start_nonterm.label}\n\n")

            non_terms = grammar.non_terminals
            terms_needing_non_term = set()
            eps_needed = False

            def as_non_term(symbol: Symbol) -> str:
                # Non-terminals are used as is; a terminal is replaced by an
                # auxiliary non-terminal and remembered so that the rule
                # deriving the terminal from it can be emitted afterwards.
                if symbol in non_terms:
                    return symbol.label
                terms_needing_non_term.add(symbol)
                return f"{non_term_prefix}{symbol.label}"

            for lhs in grammar.epsilon_rules:
                output_file.write(f"{lhs.label} ->\n")

            for lhs, rhs in grammar.simple_rules:
                # Legacy Matrix doesn't support rules with
                # single non-terminal right-hand side (see CnfGrammar).
                # Hence, we need to add auxiliary EPS non-terminal.
                if rhs in non_terms:
                    output_file.write(f"{lhs.label} -> {rhs.label} {eps}\n")
                    eps_needed = True
                else:
                    output_file.write(f"{lhs.label} -> {rhs.label}\n")

            for lhs, rhs1, rhs2 in grammar.complex_rules:
                # Legacy Matrix doesn't support terminals in complex rules (see CnfGrammar).
                # Hence, we need to add auxiliary non-terminals (see the next `for` loop).
                output_file.write(
                    f"{lhs.label} -> {as_non_term(rhs1)} {as_non_term(rhs2)}\n"
                )

            # Sorted by label so that the generated file does not depend on
            # set iteration order and is identical between runs.
            for term in sorted(terms_needing_non_term, key=lambda symbol: symbol.label):
                output_file.write(f"{non_term_prefix}{term.label} -> {term.label}\n")

            if eps_needed:
                output_file.write(f"{eps} ->\n")
55 changes: 28 additions & 27 deletions docs/eval.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,24 +34,25 @@ python3 -m cfpq_eval.eval_all_pairs_cflr algo_config.csv data_config.csv results

### Premade Configurations

The `CFPQ_eval` Docker image includes premade configurations located in the `/py_algo/configs` folder.
The `CFPQ_eval` [Docker image](https://hub.docker.com/r/cfpq/py_algo_eval) includes premade configurations located in the `/py_algo/configs` folder.

### Algorithm Configuration

The `algo_config.csv` configuration should list algorithms and their settings.

Supported algorithms:

- `IncrementalAllPairsCFLReachabilityMatrix` (this tool)
- `NonIncrementalAllPairsCFLReachabilityMatrix` (this tool)
- [`IncrementalAllPairsCFLReachabilityMatrix`](cli.md)
- [`NonIncrementalAllPairsCFLReachabilityMatrix`](cli.md)
- [`pocr`](https://github.com/kisslune/POCR)
- [`pearl`](https://figshare.com/articles/dataset/ASE_2023_artifact/23702271)
- [`graspan`](https://github.com/Graspan/Graspan-C)
- [`gigascale`](https://bitbucket.org/jensdietrich/gigascale-pointsto-oopsla2015/src)
- [`kotgll`](https://github.com/vadyushkins/kotgll)
- [`legacy_matrix`](../src/README.md)

For Matrix-based algorithms, options described in [cli.md](cli.md)
can be used to change the performance.
For the first two algorithms, options described in [cli.md](cli.md)
can be used to configure optimizations.

Here's an algorithm configuration example:
```
Expand Down Expand Up @@ -82,28 +83,28 @@ memory usage, and output size, will be printed to `stdout`.

Here's an example of a mean execution time summary table:
```
============================================ TIME, SEC (grammar 'c_alias') ============================================
| graph | fast matrix | fast matrix | matrix cfpq | pearl | pocr | kotgll | gigascale | graspan |
| | cfpq | cfpq (no | | | | | | |
| | | grammar | | | | | | |
| | | rewrite) | | | | | | |
|:---------|:--------------|:--------------|:--------------|:--------|:----------|:---------|:------------|:----------|
| init | 1.2 ± 3% | 2.9 | 7.0 ± 1% | - | 85 | 23 ± 6% | - | 16 ± 14% |
| mm | 1.3 ± 2% | 3.1 | 7.5 | - | 89 ± 1% | 25 ± 3% | - | 16 ± 5% |
| block | 1.7 ± 2% | 4.1 | 11 ± 1% | - | 123 | 34 ± 3% | - | 21 ± 2% |
| ipc | 1.7 ± 4% | 4.0 | 10 ± 1% | - | 121 ± 1% | 34 ± 1% | - | 21 ± 3% |
| lib | 1.7 ± 2% | 4.0 | 11 ± 1% | - | 123 ± 1% | 34 ± 1% | - | 21 ± 3% |
| arch | 1.7 ± 3% | 4.1 | 11 ± 1% | - | 123 ± 1% | 34 ± 5% | - | 22 ± 10% |
| crypto | 1.7 ± 3% | 4.2 | 11 ± 1% | - | 125 ± 1% | 34 ± 2% | - | 22 ± 8% |
| security | 1.8 ± 4% | 4.4 | 11 ± 1% | - | 129 ± 1% | 35 ± 5% | - | 22 ± 5% |
| sound | 2.0 ± 2% | 5.0 | 12 | - | 140 ± 1% | 38 ± 5% | - | 24 ± 11% |
| fs | 2.5 ± 2% | 6.9 | 17 | - | 230 ± 1% | 53 ± 1% | - | 34 ± 3% |
| net | 2.6 ± 3% | 7.4 | 20 | - | 221 ± 1% | 52 ± 1% | - | 35 ± 2% |
| drivers | 3.9 ± 2% | 12 ± 1% | 28 ± 1% | - | 755 ± 1% | 92 ± 3% | - | 69 ± 3% |
| kernel | 6.1 ± 2% | 13 | 43 | - | 387 ± 1% | 118 ± 2% | - | 69 ± 3% |
| apache | 6.5 ± 1% | 26 ± 1% | 84 | - | OOT | OOM | - | 601 ± 2% |
| postgre | 10 ± 1% | 36 ± 1% | 104 | - | 5398 ± 1% | OOM | - | 427 ± 4% |
=======================================================================================================================
==================================== TIME, SEC (grammar 'c_alias') ====================================
| graph | fast matrix | fast matrix | pearl | pocr | kotgll | gigascale | graspan |
| | cfpq | cfpq (no | | | | | |
| | | grammar | | | | | |
| | | rewrite) | | | | | |
|:---------|:--------------|:--------------|:--------|:----------|:---------|:------------|:----------|
| init | 1.2 ± 3% | 2.9 | - | 85 | 23 ± 6% | - | 16 ± 14% |
| mm | 1.3 ± 2% | 3.1 | - | 89 ± 1% | 25 ± 3% | - | 16 ± 5% |
| block | 1.7 ± 2% | 4.1 | - | 123 | 34 ± 3% | - | 21 ± 2% |
| ipc | 1.7 ± 4% | 4.0 | - | 121 ± 1% | 34 ± 1% | - | 21 ± 3% |
| lib | 1.7 ± 2% | 4.0 | - | 123 ± 1% | 34 ± 1% | - | 21 ± 3% |
| arch | 1.7 ± 3% | 4.1 | - | 123 ± 1% | 34 ± 5% | - | 22 ± 10% |
| crypto | 1.7 ± 3% | 4.2 | - | 125 ± 1% | 34 ± 2% | - | 22 ± 8% |
| security | 1.8 ± 4% | 4.4 | - | 129 ± 1% | 35 ± 5% | - | 22 ± 5% |
| sound | 2.0 ± 2% | 5.0 | - | 140 ± 1% | 38 ± 5% | - | 24 ± 11% |
| fs | 2.5 ± 2% | 6.9 | - | 230 ± 1% | 53 ± 1% | - | 34 ± 3% |
| net | 2.6 ± 3% | 7.4 | - | 221 ± 1% | 52 ± 1% | - | 35 ± 2% |
| drivers | 3.9 ± 2% | 12 ± 1% | - | 755 ± 1% | 92 ± 3% | - | 69 ± 3% |
| kernel | 6.1 ± 2% | 13 | - | 387 ± 1% | 118 ± 2% | - | 69 ± 3% |
| apache | 6.5 ± 1% | 26 ± 1% | - | OOT | OOM | - | 601 ± 2% |
| postgre | 10 ± 1% | 36 ± 1% | - | 5398 ± 1% | OOM | - | 427 ± 4% |
=======================================================================================================
```

## Custom Tools Integration
Expand Down
Binary file modified performance.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 5 additions & 3 deletions src/grammar/cnf_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,19 @@ def __init__(self):
self.eps_rules = []

def __setitem__(self, key, value):
if (isinstance(value, tuple) or isinstance(value, list)) and 1 <= len(value) <= 2:
if (isinstance(value, tuple) or isinstance(value, list)) and len(value) <= 2:
self.nonterms.add(key)
if len(value) == 1:
if len(value) == 0:
self.eps_rules.append(key)
elif len(value) == 1:
self.simple_rules.append((key, value[0]))
self.terms.add(value[0])
else:
self.complex_rules.append((key, value[0], value[1]))
for x in value:
self.nonterms.add(x)
else:
raise Exception('value must be str, (str, str) or [str, str]')
raise Exception('value must be [], str, (str, str) or [str, str]')

@classmethod
def from_cfg(cls, cfg: CFG):
Expand Down
30 changes: 30 additions & 0 deletions src/legacy_cflr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import sys
from pathlib import Path
from time import time
from typing import List

from src.grammar.cnf_grammar import CnfGrammar
from src.graph.graph import Graph
from src.problems.Base.algo.matrix_base.matrix_base import MatrixBaseAlgo


# Minimalistic CLI needed for integration with cfpq_eval,
# not intended to be used by consumers
def main(raw_args: List[str]):
    """Run the legacy matrix algorithm and print its time and edge count.

    Args:
        raw_args: command-line arguments without the program name;
            ``raw_args[0]`` is the graph file path and ``raw_args[1]`` is the
            grammar file path.

    Prints two tab-separated lines to stdout: ``AnalysisTime`` with the
    duration of ``algo.solve()`` in seconds and ``#SEdges`` with
    ``res.matrix_S.nvals``. Loading the graph and the grammar is not timed.

    Raises:
        IndexError: if fewer than two arguments are supplied.
    """
    # Imported locally so that the block does not depend on a file-level
    # import change. `perf_counter` is monotonic and high-resolution, unlike
    # the wall-clock `time()`, which can jump when the system clock is adjusted.
    from time import perf_counter

    graph_path = raw_args[0]
    grammar_path = raw_args[1]
    algo = MatrixBaseAlgo()

    algo.graph = Graph.from_txt(Path(graph_path))
    algo.graph.load_bool_graph()
    algo.grammar = CnfGrammar.from_cnf(grammar_path)

    start = perf_counter()
    res = algo.solve()
    finish = perf_counter()
    print(f"AnalysisTime\t{finish - start}")
    print(f"#SEdges\t{res.matrix_S.nvals}")


if __name__ == '__main__':
    main(raw_args=sys.argv[1:])

0 comments on commit 061f3c6

Please sign in to comment.