Skip to content

Commit

Permalink
Merge pull request #83 from dice-group/sparql-confusion-matrix
Browse files Browse the repository at this point in the history
SPARQL query returning a confusion matrix
  • Loading branch information
Demirrr authored Oct 16, 2024
2 parents dc32de2 + 1edfcd4 commit 49f41b9
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 0 deletions.
82 changes: 82 additions & 0 deletions owlapy/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,61 @@ def as_query(self,
parseQuery(query)
return query

def as_confusion_matrix_query(self,
                              root_variable: str,
                              ce: OWLClassExpression,
                              positive_examples: Iterable[OWLNamedIndividual],
                              negative_examples: Iterable[OWLNamedIndividual],
                              for_all_de_morgan: bool = True,
                              named_individuals: bool = False) -> str:
    """Build a SPARQL query returning the confusion matrix of a class expression.

    The query consists of two sub-selects that share the graph pattern
    obtained from ``ce``. The first restricts ``root_variable`` to the
    positive examples and yields ``?tp`` and ``?fn``; the second restricts
    it to the negative examples and yields ``?fp`` and ``?tn``. All four
    values are cast to ``xsd:double``.

    Args:
        root_variable: SPARQL variable (e.g. ``"?x"``) bound to the individuals.
        ce: class expression that is converted to the graph pattern.
        positive_examples: individuals expected to match ``ce``.
        negative_examples: individuals expected not to match ``ce``.
        for_all_de_morgan: forwarded unchanged to ``self.convert``.
        named_individuals: forwarded unchanged to ``self.convert``.

    Returns:
        The SPARQL query as a string.

    Raises:
        AssertionError: if either collection of examples is empty.
    """
    # get the graph pattern corresponding to the provided class expression (ce)
    graph_pattern_str = "".join(self.convert(root_variable,
                                             ce,
                                             for_all_de_morgan=for_all_de_morgan,
                                             named_individuals=named_individuals))

    # The query counts DISTINCT bindings, so the totals used to derive
    # fn / tn must be counted over distinct individuals as well; otherwise a
    # repeated example would inflate fn / tn. dict.fromkeys de-duplicates
    # while preserving the order in which the examples were given.
    positive_iris = list(dict.fromkeys(example.to_string_id() for example in positive_examples))
    assert positive_iris, "positive examples cannot be empty"
    negative_iris = list(dict.fromkeys(example.to_string_id() for example in negative_examples))
    assert negative_iris, "negative examples cannot be empty"

    # required to compute false negatives / true negatives respectively
    number_of_positive_examples = len(positive_iris)
    number_of_negative_examples = len(negative_iris)

    # content of the VALUES clauses of the two sub-selects
    positive_examples_as_str = " ".join(f"<{iri}>" for iri in positive_iris)
    negative_examples_as_str = " ".join(f"<{iri}>" for iri in negative_iris)

    # create the sparql query
    sparql_str = f"""
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT * WHERE {{
{{
SELECT (xsd:double(COUNT(DISTINCT {root_variable})) as ?tp) (xsd:double(({number_of_positive_examples} - COUNT(DISTINCT {root_variable}))) as ?fn) WHERE {{
VALUES {root_variable} {{ {positive_examples_as_str} }}
{graph_pattern_str}
}}
}}
{{
SELECT (xsd:double(COUNT(DISTINCT {root_variable})) as ?fp) (xsd:double(({number_of_negative_examples} - COUNT(DISTINCT {root_variable}))) as ?tn) WHERE {{
VALUES {root_variable} {{ {negative_examples_as_str} }}
{graph_pattern_str}
}}
}}
}}
"""
    # Same final step as as_query: the query is handed to parseQuery before
    # it is returned (presumably so that a malformed query fails here).
    parseQuery(sparql_str)
    return sparql_str


# Module-level converter instance used by the owl_expression_to_sparql* helper functions below.
converter = Owl2SparqlConverter()

Expand All @@ -656,3 +711,30 @@ def owl_expression_to_sparql(expression: OWLClassExpression = None,
assert expression is not None, "expression cannot be None"
return converter.as_query(root_variable, expression, count=False, values=values,
named_individuals=named_individuals, for_all_de_morgan=for_all_de_morgan)


def owl_expression_to_sparql_with_confusion_matrix(expression: OWLClassExpression,
                                                   positive_examples: Optional[Iterable[OWLNamedIndividual]],
                                                   negative_examples: Optional[Iterable[OWLNamedIndividual]],
                                                   root_variable: str = "?x",
                                                   for_all_de_morgan: bool = True,
                                                   named_individuals: bool = False) -> str:
    """Convert an OWL Class Expression into a SPARQL confusion-matrix query.

    Class expressions: https://www.w3.org/TR/owl2-syntax/#Class_Expressions

    This is a thin wrapper that checks its arguments and delegates to
    ``converter.as_confusion_matrix_query``.

    Args:
        expression: the class expression to be transformed to a SPARQL query.
        positive_examples: positive examples from a class expression problem.
        negative_examples: negative examples from a class expression problem.
        root_variable: the root variable of the generated query.
        for_all_de_morgan: if set to True, the SPARQL mapping will use the
            mapping containing the nested FILTER NOT EXISTS patterns for the
            universal quantifier (¬(∃r.¬C)), instead of the counting query.
        named_individuals: if set to True, the generated SPARQL query will
            return only entities that are instances of owl:NamedIndividual.

    Returns:
        The SPARQL query as a string.

    Raises:
        AssertionError: if ``expression``, ``positive_examples`` or
            ``negative_examples`` is None.
    """
    # Checked in this order so the first missing argument is the one reported.
    required_arguments = (
        (expression, "expression cannot be None"),
        (positive_examples, "positive examples cannot be None"),
        (negative_examples, "negative examples cannot be None"),
    )
    for argument, message in required_arguments:
        assert argument is not None, message

    return converter.as_confusion_matrix_query(
        root_variable,
        expression,
        positive_examples=positive_examples,
        negative_examples=negative_examples,
        for_all_de_morgan=for_all_de_morgan,
        named_individuals=named_individuals,
    )
30 changes: 30 additions & 0 deletions tests/test_owlapy_owl2sparql_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,36 @@ def test_Exists(self):
# # self.assertEqual(len(sparql_results_actual), len(reasoner_results))
# # self.assertTrue(check_reasoner_instances_in_sparql_results(sparql_results_actual, reasoner_results))

def test_ConfusionMatrixQuery(self):
    """Run the confusion-matrix query for "Brother" against the family graph."""
    family_namespace = "http://www.benchmark.org/family#"

    def parse(expression_str):
        # A fresh parser is built for every expression.
        parser = DLSyntaxParser(namespace=family_namespace)
        return parser.parse_expression(expression_str=expression_str)

    # rdf graph - using rdflib
    family_rdf_graph = Graph()
    family_rdf_graph.parse(location=PATH_FAMILY)

    ce_parsed = parse("Brother")
    # NOTE(review): the examples are produced with parse_expression as well;
    # confirm that the resulting objects are the intended example type.
    positive_examples = [parse(name) for name in ("F10M173", "F10M183")]
    negative_examples = [parse(name) for name in ("F10M184", "F10F179")]

    query_with_confusion_matrix = Owl2SparqlConverter().as_confusion_matrix_query(
        root_variable=self._root_var_,
        ce=ce_parsed,
        positive_examples=positive_examples,
        negative_examples=negative_examples,
        for_all_de_morgan=True,
        named_individuals=True,
    )

    sparql_results = family_rdf_graph.query(query_with_confusion_matrix)
    first_row = sparql_results.bindings[0]
    expected_cells = (("tp", 2.0), ("fn", 0.0), ("fp", 1.0), ("tn", 1.0))
    for cell, expected_value in expected_cells:
        self.assertEqual(float(first_row[cell]), expected_value)

# Run the tests through unittest when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 49f41b9

Please sign in to comment.