Merge pull request #83 from dice-group/sparql-confusion-matrix

SPARQL query returning a confusion matrix
dice-group · Oct 16, 2024 · 49f41b9 · 49f41b9
2 parents dc32de2 + 1edfcd4
commit 49f41b9
Show file tree

Hide file tree

Showing 2 changed files with 112 additions and 0 deletions.
diff --git a/owlapy/converter.py b/owlapy/converter.py
@@ -634,6 +634,61 @@ def as_query(self,
         parseQuery(query)
         return query
 
+    def as_confusion_matrix_query(self,
+                                  root_variable: str,
+                                  ce: OWLClassExpression,
+                                  positive_examples: Iterable[OWLNamedIndividual],
+                                  negative_examples: Iterable[OWLNamedIndividual],
+                                  for_all_de_morgan: bool = True,
+                                  named_individuals: bool = False) -> str:
+        # get the graph pattern corresponding to the provided class expression (ce)
+        graph_pattern_str = "".join(self.convert(root_variable,
+                                                 ce,
+                                                 for_all_de_morgan=for_all_de_morgan,
+                                                 named_individuals=named_individuals))
+        # preparation for the final query
+
+        # required to compute false negatives
+        number_of_positive_examples = 0
+        # required to compute true negatives
+        number_of_negative_examples = 0
+        # string representation of the positive examples (to be passed to the first VALUES clause)
+        positive_examples_as_str = ""
+        # iterate over the positive examples
+        for positive_example in positive_examples:
+            number_of_positive_examples += 1
+            positive_examples_as_str += f"<{positive_example.to_string_id()}> "
+        assert (len(positive_examples_as_str) > 0)
+
+        # string representation of the positive examples (to be passed to the first VALUES clause)
+        negative_examples_as_str = ""
+        # iterate over the negative examples
+        for negative_example in negative_examples:
+            number_of_negative_examples += 1
+            negative_examples_as_str += f"<{negative_example.to_string_id()}> "
+        assert(len(negative_examples_as_str) > 0)
+
+        # create the sparql query
+        sparql_str= f"""
+                    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+                    SELECT * WHERE {{
+                       {{
+                          SELECT (xsd:double(COUNT(DISTINCT {root_variable})) as ?tp) (xsd:double(({number_of_positive_examples} - COUNT(DISTINCT {root_variable}))) as ?fn) WHERE {{
+                             VALUES {root_variable} {{ {positive_examples_as_str} }}
+                             {graph_pattern_str}
+                          }}
+                       }}
+                       {{
+                          SELECT DISTINCT (xsd:double(COUNT(DISTINCT {root_variable})) as ?fp) (xsd:double(({number_of_negative_examples} - COUNT(DISTINCT {root_variable}))) as ?tn) WHERE {{
+                             VALUES {root_variable} {{ {negative_examples_as_str} }}
+                             {graph_pattern_str}
+                          }}
+                       }}
+                    }}
+                    """
+        parseQuery(sparql_str)
+        return sparql_str
+
 
 # Module-level converter instance used by the helper functions in this module.
 converter = Owl2SparqlConverter()
 
@@ -656,3 +711,30 @@ def owl_expression_to_sparql(expression: OWLClassExpression = None,
     assert expression is not None, "expression cannot be None"
     return converter.as_query(root_variable, expression, count=False, values=values,
                               named_individuals=named_individuals, for_all_de_morgan=for_all_de_morgan)
+
+
+def owl_expression_to_sparql_with_confusion_matrix(expression: OWLClassExpression,
+                                                   positive_examples: Optional[Iterable[OWLNamedIndividual]],
+                                                   negative_examples: Optional[Iterable[OWLNamedIndividual]],
+                                                   root_variable: str = "?x",
+                                                   for_all_de_morgan: bool = True,
+                                                   named_individuals: bool = False) -> str:
+    """Convert an OWL Class Expression (https://www.w3.org/TR/owl2-syntax/#Class_Expressions) into a SPARQL query
+     root variable: the variable that will be projected
+     expression: the class expression to be transformed to a SPARQL query
+     positive_examples: positive examples from a class expression problem
+     negative_examples: positive examples from a class expression problem
+     for_all_de_morgan: if set to True, the SPARQL mapping will use the mapping containing the nested FILTER NOT EXISTS
+     patterns for the universal quantifier (¬(∃r.¬C)), instead of the counting query
+     named_individuals: if set to True, the generated SPARQL query will return only entities
+     that are instances of owl:NamedIndividual
+    """
+    assert expression is not None, "expression cannot be None"
+    assert positive_examples is not None, "positive examples cannot be None"
+    assert negative_examples is not None, "negative examples cannot be None"
+    return converter.as_confusion_matrix_query(root_variable,
+                                               expression,
+                                               positive_examples=positive_examples,
+                                               negative_examples=negative_examples,
+                                               named_individuals=named_individuals,
+                                               for_all_de_morgan=for_all_de_morgan)
diff --git a/tests/test_owlapy_owl2sparql_converter.py b/tests/test_owlapy_owl2sparql_converter.py
@@ -408,6 +408,36 @@ def test_Exists(self):
     #     # self.assertEqual(len(sparql_results_actual), len(reasoner_results))
     #     # self.assertTrue(check_reasoner_instances_in_sparql_results(sparql_results_actual, reasoner_results))
 
+    def test_ConfusionMatrixQuery(self):
+        # rdf graph - using rdflib
+        family_rdf_graph = Graph()
+        family_rdf_graph.parse(location=PATH_FAMILY)
+
+        ce_str = "Brother"
+        ce_parsed = DLSyntaxParser(namespace="http://www.benchmark.org/family#").parse_expression(expression_str=ce_str)
+
+        positive_examples = [
+            DLSyntaxParser(namespace="http://www.benchmark.org/family#").parse_expression(expression_str="F10M173"),
+            DLSyntaxParser(namespace="http://www.benchmark.org/family#").parse_expression(expression_str="F10M183"),
+        ]
+
+        negative_examples = [
+            DLSyntaxParser(namespace="http://www.benchmark.org/family#").parse_expression(expression_str="F10M184"),
+            DLSyntaxParser(namespace="http://www.benchmark.org/family#").parse_expression(expression_str="F10F179"),
+        ]
+
+        query_with_confusion_matrix = Owl2SparqlConverter().as_confusion_matrix_query(root_variable=self._root_var_,
+                                                                                      ce=ce_parsed,
+                                                                                      positive_examples=positive_examples,
+                                                                                      negative_examples=negative_examples,
+                                                                                      for_all_de_morgan=True,
+                                                                                      named_individuals=True)
+
+        sparql_results = family_rdf_graph.query(query_with_confusion_matrix)
+        self.assertEqual(float(sparql_results.bindings[0]["tp"]), 2.0)
+        self.assertEqual(float(sparql_results.bindings[0]["fn"]), 0.0)
+        self.assertEqual(float(sparql_results.bindings[0]["fp"]), 1.0)
+        self.assertEqual(float(sparql_results.bindings[0]["tn"]), 1.0)
 
 # Run the tests through unittest when this file is executed as a script.
 if __name__ == '__main__':
     unittest.main()