From 5d87e4652012805a675152f5a9b26e1800f7956f Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Wed, 25 Sep 2024 18:57:50 +1000 Subject: [PATCH 01/11] Add Focus Nodes mode. --- CHANGELOG.md | 15 +++ pyshacl/cli.py | 10 ++ pyshacl/pytypes.py | 5 +- pyshacl/shape.py | 16 ++++ pyshacl/validate.py | 27 ++++++ test/test_extra.py | 228 ++++++++++++++++++++++++++++++++++---------- 6 files changed, 247 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1faab02..4e38041 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,11 +5,26 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Python PEP 440 Versioning](https://www.python.org/dev/peps/pep-0440/). ## [Unreleased] + +### Added +- Focus Node mode! + - You can now pass in a list of focus nodes to the validator, and it will only validate those focus nodes. + - Note, you still need to pass in a SHACL Shapes Graph, and the shapes still need to target the focus nodes. + - This feature will filter the Shapes' targeted focus nodes to include only those that are in the list of specified focus nodes. + ### Changed - Don't make a clone of the DataGraph if the input data graph is ephemeral. - An ephemeral graph is one that is loaded from a string or file location by PySHACL - This includes all files opened by the PySHACL CLI validator tool - We don't need to make a copy because PySHACL parsed the Graph into memory itself already, so we are not concerned about not polluting the user's graph. +- Refactorings + - shacl_path_to_sparql_path code to a reusable importable function + - move sht_validate and dash_validate routes to `validator_conformance.py` module. + - Removes some complexity from the main `validate` function. +- Typing + - A whole swathe of python typing fixes and new type annotations. Thanks @ajnelson-nist +### Fixed +- Fix logic determining if a datagraph is ephemeral. 
## [0.26.0] - 2024-04-11 diff --git a/pyshacl/cli.py b/pyshacl/cli.py index 4715974..adf572f 100644 --- a/pyshacl/cli.py +++ b/pyshacl/cli.py @@ -148,6 +148,14 @@ def str_is_true(s_var: str): parser.add_argument( '-d', '--debug', dest='debug', action='store_true', default=False, help='Output additional runtime messages.' ) +parser.add_argument( + '--focus', + dest='focus', + action='store', + help='The IRI of a focus node from the DataGraph, the shapes will validate only that node.', + nargs="?", + default=None, +) parser.add_argument( '-f', '--format', @@ -259,6 +267,8 @@ def main(prog: Union[str, None] = None) -> None: validator_kwargs['advanced'] = True if args.js: validator_kwargs['js'] = True + if args.focus: + validator_kwargs['focus'] = args.focus if args.iterate_rules: if not args.advanced: sys.stderr.write("Iterate-Rules option only works when you enable Advanced Mode.\n") diff --git a/pyshacl/pytypes.py b/pyshacl/pytypes.py index 3b28ccf..ac50d73 100644 --- a/pyshacl/pytypes.py +++ b/pyshacl/pytypes.py @@ -2,10 +2,10 @@ # from dataclasses import dataclass -from typing import Optional, Union +from typing import List, Optional, Union from rdflib import ConjunctiveGraph, Dataset, Graph, Literal -from rdflib.term import IdentifiedNode +from rdflib.term import IdentifiedNode, URIRef ConjunctiveLike = Union[ConjunctiveGraph, Dataset] GraphLike = Union[ConjunctiveLike, Graph] @@ -23,3 +23,4 @@ class SHACLExecutor: debug: bool = False sparql_mode: bool = False max_validation_depth: int = 15 + focus_nodes: Optional[List[URIRef]] = None diff --git a/pyshacl/shape.py b/pyshacl/shape.py index 404aa12..500d7a2 100644 --- a/pyshacl/shape.py +++ b/pyshacl/shape.py @@ -661,6 +661,22 @@ def validate( return True, [] else: self.logger.debug(f"Running evaluation of Shape {str(self)}") + + if executor.focus_nodes is not None and len(executor.focus_nodes) > 0: + filtered_focus_nodes = [] + for f in focus: + if f in executor.focus_nodes: + filtered_focus_nodes.append(f) + 
len_orig_focus = len(focus) + len_filtered_focus = len(filtered_focus_nodes) + if len_filtered_focus < 1: + self.logger.debug(f"Skipping shape {str(self)} because specified focus nodes are not targeted.") + return True, [] + elif len_filtered_focus != len_orig_focus: + self.logger.debug( + f"Filtered focus nodes based on focus_nodes option. Only {len_filtered_focus} of {len_orig_focus} focus nodes remain." + ) + focus = filtered_focus_nodes t1 = ct1 = 0.0 # prevent warnings about use-before-assign collect_stats = bool(executor.debug) diff --git a/pyshacl/validate.py b/pyshacl/validate.py index 7fe4b08..4a6127c 100644 --- a/pyshacl/validate.py +++ b/pyshacl/validate.py @@ -64,6 +64,7 @@ def _load_default_options(cls, options_dict: dict): options_dict.setdefault('allow_warnings', False) options_dict.setdefault('sparql_mode', False) options_dict.setdefault('max_validation_depth', 15) + options_dict.setdefault('focus_nodes', None) if 'logger' not in options_dict: options_dict['logger'] = logging.getLogger(__name__) if options_dict['debug']: @@ -230,6 +231,7 @@ def make_executor(self) -> SHACLExecutor: iterate_rules=bool(self.options.get("iterate_rules", False)), sparql_mode=bool(self.options.get("sparql_mode", False)), max_validation_depth=self.options.get("max_validation_depth", 15), + focus_nodes=self.options.get("focus_nodes", None), debug=self.debug, ) @@ -275,6 +277,27 @@ def run(self): self._target_graph = the_target_graph shapes = self.shacl_graph.shapes # This property getter triggers shapes harvest. 
+ limit_focus_nodes = self.options.get("focus_nodes", None) + if limit_focus_nodes is not None and len(limit_focus_nodes) > 0: + # Expand any CURIEs in the focus_nodes list + expanded_focus_nodes = [] + for f in limit_focus_nodes: + f_lower = f.lower() + if ( + f_lower.startswith("http:") + or f_lower.startswith("https:") + or f_lower.startswith("urn:") + or f_lower.startswith("file:") + ): + expanded_focus_nodes.append(URIRef(f)) + else: + try: + expanded_focus_node = self.target_graph.namespace_manager.expand_curie(f) + except ValueError: + expanded_focus_node = URIRef(f) + expanded_focus_nodes.append(expanded_focus_node) + self.options["focus_nodes"] = expanded_focus_nodes + executor = self.make_executor() if executor.advanced_mode: self.logger.debug("Activating SHACL-AF Features.") @@ -406,6 +429,7 @@ def validate( allow_warnings: Optional[bool] = False, max_validation_depth: Optional[int] = None, sparql_mode: Optional[bool] = False, + focus_nodes: Optional[List[Union[str | URIRef]]] = None, **kwargs, ): """ @@ -434,6 +458,8 @@ def validate( :type max_validation_depth: int | None :param sparql_mode: Treat the DataGraph as a SPARQL endpoint, validate the graph at the SPARQL endpoint. :type sparql_mode: bool | None + :param focus_nodes: A list of IRIs to validate only those nodes. + :type focus_nodes: list | None :param kwargs: :return: """ @@ -532,6 +558,7 @@ def validate( 'use_js': use_js, 'sparql_mode': sparql_mode, 'logger': log, + 'focus_nodes': focus_nodes, } if max_validation_depth is not None: validator_options_dict['max_validation_depth'] = max_validation_depth diff --git a/test/test_extra.py b/test/test_extra.py index 448ea33..e6be92d 100644 --- a/test/test_extra.py +++ b/test/test_extra.py @@ -6,7 +6,9 @@ # are added as required. import os import re + from rdflib import Graph + from pyshacl import validate from pyshacl.errors import ReportableRuntimeError @@ -123,51 +125,98 @@ exOnt:nLegs "four"^^xsd:string . 
""" + def test_validate_with_ontology(): g = Graph().parse(data=data_file_text, format='turtle') e = Graph().parse(data=ontology_file_text, format='turtle') g_len = len(g) - res = validate(g, shacl_graph=shacl_file_text, - shacl_graph_format='turtle', - ont_graph=e, inference='both', debug=True) + res = validate( + g, shacl_graph=shacl_file_text, shacl_graph_format='turtle', ont_graph=e, inference='both', debug=True + ) conforms, graph, string = res g_len2 = len(g) assert conforms assert g_len2 == g_len + def test_validate_with_ontology_inplace(): g = Graph().parse(data=data_file_text, format='turtle') e = Graph().parse(data=ontology_file_text, format='turtle') g_len = len(g) - res = validate(g, shacl_graph=shacl_file_text, - shacl_graph_format='turtle', - ont_graph=e, inference='both', debug=True, inplace=True) + res = validate( + g, + shacl_graph=shacl_file_text, + shacl_graph_format='turtle', + ont_graph=e, + inference='both', + debug=True, + inplace=True, + ) conforms, graph, string = res g_len2 = len(g) assert conforms assert g_len2 != g_len + def test_validate_with_ontology_fail1(): - res = validate(data_file_text_bad, shacl_graph=shacl_file_text, - data_graph_format='turtle', shacl_graph_format='turtle', - ont_graph=ontology_file_text, ont_graph_format="turtle", - inference='both', debug=True) + res = validate( + data_file_text_bad, + shacl_graph=shacl_file_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='both', + debug=True, + ) conforms, graph, string = res assert not conforms + def test_validate_with_ontology_fail2(): - res = validate(data_file_text_bad, shacl_graph=shacl_file_text, - data_graph_format='turtle', shacl_graph_format='turtle', - ont_graph=ontology_file_text, ont_graph_format="turtle", - inference=None, debug=True) + res = validate( + data_file_text_bad, + shacl_graph=shacl_file_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + 
ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference=None, + debug=True, + ) conforms, graph, string = res assert not conforms + +def test_validate_with_ontology_fail_focuses(): + res = validate( + data_file_text_bad, + shacl_graph=shacl_file_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='both', + focus_nodes=["ex:Human1"], + debug=True, + ) + conforms, graph, string = res + assert "Results (1)" in string + assert not conforms + + def test_metashacl_pass(): - res = validate(data_file_text, shacl_graph=shacl_file_text, - meta_shacl=True, data_graph_format='turtle', - shacl_graph_format='turtle', ont_graph=ontology_file_text, - ont_graph_format="turtle", inference='both', debug=True) + res = validate( + data_file_text, + shacl_graph=shacl_file_text, + meta_shacl=True, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='both', + debug=True, + ) conforms, graph, string = res assert conforms @@ -205,10 +254,17 @@ def test_metashacl_fail(): """ did_error = False try: - res = validate(data_file_text, shacl_graph=bad_shacl_text, - meta_shacl=True, data_graph_format='turtle', - shacl_graph_format='turtle', ont_graph=ontology_file_text, - ont_graph_format="turtle", inference='both', debug=True) + res = validate( + data_file_text, + shacl_graph=bad_shacl_text, + meta_shacl=True, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='both', + debug=True, + ) conforms, graph, string = res assert not conforms except ReportableRuntimeError as r: @@ -216,6 +272,7 @@ def test_metashacl_fail(): did_error = True assert did_error + data_file_text_bn = """ @prefix rdf: . @prefix xsd: . @@ -252,12 +309,19 @@ def test_metashacl_fail(): exOnt:nLegs "four"^^xsd:string . 
""" + def test_blank_node_string_generation(): - res = validate(data_file_text_bad_bn, shacl_graph=shacl_file_text, - data_graph_format='turtle', shacl_graph_format='turtle', - ont_graph=ontology_file_text, ont_graph_format="turtle", - inference='rdfs', debug=True) + res = validate( + data_file_text_bad_bn, + shacl_graph=shacl_file_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='rdfs', + debug=True, + ) conforms, graph, string = res assert not conforms rx = r"^\s*Focus Node\:\s+\[.+rdf:type\s+.+exOnt\:PreschoolTeacher.*\]$" @@ -266,13 +330,21 @@ def test_blank_node_string_generation(): def test_serialize_report_graph(): - res = validate(data_file_text, shacl_graph=shacl_file_text, - data_graph_format='turtle', serialize_report_graph=True, - shacl_graph_format='turtle', ont_graph=ontology_file_text, - ont_graph_format="turtle", inference='both', debug=True) + res = validate( + data_file_text, + shacl_graph=shacl_file_text, + data_graph_format='turtle', + serialize_report_graph=True, + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='both', + debug=True, + ) conforms, graph, string = res assert isinstance(graph, (str, bytes)) + shacl_file_property_shapes_text = """\ @prefix owl: . @prefix rdf: . @@ -305,22 +377,37 @@ def test_serialize_report_graph(): sh:targetClass exOnt:Animal . 
""" + def test_property_shape_focus(): - res = validate(data_file_text, shacl_graph=shacl_file_property_shapes_text, - data_graph_format='turtle', shacl_graph_format='turtle', - ont_graph=ontology_file_text, ont_graph_format="turtle", - inference='rdfs', debug=True) + res = validate( + data_file_text, + shacl_graph=shacl_file_property_shapes_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='rdfs', + debug=True, + ) conforms, graph, string = res assert conforms + def test_property_shape_focus_fail1(): - res = validate(data_file_text_bad, shacl_graph=shacl_file_property_shapes_text, - data_graph_format='turtle', shacl_graph_format='turtle', - ont_graph=ontology_file_text, ont_graph_format="turtle", - inference='rdfs', debug=True) + res = validate( + data_file_text_bad, + shacl_graph=shacl_file_property_shapes_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='rdfs', + debug=True, + ) conforms, graph, string = res assert not conforms + web_d1_ttl = """\ @prefix rdf: . @prefix xsd: . @@ -352,6 +439,7 @@ def test_property_shape_focus_fail1(): exOnt:nLegs "g"^^xsd:string . 
""" + def test_web_retrieve(): DEB_BUILD_ARCH = os.environ.get('DEB_BUILD_ARCH', None) DEB_HOST_ARCH = os.environ.get('DEB_HOST_ARCH', None) @@ -361,9 +449,16 @@ def test_web_retrieve(): return True shacl_file = "https://raw.githubusercontent.com/RDFLib/pySHACL/master/test/resources/cmdline_tests/s1.ttl" ont_file = "https://raw.githubusercontent.com/RDFLib/pySHACL/master/test/resources/cmdline_tests/o1.ttl" - res = validate(web_d1_ttl, shacl_graph=shacl_file, data_graph_format='turtle', - shacl_graph_format='turtle', ont_graph=ont_file, - ont_graph_format="turtle", inference='both', debug=True) + res = validate( + web_d1_ttl, + shacl_graph=shacl_file, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ont_file, + ont_graph_format="turtle", + inference='both', + debug=True, + ) conforms, graph, string = res assert conforms @@ -377,9 +472,16 @@ def test_web_retrieve_fail(): return True shacl_file = "https://raw.githubusercontent.com/RDFLib/pySHACL/master/test/resources/cmdline_tests/s1.ttl" ont_file = "https://raw.githubusercontent.com/RDFLib/pySHACL/master/test/resources/cmdline_tests/o1.ttl" - res = validate(web_d2_ttl, shacl_graph=shacl_file, data_graph_format='turtle', - shacl_graph_format='turtle', ont_graph=ont_file, - ont_graph_format="turtle", inference='both', debug=True) + res = validate( + web_d2_ttl, + shacl_graph=shacl_file, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ont_file, + ont_graph_format="turtle", + inference='both', + debug=True, + ) conforms, graph, string = res assert not conforms @@ -408,9 +510,17 @@ def test_owl_imports(): print("Cannot run owl:imports in debhelper tests.") assert True return True - res = validate(web_d1_ttl, shacl_graph=my_partial_shapes_text, data_graph_format='turtle', - shacl_graph_format='turtle', ont_graph=my_partial_ont_text, - ont_graph_format="turtle", inference='both', debug=True, do_owl_imports=True) + res = validate( + web_d1_ttl, + 
shacl_graph=my_partial_shapes_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=my_partial_ont_text, + ont_graph_format="turtle", + inference='both', + debug=True, + do_owl_imports=True, + ) conforms, graph, string = res print(string) assert conforms @@ -424,13 +534,22 @@ def test_owl_imports_fail(): assert True return True - res = validate(web_d2_ttl, shacl_graph=my_partial_shapes_text, data_graph_format='turtle', - shacl_graph_format='turtle', ont_graph=my_partial_ont_text, - ont_graph_format=None, inference='both', debug=True, do_owl_imports=True) + res = validate( + web_d2_ttl, + shacl_graph=my_partial_shapes_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=my_partial_ont_text, + ont_graph_format=None, + inference='both', + debug=True, + do_owl_imports=True, + ) conforms, graph, string = res print(string) assert not conforms + def test_sparql_message_subst(): df = '''@prefix ex: . @prefix owl: . @@ -469,13 +588,19 @@ def test_sparql_message_subst(): FILTER (?path = ) . 
}""" ; .''' - res = validate(df, data_graph_format='turtle', inference=None, debug=True,) + res = validate( + df, + data_graph_format='turtle', + inference=None, + debug=True, + ) conforms, graph, s = res assert "#InvalidResource1 cannot have a http://www.w3.org/2000/01/rdf-schema#label of Invalid resource 1" in s assert "#InvalidResource2 cannot have a http://www.w3.org/2000/01/rdf-schema#label of Invalid label 1" in s assert "#InvalidResource2 cannot have a http://www.w3.org/2000/01/rdf-schema#label of Invalid label 2" in s assert not conforms + if __name__ == "__main__": test_validate_with_ontology() test_validate_with_ontology_fail1() @@ -490,4 +615,3 @@ def test_sparql_message_subst(): test_owl_imports() test_owl_imports_fail() test_sparql_message_subst() - From 0283a9bc3fcbba62101ef111596be6008e9fa109 Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Thu, 26 Sep 2024 11:14:27 +1000 Subject: [PATCH 02/11] Always pass a List (or str or URIRef) in the focus_nodes argument, fix the python 3.8 compatible typing in the validator kwargs. 
--- pyshacl/cli.py | 1 + pyshacl/validate.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyshacl/cli.py b/pyshacl/cli.py index adf572f..63f269e 100644 --- a/pyshacl/cli.py +++ b/pyshacl/cli.py @@ -268,6 +268,7 @@ def main(prog: Union[str, None] = None) -> None: if args.js: validator_kwargs['js'] = True if args.focus: + validator_kwargs['focus_nodes'] = [_f.strip() for _f in args.focus.split(',')] validator_kwargs['focus'] = args.focus if args.iterate_rules: if not args.advanced: diff --git a/pyshacl/validate.py b/pyshacl/validate.py index 4a6127c..19a2f29 100644 --- a/pyshacl/validate.py +++ b/pyshacl/validate.py @@ -429,7 +429,7 @@ def validate( allow_warnings: Optional[bool] = False, max_validation_depth: Optional[int] = None, sparql_mode: Optional[bool] = False, - focus_nodes: Optional[List[Union[str | URIRef]]] = None, + focus_nodes: Optional[List[Union[str, URIRef]]] = None, **kwargs, ): """ From 77425ea6e098f58c8e6fc68d6772439c1fb44850 Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Thu, 26 Sep 2024 13:00:36 +1000 Subject: [PATCH 03/11] Add ability to target by manually specified NodeShape or PropertyShape from the ShapesGraph. 
--- pyshacl/cli.py | 11 +- pyshacl/shapes_graph.py | 149 ++++++++++++++++++++++- pyshacl/validate.py | 52 +++++++-- test/test_extra.py | 17 --- test/test_manual_targeting.py | 214 ++++++++++++++++++++++++++++++++++ 5 files changed, 414 insertions(+), 29 deletions(-) create mode 100644 test/test_manual_targeting.py diff --git a/pyshacl/cli.py b/pyshacl/cli.py index 63f269e..e68fd92 100644 --- a/pyshacl/cli.py +++ b/pyshacl/cli.py @@ -156,6 +156,14 @@ def str_is_true(s_var: str): nargs="?", default=None, ) +parser.add_argument( + '--shape', + dest='shape', + action='store', + help='The IRI of a NodeShape or PropertyShape from the SHACL ShapesGraph, only this shape will be used to validate the DataGraph.', + nargs="?", + default=None, +) parser.add_argument( '-f', '--format', @@ -269,7 +277,8 @@ def main(prog: Union[str, None] = None) -> None: validator_kwargs['js'] = True if args.focus: validator_kwargs['focus_nodes'] = [_f.strip() for _f in args.focus.split(',')] - validator_kwargs['focus'] = args.focus + if args.shape: + validator_kwargs['use_shapes'] = [_s.strip() for _s in args.shape.split(',')] if args.iterate_rules: if not args.advanced: sys.stderr.write("Iterate-Rules option only works when you enable Advanced Mode.\n") diff --git a/pyshacl/shapes_graph.py b/pyshacl/shapes_graph.py index b3965d8..761cd3f 100644 --- a/pyshacl/shapes_graph.py +++ b/pyshacl/shapes_graph.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import logging import warnings -from typing import TYPE_CHECKING, Dict, Optional, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Union import rdflib @@ -164,13 +164,24 @@ def get_shacl_target_type(self, uri: Union[str, 'RDFNode']): def shapes(self): """ - :returns: [Shape] - :rtype: list(pyshacl.shape.Shape) + :returns: List[Shape] + :rtype: List[pyshacl.shape.Shape] """ if len(self._node_shape_cache) < 1: self._build_node_shape_cache() return self._node_shape_cache.values() + def shapes_from_uris(self, shapes_uris: List[rdflib.URIRef]): + 
""" + :param shapes_uris: + :type shapes_uris: List[rdflib.URIRef] + :returns: List[Shape] + :rtype: List[Shape] + """ + if len(self._node_shape_cache) < 1: + self._build_node_shape_cache_from_list(shapes_uris) + return [self._node_shape_cache[s] for s in shapes_uris] + def lookup_shape_from_node(self, node) -> Shape: # This will throw a KeyError if it is not found. This is intentionally not caught here. return self._node_shape_cache[node] @@ -330,3 +341,135 @@ def _build_node_shape_cache(self): self.logger.debug( f"Cached {node_shape_count} unique NodeShapes and {property_shape_count} unique PropertyShapes." ) + + def _build_node_shape_cache_from_list(self, shapes_list: List[rdflib.URIRef]): + """ + :returns: None + :rtype: NoneType + """ + g = self.graph + gathered_node_shapes = set() + gathered_prop_shapes = set() + found_prop_shapes_paths: Dict[rdflib.URIRef, rdflib.URIRef] = dict() + + def _gather_shapes(shapes_nodes: List[rdflib.URIRef | rdflib.BNode], recurse_depth: int = 0): + nonlocal gathered_node_shapes, gathered_prop_shapes, found_prop_shapes_paths + if recurse_depth > 10: + raise ShapeLoadError( + "Specified shape has too many levels of attached bnodes.", + "https://www.w3.org/TR/shacl/#shapes-graph", + ) + for s in shapes_nodes: + all_po = list(g.predicate_objects(s)) + if len(all_po) < 1: + if recurse_depth < 1: + raise ShapeLoadError( + "Shape listed in use_shapes does not exist in the SHACL ShapesGraph.", + "https://www.w3.org/TR/shacl/#shapes-graph", + ) + else: + return + has_class = any(RDF_type == _p for _p, _o in all_po) + has_property = any(SH_property == _p for _p, _o in all_po) + has_node = any(SH_node == _p for _p, _o in all_po) + knows_class = False + if has_class: + all_classes = list(g.objects(s, RDF_type)) + for c in all_classes: + if c == SH_PropertyShape: + knows_class = True + gathered_prop_shapes.add(s) + break + elif c == SH_NodeShape: + knows_class = True + gathered_node_shapes.add(s) + break + else: + knows_class = False + if 
not knows_class: + for _p, _o in all_po: + if _p == SH_path: + has_path = True + found_prop_shapes_paths[s] = _o + break + else: + has_path = False + if has_path: + gathered_prop_shapes.add(s) + else: + is_property_of = len(list(g.subjects(SH_property, s))) > 0 + if is_property_of: + gathered_prop_shapes.add(s) + else: + gathered_node_shapes.add(s) + _found_child_bnodes = [] + if has_property: + property_entries = list(g.objects(s, SH_property)) + for p_e in property_entries: + if isinstance(p_e, rdflib.BNode): + _found_child_bnodes.append(p_e) + if has_node: + node_entries = list(g.objects(s, SH_node)) + for n_e in node_entries: + if isinstance(n_e, rdflib.BNode): + _found_child_bnodes.append(n_e) + if len(_found_child_bnodes) > 0: + _gather_shapes(_found_child_bnodes, recurse_depth=recurse_depth + 1) + + _gather_shapes(shapes_list) + + for s in gathered_node_shapes: + path_vals = list(g.objects(s, SH_path)) + if len(path_vals) > 0: + # TODO:coverage: we don't have any tests for invalid shapes + raise ShapeLoadError( + "A shape defined as a NodeShape cannot be the subject of a 'sh:path' predicate.", + "https://www.w3.org/TR/shacl/#node-shapes", + ) + + for s in gathered_prop_shapes: + if s in gathered_node_shapes: + # TODO:coverage: we don't have any tests for invalid shapes + raise ShapeLoadError( + "A shape defined as a NodeShape cannot also be defined as a PropertyShape.", + "https://www.w3.org/TR/shacl/#node-shapes", + ) + if s not in found_prop_shapes_paths: + path_vals = list(g.objects(s, SH_path)) + if len(path_vals) < 1: + # TODO:coverage: we don't have any tests for invalid shapes + raise ShapeLoadError( + "A shape defined as a PropertyShape must include one `sh:path` property.", + "https://www.w3.org/TR/shacl/#property-shapes", + ) + elif len(path_vals) > 1: + # TODO:coverage: we don't have any tests for invalid shapes + raise ShapeLoadError( + "A shape defined as a PropertyShape cannot have more than one 'sh:path' property.", + 
"https://www.w3.org/TR/shacl/#property-shapes", + ) + else: + found_prop_shapes_paths[s] = path_vals[0] + + node_shape_count = 0 + property_shape_count = 0 + for node_shape in gathered_node_shapes: + if node_shape in self._node_shape_cache: + # TODO:coverage: we don't have any tests where a shape is loaded twice + raise ShapeLoadError("That shape has already been loaded!", "None") + s = Shape(self, node_shape, p=False, logger=self.logger) + self._node_shape_cache[node_shape] = s + node_shape_count += 1 + for prop_shape in gathered_prop_shapes: + if prop_shape in self._node_shape_cache: + # TODO:coverage: we don't have any tests where a shape is loaded twice + raise ShapeLoadError("That shape has already been loaded!", "None") + prop_shape_path = found_prop_shapes_paths[prop_shape] + s = Shape(self, prop_shape, p=True, path=prop_shape_path, logger=self.logger) + self._node_shape_cache[prop_shape] = s + property_shape_count += 1 + + if self.debug: + self.logger.debug( + f"Cached {node_shape_count} unique NodeShapes and {property_shape_count} unique PropertyShapes." + ) diff --git a/pyshacl/validate.py b/pyshacl/validate.py index 19a2f29..20c4793 100644 --- a/pyshacl/validate.py +++ b/pyshacl/validate.py @@ -65,6 +65,7 @@ def _load_default_options(cls, options_dict: dict): options_dict.setdefault('sparql_mode', False) options_dict.setdefault('max_validation_depth', 15) options_dict.setdefault('focus_nodes', None) + options_dict.setdefault('use_shapes', None) if 'logger' not in options_dict: options_dict['logger'] = logging.getLogger(__name__) if options_dict['debug']: @@ -275,13 +276,33 @@ def run(self): self.logger.debug("Running validation in-place, without modifying the DataGraph.") self.inplace = True self._target_graph = the_target_graph - - shapes = self.shacl_graph.shapes # This property getter triggers shapes harvest. 
- limit_focus_nodes = self.options.get("focus_nodes", None) - if limit_focus_nodes is not None and len(limit_focus_nodes) > 0: + if self.options.get("use_shapes", None) is not None and len(self.options["use_shapes"]) > 0: + using_manually_specified_shapes = True + expanded_use_shapes = [] + for s in self.options["use_shapes"]: + s_lower = s.lower() + if ( + s_lower.startswith("http:") + or s_lower.startswith("https:") + or s_lower.startswith("urn:") + or s_lower.startswith("file:") + ): + expanded_use_shapes.append(URIRef(s)) + else: + try: + expanded_use_shape = self.shacl_graph.graph.namespace_manager.expand_curie(s) + except ValueError: + expanded_use_shape = URIRef(s) + expanded_use_shapes.append(expanded_use_shape) + shapes = self.shacl_graph.shapes_from_uris(expanded_use_shapes) + else: + using_manually_specified_shapes = False + shapes = self.shacl_graph.shapes # This property getter triggers shapes harvest. + option_focus_nodes = self.options.get("focus_nodes", None) + if option_focus_nodes is not None and len(option_focus_nodes) > 0: # Expand any CURIEs in the focus_nodes list - expanded_focus_nodes = [] - for f in limit_focus_nodes: + expanded_focus_nodes: List[URIRef] = [] + for f in option_focus_nodes: f_lower = f.lower() if ( f_lower.startswith("http:") @@ -297,8 +318,18 @@ def run(self): expanded_focus_node = URIRef(f) expanded_focus_nodes.append(expanded_focus_node) self.options["focus_nodes"] = expanded_focus_nodes - + specified_focus_nodes: Union[None, List[URIRef]] = expanded_focus_nodes + else: + specified_focus_nodes = None executor = self.make_executor() + + # Special hack, if we are using manually specified shapes, and have + # manually specified focus nodes, then we need to disable the + # focus_nodes in the executor, because we apply the specified focus + # nodes directly to the specified shapes. 
+ if using_manually_specified_shapes and specified_focus_nodes is not None: + executor.focus_nodes = None + if executor.advanced_mode: self.logger.debug("Activating SHACL-AF Features.") target_types = gather_target_types(self.shacl_graph) @@ -345,7 +376,10 @@ def run(self): apply_rules(executor, advanced['rules'], g) try: for s in shapes: - _is_conform, _reports = s.validate(executor, g) + if using_manually_specified_shapes and specified_focus_nodes is not None: + _is_conform, _reports = s.validate(executor, g, focus=specified_focus_nodes) + else: + _is_conform, _reports = s.validate(executor, g) non_conformant = non_conformant or (not _is_conform) reports.extend(_reports) if executor.abort_on_first and non_conformant: @@ -430,6 +464,7 @@ def validate( max_validation_depth: Optional[int] = None, sparql_mode: Optional[bool] = False, focus_nodes: Optional[List[Union[str, URIRef]]] = None, + use_shapes: Optional[List[Union[str, URIRef]]] = None, **kwargs, ): """ @@ -559,6 +594,7 @@ def validate( 'sparql_mode': sparql_mode, 'logger': log, 'focus_nodes': focus_nodes, + 'use_shapes': use_shapes, } if max_validation_depth is not None: validator_options_dict['max_validation_depth'] = max_validation_depth diff --git a/test/test_extra.py b/test/test_extra.py index e6be92d..0b1b23f 100644 --- a/test/test_extra.py +++ b/test/test_extra.py @@ -188,23 +188,6 @@ def test_validate_with_ontology_fail2(): assert not conforms -def test_validate_with_ontology_fail_focuses(): - res = validate( - data_file_text_bad, - shacl_graph=shacl_file_text, - data_graph_format='turtle', - shacl_graph_format='turtle', - ont_graph=ontology_file_text, - ont_graph_format="turtle", - inference='both', - focus_nodes=["ex:Human1"], - debug=True, - ) - conforms, graph, string = res - assert "Results (1)" in string - assert not conforms - - def test_metashacl_pass(): res = validate( data_file_text, diff --git a/test/test_manual_targeting.py b/test/test_manual_targeting.py new file mode 100644 index 
0000000..c960bf0 --- /dev/null +++ b/test/test_manual_targeting.py @@ -0,0 +1,214 @@ +# -*- coding: utf-8 -*- +# +# Extra tests which are not part of the SHT or DASH test suites, +# nor the discrete issues tests or the cmdline_test file. +# The need for these tests are discovered by doing coverage checks and these +# are added as required. + + +from pyshacl import validate + +ontology_file_text = """ +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . +@prefix exOnt: . + + a owl:Ontology ; + rdfs:label "An example extra-ontology file."@en . + +exOnt:Animal a rdfs:Class ; + rdfs:comment "The parent class for Humans and Pets"@en ; + rdfs:subClassOf owl:Thing . + +exOnt:Human a rdfs:Class ; + rdfs:comment "A Human being"@en ; + rdfs:subClassOf exOnt:Animal . + +exOnt:Pet a rdfs:Class ; + rdfs:comment "An animal owned by a human"@en ; + rdfs:subClassOf exOnt:Animal . + +exOnt:hasPet a rdf:Property ; + rdfs:domain exOnt:Human ; + rdfs:range exOnt:Pet . + +exOnt:nlegs a rdf:Property ; + rdfs:domain exOnt:Animal ; + rdfs:range xsd:integer . + +exOnt:Teacher a rdfs:Class ; + rdfs:comment "A Human who is a teacher."@en ; + rdfs:subClassOf exOnt:Human . + +exOnt:PreschoolTeacher a rdfs:Class ; + rdfs:comment "A Teacher who teaches preschool."@en ; + rdfs:subClassOf exOnt:Teacher . + +exOnt:Lizard a rdfs:Class ; + rdfs:subClassOf exOnt:Pet . + +exOnt:Turtle a rdfs:Class ; + rdfs:subClassOf exOnt:Pet . + +exOnt:Goanna a rdfs:Class ; + rdfs:subClassOf exOnt:Lizard . + +""" + +shacl_file_text = """ +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix sh: . +@prefix xsd: . +@prefix exShape: . +@prefix exOnt: . + + a owl:Ontology ; + rdfs:label "Example Shapes File"@en . + +exShape:HumanShape a sh:NodeShape ; + sh:property [ + sh:class exOnt:Pet ; + sh:path exOnt:hasPet ; + ] ; + sh:property [ + sh:datatype xsd:integer ; + sh:path exOnt:nLegs ; + sh:maxInclusive 2 ; + sh:minInclusive 2 ; + ] ; + sh:targetClass exOnt:Human . 
+ +exShape:AnimalShape a sh:NodeShape ; + sh:property [ + sh:datatype xsd:integer ; + sh:path exOnt:nLegs ; + sh:maxInclusive 4 ; + sh:minInclusive 1 ; + ] ; + sh:targetClass exOnt:Animal . +""" + +data_file_text = """ +@prefix rdf: . +@prefix xsd: . +@prefix exOnt: . +@prefix ex: . + +ex:Human1 rdf:type exOnt:PreschoolTeacher ; + rdf:label "Amy" ; + exOnt:nLegs "2"^^xsd:integer ; + exOnt:hasPet ex:Pet1 . + +ex:Pet1 rdf:type exOnt:Goanna ; + rdf:label "Sebastian" ; + exOnt:nLegs "4"^^xsd:integer . + +ex:Human2 rdf:type exOnt:PreschoolTeacher ; + rdf:label "JoAnne" ; + exOnt:nLegs "2"^^xsd:integer ; + exOnt:hasPet ex:Pet2 . + +ex:Pet2 rdf:type exOnt:Turtle ; + rdf:label "Terrance" ; + exOnt:nLegs "4"^^xsd:integer . + +""" + +data_file_text_bad = """ +@prefix rdf: . +@prefix xsd: . +@prefix exOnt: . +@prefix ex: . + +ex:Human1 rdf:type exOnt:PreschoolTeacher ; + rdf:label "Amy" ; + exOnt:nLegs "2"^^xsd:integer ; + exOnt:hasPet "Sebastian"^^xsd:string . + +ex:Pet1 rdf:type exOnt:Goanna ; + rdf:label "Sebastian" ; + exOnt:nLegs "four"^^xsd:string . + +ex:Human2 rdf:type exOnt:PreschoolTeacher ; + rdf:label "JoAnne" ; + exOnt:nLegs "2"^^xsd:integer ; + exOnt:hasPet "Terrance"^^xsd:string . + +ex:Pet2 rdf:type exOnt:Turtle ; + rdf:label "Terrance" ; + exOnt:nLegs "four"^^xsd:string . 
+ +""" + + +def test_validate_pass_manual_targeting_focus(): + res = validate( + data_file_text, + shacl_graph=shacl_file_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='both', + focus_nodes=["ex:Human1"], + debug=True, + ) + conforms, graph, string = res + assert "Results (1)" not in string + assert conforms + + +def test_validate_fail_manual_targeting_focus(): + res = validate( + data_file_text_bad, + shacl_graph=shacl_file_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='both', + focus_nodes=["ex:Human1"], + debug=True, + ) + conforms, graph, string = res + assert "Results (1)" in string + assert not conforms + + +def test_validate_fail_manual_targeting_shape(): + res = validate( + data_file_text_bad, + shacl_graph=shacl_file_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='both', + use_shapes=["exShape:HumanShape"], + debug=True, + ) + conforms, graph, string = res + assert "Results (2)" in string + assert not conforms + + +def test_validate_fail_manual_targeting_focus_with_shape(): + res = validate( + data_file_text_bad, + shacl_graph=shacl_file_text, + data_graph_format='turtle', + shacl_graph_format='turtle', + ont_graph=ontology_file_text, + ont_graph_format="turtle", + inference='both', + focus_nodes=["ex:Human1"], + use_shapes=["exShape:HumanShape"], + debug=True, + ) + conforms, graph, string = res + assert "Results (1)" in string + assert not conforms From 5f8c1550bf23c1684d30074c349ffb97de5f96fc Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Sat, 28 Sep 2024 11:22:32 +1000 Subject: [PATCH 04/11] small wording changes in CLI help strings --- pyshacl/cli.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pyshacl/cli.py b/pyshacl/cli.py index 
1e4b656..c82dc55 100644 --- a/pyshacl/cli.py +++ b/pyshacl/cli.py @@ -147,13 +147,18 @@ def str_is_true(s_var: str): help="The maximum number of SHACL shapes \"deep\" that the validator can go before reaching an \"endpoint\" constraint.", ) parser.add_argument( - '-d', '--debug', dest='debug', action='store_true', default=False, help='Output additional runtime messages.' + '-d', + '--debug', + dest='debug', + action='store_true', + default=False, + help='Output additional verbose runtime messages.', ) parser.add_argument( '--focus', dest='focus', action='store', - help='The IRI of a focus node from the DataGraph, the shapes will validate only that node.', + help='Optional IRIs of focus nodes from the DataGraph, the shapes will validate only these node. Comma-separated list.', nargs="?", default=None, ) @@ -161,7 +166,7 @@ def str_is_true(s_var: str): '--shape', dest='shape', action='store', - help='The IRI of a NodeShape or PropertyShape from the SHACL ShapesGraph, only this shape will be used to validate the DataGraph.', + help='Optional IRIs of a NodeShape or PropertyShape from the SHACL ShapesGraph, only these shapes will be used to validate the DataGraph. Comma-separated list.', nargs="?", default=None, ) From 2e466c36eb86e1987fcf5bb15be7e351a8b3b6ed Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Sat, 28 Sep 2024 13:49:11 +1000 Subject: [PATCH 05/11] Don't include memory addresses in stringified Literal values. This aids in comparing results across runs. 
--- pyshacl/rdfutil/stringify.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pyshacl/rdfutil/stringify.py b/pyshacl/rdfutil/stringify.py index 92ceb5d..2677fd7 100644 --- a/pyshacl/rdfutil/stringify.py +++ b/pyshacl/rdfutil/stringify.py @@ -95,15 +95,19 @@ def stringify_list(node: rdflib.BNode) -> str: def stringify_literal(graph: rdflib.Graph, node: rdflib.Literal, ns_manager: Optional[NamespaceManager] = None): - lit_val_string = str(node.value) - lex_val_string = str(node) + lit_val_string: Union[str, None] = None if node.value is None else str(node.value) + lex_string = str(node) if ns_manager is None: # pragma: no cover ns_manager = graph.namespace_manager ns_manager.bind("sh", SH) - if lit_val_string != lex_val_string: - val_string = "\"{}\" = {}".format(lex_val_string, lit_val_string) + if lit_val_string is not None: + i_at = lit_val_string.find(" object at 0x") + if i_at > 0: + lit_val_string = lit_val_string[:i_at] + if lit_val_string is not None and lit_val_string != lex_string: + val_string = "\"{}\" = {}".format(lex_string, lit_val_string) else: - val_string = "\"{}\"".format(lex_val_string) + val_string = "\"{}\"".format(lex_string) if node.language: lang_string = ", lang={}".format(str(node.language)) else: From fb248520d622555354af782277f58e552e5b279e Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Sat, 28 Sep 2024 14:37:43 +1000 Subject: [PATCH 06/11] Add Focus-filtering and Shape-selection words to Changelog --- CHANGELOG.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e38041..2aaa337 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,18 @@ and this project adheres to [Python PEP 440 Versioning](https://www.python.org/d ## [Unreleased] ### Added -- Focus Node mode! +- Focus Node Filtering - You can now pass in a list of focus nodes to the validator, and it will only validate those focus nodes. 
- Note, you still need to pass in a SHACL Shapes Graph, and the shapes still need to target the focus nodes. - This feature will filter the Shapes' targeted focus nodes to include only those that are in the list of specified focus nodes. +- SHACL Shape selection + - You can now pass in a list of SHACL Shapes to the validator, and it will use only those Shapes for validation. + - This is useful for testing new shapes in your shapes graph, or for many other procedure-driven use cases. +- Combined Shape Selection with Focus Node filtering + - The combination of the above two new features is especially powerful. + - If you give the validator a list of Shapes to use, and a list of focus nodes, the validator will operate in + a highly-targeted mode, it feeds those focus nodes directly into those given Shapes for validation. + - In this mode, the selected SHACL Shape does not need to specify any focus-targeting mechanisms of its own. ### Changed - Don't make a clone of the DataGraph if the input data graph is ephemeral. @@ -23,6 +31,7 @@ and this project adheres to [Python PEP 440 Versioning](https://www.python.org/d - Removes some complexity from the main `validate` function. - Typing - A whole swathe of python typing fixes and new type annotations. Thanks @ajnelson-nist + ### Fixed - Fix logic determining if a datagraph is ephemeral. From b8a4603e2942f247b7c83a5248fb88f0062d2d1b Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Sat, 28 Sep 2024 14:51:36 +1000 Subject: [PATCH 07/11] Add docs for Focus Node filtering and SHACL Shape selection to the README --- README.md | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 18707b8..84d8fe3 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,12 @@ optional arguments: The maximum number of SHACL shapes "deep" that the validator can go before reaching an "endpoint" constraint. - -d, --debug Output additional runtime messages. 
+ -d, --debug Output additional verbose runtime messages. + --focus [FOCUS] Optional IRIs of focus nodes from the DataGraph, the shapes will + validate only these node. Comma-separated list. + --shape [SHAPE] Optional IRIs of a NodeShape or PropertyShape from the SHACL + ShapesGraph, only these shapes will be used to validate the + DataGraph. Comma-separated list. -f {human,table,turtle,xml,json-ld,nt,n3}, --format {human,table,turtle,xml,json-ld,nt,n3} Choose an output format. Default is "human". -df {auto,turtle,xml,json-ld,nt,n3}, --data-file-format {auto,turtle,xml,json-ld,nt,n3} @@ -172,8 +177,8 @@ Some other optional keyword variables available on the `validate` function: Return value: * a three-component `tuple` containing: - * `conforms`: a `bool`, indicating whether or not the `data_graph` conforms to the `shacl_graph` - * `results_graph`: a `Graph` object built according to the SHACL specification's [Validation Report](https://www.w3.org/TR/shacl/#validation-report) structure + * `conforms`: a `bool`, indicating whether the `data_graph` conforms to the `shacl_graph` + * `results_graph`: a `Graph` object built according to the SHACL specification's [Validation Report](https://www.w3.org/TR/shacl/#validation-report) scheme * `results_text`: python string representing a verbose textual representation of the [Validation Report](https://www.w3.org/TR/shacl/#validation-report) @@ -200,6 +205,23 @@ Unlike `ValidationFailure`, these errors are not passed back as a result by the caught in a `try ... except` block. In the case of `ShapeLoadError` and `ConstraintLoadError`, see the `str()` string representation of the exception instance for the error message along with a link to the relevant section in the SHACL spec document. + +## Focus Node Filtering, and Shape Selection +PySHACL v0.27.0 and above has two powerful new features: +- Focus Node Filtering + - You can pass in a list of focus nodes to the validator, and it will only validate those focus nodes. 
+ - _Note_, you still need to use a SHACL ShapesGraph, and the Shapes _still need to target_ the focus nodes. + - This feature will filter the Shapes' targeted focus nodes to include only those that are in the list of specified focus nodes. +- SHACL Shape selection + - You can pass in a list of SHACL Shapes to the validator, and it will use only those Shapes for validation. + - This is useful for testing new shapes in your shapes graph, or for many other procedure-driven use cases. +- Combined Shape Selection with Focus Node filtering + - The combination of the above two new features is especially powerful. + - If you give the validator a list of Shapes to use, and a list of focus nodes, the validator will operate in + a highly-targeted mode, it feeds those focus nodes directly into those given Shapes for validation. + - In this mode, the selected SHACL Shape does not need to specify any focus-targeting mechanisms of its own. + + ## SPARQL Remote Graph Mode _**PySHACL now has a built-in SPARQL Remote Graph Mode, which allows you to validate a data graph that is stored on a remote server.**_ From 63a49e1e128449b20a80a747723c83773825170e Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Sat, 28 Sep 2024 14:56:02 +1000 Subject: [PATCH 08/11] Fix some wording in default messages --- pyshacl/constraints/core/logical_constraints.py | 14 +++++--------- .../constraints/core/shape_based_constraints.py | 2 +- test/issues/test_162.py | 3 ++- test/issues/test_217.py | 14 +++++++++++--- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/pyshacl/constraints/core/logical_constraints.py b/pyshacl/constraints/core/logical_constraints.py index 31ae059..edb7ce3 100644 --- a/pyshacl/constraints/core/logical_constraints.py +++ b/pyshacl/constraints/core/logical_constraints.py @@ -57,10 +57,10 @@ def constraint_name(cls): def make_generic_messages(self, datagraph: GraphLike, focus_node, value_node) -> List[rdflib.Literal]: if len(self.not_list) == 1: - m = f"Node 
{stringify_node(datagraph, value_node)} conforms to shape {stringify_node(self.shape.sg.graph, self.not_list[0])}" + m = f"Node {stringify_node(datagraph, value_node)} must not conform to shape {stringify_node(self.shape.sg.graph, self.not_list[0])}" else: nots_list = " , ".join(stringify_node(self.shape.sg.graph, n) for n in self.not_list) - m = f"Node {stringify_node(datagraph, value_node)} conforms to one or more shapes in {nots_list}" + m = f"Node {stringify_node(datagraph, value_node)} must not conform to any shapes in {nots_list}" return [rdflib.Literal(m)] def evaluate(self, executor: SHACLExecutor, datagraph: GraphLike, focus_value_nodes: Dict, _evaluation_path: List): @@ -162,7 +162,7 @@ def make_generic_messages(self, datagraph: GraphLike, focus_node, value_node) -> and_list = " , ".join( stringify_node(self.shape.sg.graph, a_c) for a in self.and_list for a_c in self.shape.sg.graph.items(a) ) - m = "Node {} does not conform to all shapes in {}".format(stringify_node(datagraph, value_node), and_list) + m = "Node {} must conform to all shapes in {}".format(stringify_node(datagraph, value_node), and_list) return [rdflib.Literal(m)] def evaluate( @@ -258,9 +258,7 @@ def make_generic_messages(self, datagraph: GraphLike, focus_node, value_node) -> or_list = " , ".join( stringify_node(self.shape.sg.graph, o_c) for o in self.or_list for o_c in self.shape.sg.graph.items(o) ) - m = "Node {} does not conform to one or more shapes in {}".format( - stringify_node(datagraph, value_node), or_list - ) + m = "Node {} must conform to one or more shapes in {}".format(stringify_node(datagraph, value_node), or_list) return [rdflib.Literal(m)] def evaluate( @@ -356,9 +354,7 @@ def make_generic_messages(self, datagraph: GraphLike, focus_node, value_node) -> xone_list = " , ".join( stringify_node(self.shape.sg.graph, a_c) for a in self.xone_nodes for a_c in self.shape.sg.graph.items(a) ) - m = "Node {} does not conform to exactly one shape in {}".format( - stringify_node(datagraph, 
value_node), xone_list - ) + m = "Node {} must conform to exactly one shape in {}".format(stringify_node(datagraph, value_node), xone_list) return [rdflib.Literal(m)] def evaluate( diff --git a/pyshacl/constraints/core/shape_based_constraints.py b/pyshacl/constraints/core/shape_based_constraints.py index acb8b41..634e3bb 100644 --- a/pyshacl/constraints/core/shape_based_constraints.py +++ b/pyshacl/constraints/core/shape_based_constraints.py @@ -167,7 +167,7 @@ def make_generic_messages(self, datagraph: GraphLike, focus_node, value_node) -> m = "Value does not conform to Shape {}.".format(stringify_node(self.shape.sg.graph, self.node_shapes[0])) else: rules = "', '".join(stringify_node(self.shape.sg.graph, c) for c in self.node_shapes) - m = "Value does not conform to every Shape in ('{}').".format(rules) + m = "Value must conform to every Shape in ('{}').".format(rules) m += " See details for more information." return [rdflib.Literal(m)] diff --git a/test/issues/test_162.py b/test/issues/test_162.py index 7175a97..6578d2a 100644 --- a/test/issues/test_162.py +++ b/test/issues/test_162.py @@ -4,6 +4,7 @@ """ import rdflib + from pyshacl import validate shacl_file = """\ @@ -63,7 +64,7 @@ def test_162() -> None: conforms, report, message = validate(data_g, shacl_graph=shapes, debug=True) assert not conforms # confirm that both nodeShapes are included in the error message - assert "Value does not conform to every Shape in ('ex:nodeShape1', 'ex:nodeShape2')" in message + assert "Value must conform to every Shape in ('ex:nodeShape1', 'ex:nodeShape2')" in message if __name__ == "__main__": diff --git a/test/issues/test_217.py b/test/issues/test_217.py index f76191f..684d891 100644 --- a/test/issues/test_217.py +++ b/test/issues/test_217.py @@ -65,11 +65,19 @@ def test_217(): shape_g = rdflib.Graph().parse(data=shapes_data, format='turtle') data_g = rdflib.Graph().parse(data=data_g_text, format="turtle") conforms, results_graph, results_text = pyshacl.validate( - data_g, 
shacl_graph=shape_g, debug=True, meta_shacl=False, + data_g, + shacl_graph=shape_g, + debug=True, + meta_shacl=False, ) assert not conforms - assert ("Node kb:Thing-2 conforms to shape" in results_text or "Node kb:Thing-2 conforms to one or more shapes" in results_text) and \ - ("Node kb:Thing-3 conforms to shape" in results_text or "Node kb:Thing-3 conforms to one or more shapes" in results_text) + assert ( + "Node kb:Thing-2 must not conform to shape" in results_text + or "Node kb:Thing-2 must not conform to any shapes" in results_text + ) and ( + "Node kb:Thing-3 must not conform to shape" in results_text + or "Node kb:Thing-3 must not conform to any shapes" in results_text + ) if __name__ == "__main__": From 9f90bf778149381d19bc5cca0d553cd3d4818e00 Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Sat, 28 Sep 2024 14:56:24 +1000 Subject: [PATCH 09/11] enhance error output in stringify BNode from Datagraph --- pyshacl/rdfutil/stringify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyshacl/rdfutil/stringify.py b/pyshacl/rdfutil/stringify.py index 2677fd7..d446ea3 100644 --- a/pyshacl/rdfutil/stringify.py +++ b/pyshacl/rdfutil/stringify.py @@ -140,7 +140,7 @@ def find_node_named_graph(dataset, node): return g except StopIteration: continue - raise RuntimeError("Cannot find that node in any named graph.") + raise RuntimeError(f"Cannot find node {node} in any named graph.") def stringify_node( From ce22edf5a5e065a6131ba2c48947e653c8ca9f2a Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Sat, 28 Sep 2024 14:57:22 +1000 Subject: [PATCH 10/11] More complete comparison of specific parts of Results in a ValidationReports. 
--- pyshacl/validator_conformance.py | 81 ++++++++++++++++++++++++++++---- test/test_sht_validate.py | 37 +++++---------- 2 files changed, 84 insertions(+), 34 deletions(-) diff --git a/pyshacl/validator_conformance.py b/pyshacl/validator_conformance.py index 49dc1d0..d788b20 100644 --- a/pyshacl/validator_conformance.py +++ b/pyshacl/validator_conformance.py @@ -1,5 +1,5 @@ import logging -from typing import TYPE_CHECKING, Iterator, List, Optional, Set, Union, cast +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union import rdflib from rdflib import BNode, Literal, URIRef @@ -13,14 +13,17 @@ RDFS_Resource, SH_conforms, SH_detail, + SH_focusNode, SH_result, SH_resultMessage, + SH_resultPath, SH_ValidationReport, + SH_value, ) from pyshacl.errors import ReportableRuntimeError, ValidationFailure from pyshacl.functions import apply_functions, gather_functions from pyshacl.pytypes import GraphLike, RDFNode -from pyshacl.rdfutil import compare_blank_node, compare_node, order_graph_literal +from pyshacl.rdfutil import compare_blank_node, compare_node, order_graph_literal, stringify_node if TYPE_CHECKING: from pyshacl.validate import Validator @@ -70,15 +73,15 @@ def compare_validation_reports( report_graph: GraphLike, expected_graph: GraphLike, expected_result, log: logging.Logger ): expected_conforms_i = expected_graph.objects(expected_result, SH_conforms) - expected_conforms = set(cast(Iterator[Literal], expected_conforms_i)) + expected_conforms = set(expected_conforms_i) if len(expected_conforms) < 1: # pragma: no cover raise ReportableRuntimeError( "Cannot check the expected result, the given expectedResult does not have an sh:conforms." 
) expected_conform = next(iter(expected_conforms)) - expected_result_nodes = expected_graph.objects(expected_result, SH_result) - expected_result_nodes_set = set(expected_result_nodes) - expected_result_node_count = len(expected_result_nodes_set) + expected_result_nodes_i = expected_graph.objects(expected_result, SH_result) + expected_result_nodes = list(expected_result_nodes_i) + expected_result_node_count = len(expected_result_nodes) validation_reports = report_graph.subjects(RDF_type, SH_ValidationReport) validation_reports_set = set(validation_reports) @@ -92,20 +95,25 @@ def compare_validation_reports( if eq != 0: return False report_conforms_i = report_graph.objects(validation_report, SH_conforms) - report_conforms = set(cast(Iterator[Literal], report_conforms_i)) + report_conforms = set(report_conforms_i) if len(report_conforms) < 1: # pragma: no cover raise ReportableRuntimeError( "Cannot check the validation report, the report graph does not have an sh:conforms." ) report_conform = next(iter(report_conforms)) - if bool(expected_conform.value) != bool(report_conform.value): + if ( + isinstance(expected_conform, Literal) + and isinstance(report_conform, Literal) + and bool(expected_conform.value) != bool(report_conform.value) + ): # TODO:coverage: write a test for this log.error("Expected Result Conforms value is different from Validation Report's Conforms value.") return False report_result_nodes_i = report_graph.objects(validation_report, SH_result) - report_result_node_count = len(set(report_result_nodes_i)) + report_result_nodes = list(report_result_nodes_i) + report_result_node_count = len(report_result_nodes) if expected_result_node_count != report_result_node_count: # TODO:coverage: write a test for this @@ -114,6 +122,61 @@ def compare_validation_reports( "Expected {}, got {}.".format(expected_result_node_count, report_result_node_count) ) return False + + expected_results_dict: Dict[Tuple[str, str, str], Any] = {} + report_results_dict: 
Dict[Tuple[str, str, str], Any] = {} + for result_nodes, result_graph, dest_dict in ( + (expected_result_nodes, expected_graph, expected_results_dict), + (report_result_nodes, report_graph, report_results_dict), + ): + for result in result_nodes: + result_focus_i = result_graph.objects(result, SH_focusNode) + result_focus_list = list(result_focus_i) + if len(result_focus_list) > 0: + f_node = result_focus_list[0] + if isinstance(f_node, Literal): + result_focus = str(f_node) + elif isinstance(f_node, BNode): + # result_value = "_:" + str(v_node) + # Can't compare BNodes because they are + # different in the shapes graph than the data graph + result_focus = "BNode" + else: + result_focus = stringify_node(result_graph, f_node) + else: + result_focus = "" + result_value_i = result_graph.objects(result, SH_value) + result_value_list = list(result_value_i) + if len(result_value_list) > 0: + v_node = result_value_list[0] + if isinstance(v_node, Literal): + result_value = str(v_node) + elif isinstance(v_node, BNode): + # result_value = "_:" + str(v_node) + # Can't compare BNodes because they are + # different in the shapes graph than the data graph + result_value = "BNode" + else: + result_value = stringify_node(result_graph, v_node) + else: + result_value = "" + result_path_i = result_graph.objects(result, SH_resultPath) + result_path_list = list(result_path_i) + if len(result_path_list) > 0: + result_path = stringify_node(result_graph, result_path_list[0]) + else: + result_path = "" + dest_dict[(result_focus, result_value, result_path)] = result + not_found_results = 0 + for expected_focus, expected_value, expected_path in expected_results_dict.keys(): + if (expected_focus, expected_value, expected_path) not in report_results_dict: + log.error( + "Expected result not found in Validation Report.\n" + "Expected focus: {}, value: {}, path: {}.".format(expected_focus, expected_value, expected_path) + ) + not_found_results += 1 + if not_found_results > 0: + return False 
return True diff --git a/test/test_sht_validate.py b/test/test_sht_validate.py index df6c499..5f93a56 100644 --- a/test/test_sht_validate.py +++ b/test/test_sht_validate.py @@ -1,16 +1,17 @@ # -*- coding: utf-8 -*- # import logging -from collections import defaultdict, OrderedDict import platform -import pytest +from collections import OrderedDict, defaultdict from os import path +from test.helpers import flatten_manifests, load_manifest + +import pytest +from rdflib.namespace import RDF, RDFS, Namespace + import pyshacl from pyshacl.errors import ReportableRuntimeError -from rdflib.namespace import Namespace, RDF, RDFS - from pyshacl.validator_conformance import check_sht_result -from test.helpers import load_manifest, flatten_manifests here_dir = path.abspath(path.dirname(__file__)) sht_files_dir = path.join(here_dir, 'resources', 'sht_tests') @@ -29,15 +30,11 @@ tests_found_in_manifests[m.base].extend(tests) tests_found_in_manifests = OrderedDict(sorted(tests_found_in_manifests.items())) -test_index_map = [[base, i] - for base,tests in tests_found_in_manifests.items() - for i, t in enumerate(tests)] +test_index_map = [[base, i] for base, tests in tests_found_in_manifests.items() for i, t in enumerate(tests)] # There are some tests we know will fail, but we don't want to stop deployment # if we hit them. 
List them here: -ALLOWABLE_FAILURES = [ - "/sparql/pre-binding/shapesGraph-001" -] +ALLOWABLE_FAILURES = ["/sparql/pre-binding/shapesGraph-001"] @pytest.mark.parametrize("base, index", test_index_map) @@ -45,11 +42,7 @@ def test_sht_all(base, index, caplog) -> None: caplog.set_level(logging.DEBUG) tests = tests_found_in_manifests[base] test = tests[index] - run_sht_test(test, { - "inference": 'rdfs', - "debug": True, - "meta_shacl": False - }) + run_sht_test(test, {"inference": 'rdfs', "debug": True, "meta_shacl": False}) @pytest.mark.parametrize("base, index", test_index_map) @@ -57,12 +50,8 @@ def test_sht_all_sparql_mode(base, index, caplog) -> None: caplog.set_level(logging.DEBUG) tests = tests_found_in_manifests[base] test = tests[index] - run_sht_test(test, { - "inference": 'none', - "debug": True, - "sparql_mode": True, - "meta_shacl": False - }) + run_sht_test(test, {"inference": 'none', "debug": True, "sparql_mode": True, "meta_shacl": False}) + def run_sht_test(sht_test, validate_args: dict) -> None: logger = logging.getLogger() # pytest uses the root logger with a capturing handler @@ -80,8 +69,7 @@ def run_sht_test(sht_test, validate_args: dict) -> None: if label: logger.info("testing: ".format(label)) try: - conforms, r_graph, r_text = pyshacl.validate( - data_file, shacl_graph=shacl_file, **validate_args) + conforms, r_graph, r_text = pyshacl.validate(data_file, shacl_graph=shacl_file, **validate_args) except (NotImplementedError, ReportableRuntimeError) as e: logger.exception(e) r_text = "" @@ -98,4 +86,3 @@ def run_sht_test(sht_test, validate_args: dict) -> None: break else: raise ae - From 3f2e0395519dfa683c88debccefe425d954069d4 Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Sat, 28 Sep 2024 14:58:22 +1000 Subject: [PATCH 11/11] Fixing typing issues introduced or uncovered in Shape class and ShapesGraph after implementing the better shape targeting feature. 
--- pyshacl/shape.py | 61 +++++++++++++++++++++-------------------- pyshacl/shapes_graph.py | 59 +++++++++++++++++++++++---------------- 2 files changed, 67 insertions(+), 53 deletions(-) diff --git a/pyshacl/shape.py b/pyshacl/shape.py index 500d7a2..79d3d36 100644 --- a/pyshacl/shape.py +++ b/pyshacl/shape.py @@ -5,9 +5,9 @@ import sys from decimal import Decimal from time import perf_counter -from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Set, Type, Union -from rdflib import BNode, Literal, URIRef +from rdflib import BNode, IdentifiedNode, Literal, URIRef from .consts import ( RDF_type, @@ -622,10 +622,8 @@ def validate( target_graph: GraphLike, focus: Optional[ Union[ - Tuple[Union[URIRef, BNode]], - List[Union[URIRef, BNode]], - Set[Union[URIRef, BNode]], - Union[URIRef, BNode], + Sequence[RDFNode], + RDFNode, ] ] = None, _evaluation_path: Optional[List] = None, @@ -634,40 +632,45 @@ def validate( if executor.debug: self.logger.debug(f"Skipping shape because it is deactivated: {str(self)}") return True, [] + focus_list: Sequence[RDFNode] = [] if focus is not None: lh_shape = False rh_shape = True self.logger.debug(f"Running evaluation of Shape {str(self)}") - if not isinstance(focus, (tuple, list, set)): - focus = [focus] - self.logger.debug(f"Shape was passed {len(focus)} Focus Node/s to evaluate.") - if len(focus) < 1: - return True, [] + # Passed in Focus node _can_ be a Literal, happens in PropertyShapes + # when the path resolves to a literal or set of Literals + if isinstance(focus, (IdentifiedNode, Literal)): + focus_list = [focus] + else: + focus_list = list(focus) + self.logger.debug(f"Shape was passed {len(focus_list)} Focus Node/s to evaluate.") else: lh_shape = True rh_shape = False self.logger.debug(f"Checking if Shape {str(self)} defines its own targets.") self.logger.debug("Identifying targets to find focus nodes.") if executor.sparql_mode: - 
focus = self.focus_nodes_sparql(target_graph, debug=executor.debug) + focus_set = self.focus_nodes_sparql(target_graph, debug=executor.debug) else: - focus = self.focus_nodes(target_graph, debug=executor.debug) - self.logger.debug(f"Found {len(focus)} Focus Nodes to evaluate.") - if len(focus) < 1: - # It's possible for shapes to have _no_ focus nodes - # (they are called in other ways) - if executor.debug: - self.logger.debug(f"Skipping shape {str(self)} because it found no focus nodes.") - return True, [] - else: - self.logger.debug(f"Running evaluation of Shape {str(self)}") + focus_set = self.focus_nodes(target_graph, debug=executor.debug) + self.logger.debug(f"Found {len(focus_list)} Focus Nodes to evaluate.") + focus_list = list(focus_set) + + if len(focus_list) < 1: + # It's possible for shapes to have _no_ focus nodes + # (they are called in other ways) + if executor.debug: + self.logger.debug(f"Skipping shape {str(self)} because it found no focus nodes.") + return True, [] + else: + self.logger.debug(f"Running evaluation of Shape {str(self)}") if executor.focus_nodes is not None and len(executor.focus_nodes) > 0: - filtered_focus_nodes = [] - for f in focus: - if f in executor.focus_nodes: - filtered_focus_nodes.append(f) - len_orig_focus = len(focus) + filtered_focus_nodes: List[Union[URIRef]] = [] + for _fo in focus_list: # type: RDFNode + if isinstance(_fo, URIRef) and _fo in executor.focus_nodes: + filtered_focus_nodes.append(_fo) + len_orig_focus = len(focus_list) len_filtered_focus = len(filtered_focus_nodes) if len_filtered_focus < 1: self.logger.debug(f"Skipping shape {str(self)} because specified focus nodes are not targeted.") @@ -676,7 +679,7 @@ def validate( self.logger.debug( f"Filtered focus nodes based on focus_nodes option. Only {len_filtered_focus} of {len_orig_focus} focus nodes remain." 
) - focus = filtered_focus_nodes + focus_list = filtered_focus_nodes t1 = ct1 = 0.0 # prevent warnings about use-before-assign collect_stats = bool(executor.debug) @@ -719,7 +722,7 @@ def validate( parameters = (p for p, v in self.sg.predicate_objects(self.node) if p in search_parameters) reports = [] focus_value_nodes = self.value_nodes( - target_graph, focus, sparql_mode=executor.sparql_mode, debug=executor.debug + target_graph, focus_list, sparql_mode=executor.sparql_mode, debug=executor.debug ) filter_reports: bool = False allow_conform: bool = False diff --git a/pyshacl/shapes_graph.py b/pyshacl/shapes_graph.py index 761cd3f..dfadccc 100644 --- a/pyshacl/shapes_graph.py +++ b/pyshacl/shapes_graph.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import logging import warnings -from typing import TYPE_CHECKING, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Union import rdflib @@ -350,15 +350,16 @@ def _build_node_shape_cache_from_list(self, shapes_list: List[rdflib.URIRef]): g = self.graph gathered_node_shapes = set() gathered_prop_shapes = set() - found_prop_shapes_paths: Dict[rdflib.URIRef, rdflib.URIRef] = dict() + found_prop_shapes_paths: Dict[Union[rdflib.URIRef, rdflib.BNode], Union[rdflib.URIRef, rdflib.BNode]] = dict() - def _gather_shapes(shapes_nodes: List[rdflib.URIRef | rdflib.BNode], recurse_depth: int = 0): + def _gather_shapes(shapes_nodes: Sequence[Union[rdflib.URIRef, rdflib.BNode]], recurse_depth: int = 0): nonlocal gathered_node_shapes, gathered_prop_shapes, found_prop_shapes_paths if recurse_depth > 10: raise ShapeLoadError( "Specified shape has too many levels of attached bnodes.", "https://www.w3.org/TR/shacl/#shapes-graph", ) + shape_expecting_preds = (SH_and, SH_not, SH_or, SH_xone, SH_property, SH_node, SH_qualifiedValueShape) for s in shapes_nodes: all_po = list(g.predicate_objects(s)) if len(all_po) < 1: @@ -370,8 +371,11 @@ def _gather_shapes(shapes_nodes: List[rdflib.URIRef | rdflib.BNode], 
recurse_dep else: return has_class = any(RDF_type == _p for _p, _o in all_po) - has_property = any(SH_property == _p for _p, _o in all_po) - has_node = any(SH_node == _p for _p, _o in all_po) + has_shape_expecting_p: Dict[rdflib.URIRef, bool] = {} + for _p in shape_expecting_preds: + if any(_p == _p2 for _p2, _o in all_po): + has_shape_expecting_p[_p] = True + knows_class = False if has_class: all_classes = list(g.objects(s, RDF_type)) @@ -387,10 +391,15 @@ def _gather_shapes(shapes_nodes: List[rdflib.URIRef | rdflib.BNode], recurse_dep else: knows_class = False if not knows_class: - for _p, _o in all_po: - if _p == SH_path: + for po_p, po_o in all_po: + if po_p == SH_path: + if not isinstance(po_o, (rdflib.BNode, rdflib.URIRef)): + raise ShapeLoadError( + "Found a path property with a value that is not a URIRef or BNode.", + "https://www.w3.org/TR/shacl/#property-paths", + ) has_path = True - found_prop_shapes_paths[s] = _o + found_prop_shapes_paths[s] = po_o break else: has_path = False @@ -402,17 +411,13 @@ def _gather_shapes(shapes_nodes: List[rdflib.URIRef | rdflib.BNode], recurse_dep gathered_prop_shapes.add(s) else: gathered_node_shapes.add(s) - _found_child_bnodes = [] - if has_property: - property_entries = list(g.objects(s, SH_property)) - for p_e in property_entries: - if isinstance(p_e, rdflib.BNode): - _found_child_bnodes.append(p_e) - if has_node: - node_entries = list(g.objects(s, SH_node)) - for n_e in node_entries: - if isinstance(n_e, rdflib.BNode): - _found_child_bnodes.append(n_e) + _found_child_bnodes: List[rdflib.BNode] = [] + if has_shape_expecting_p: + for _p in has_shape_expecting_p.keys(): + property_entries = list(g.objects(s, _p)) + for p_e in property_entries: + if isinstance(p_e, rdflib.BNode): + _found_child_bnodes.append(p_e) if len(_found_child_bnodes) > 0: _gather_shapes(_found_child_bnodes, recurse_depth=recurse_depth + 1) @@ -449,7 +454,13 @@ def _gather_shapes(shapes_nodes: List[rdflib.URIRef | rdflib.BNode], recurse_dep 
"https://www.w3.org/TR/shacl/#property-shapes", ) else: - found_prop_shapes_paths[s] = path_vals[0] + _p = path_vals[0] + if not isinstance(_p, (rdflib.BNode, rdflib.URIRef)): + raise ShapeLoadError( + "Found a path property with a value that is not a URIRef or BNode.", + "https://www.w3.org/TR/shacl/#property-paths", + ) + found_prop_shapes_paths[s] = _p node_shape_count = 0 property_shape_count = 0 @@ -457,16 +468,16 @@ def _gather_shapes(shapes_nodes: List[rdflib.URIRef | rdflib.BNode], recurse_dep if node_shape in self._node_shape_cache: # TODO:coverage: we don't have any tests where a shape is loaded twice raise ShapeLoadError("That shape has already been loaded!", "None") - s = Shape(self, node_shape, p=False, logger=self.logger) - self._node_shape_cache[node_shape] = s + _s = Shape(self, node_shape, p=False, logger=self.logger) + self._node_shape_cache[node_shape] = _s node_shape_count += 1 for prop_shape in gathered_prop_shapes: if prop_shape in self._node_shape_cache: # TODO:coverage: we don't have any tests where a shape is loaded twice raise ShapeLoadError("That shape has already been loaded!", "None") prop_shape_path = found_prop_shapes_paths[prop_shape] - s = Shape(self, prop_shape, p=True, path=prop_shape_path, logger=self.logger) - self._node_shape_cache[prop_shape] = s + _s = Shape(self, prop_shape, p=True, path=prop_shape_path, logger=self.logger) + self._node_shape_cache[prop_shape] = _s property_shape_count += 1 if self.debug: