From 3f113cbd827e154d35ce07950ca5038016934c33 Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Tue, 17 Oct 2023 19:04:06 +0100 Subject: [PATCH 1/5] feat(checkquery): tool for checking SPARQL (resolves #48) Tool can check all, changed or single SPARQL query files --- src/scribe_data/checkquery.py | 442 ++++++++++++++++++++++++++++++++++ tests/load/test_checkquery.py | 359 +++++++++++++++++++++++++++ 2 files changed, 801 insertions(+) create mode 100755 src/scribe_data/checkquery.py create mode 100755 tests/load/test_checkquery.py diff --git a/src/scribe_data/checkquery.py b/src/scribe_data/checkquery.py new file mode 100755 index 000000000..bef1e448e --- /dev/null +++ b/src/scribe_data/checkquery.py @@ -0,0 +1,442 @@ +#!/usr/bin/env python3 + +"""Command line tool for testing SPARQl queries against an endpoint.""" + +import argparse +import math +import os +import subprocess +import sys +import time +import urllib.request +from dataclasses import dataclass +from http import HTTPStatus +from pathlib import Path +from typing import Optional +from urllib.error import HTTPError + +import SPARQLWrapper as SPARQL +from SPARQLWrapper import SPARQLExceptions +from tqdm.auto import tqdm + + +EXIT_SUCCESS = 0 +EXIT_FAILURE = 1 +EXIT_CLI_ERROR = 2 + +PROJECT_ROOT = "Scribe-Data" + + +@dataclass(repr=False, frozen=True) +class QueryFile: + """Holds a reference to a file containing a SPARQL query.""" + + path: Path + + def load(self, limit: int) -> str: + """Load the SPARQL query from 'path' into a string. + + Args: + limit (int): the maximum number of results a query should return. + + Returns: + str: the SPARQL query. + """ + with open(self.path, encoding="utf-8") as in_stream: + return f"{in_stream.read()}\nLIMIT {limit}\n" + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(path={self.path})" + + +class QueryExecutionException(Exception): + """Raised when execution of a query fails.""" + + def __init__(self, message: str, query: QueryFile) -> None: + """ + Args: + message (str): why the query failed. + query (QueryFile): the query that failed. + """ + self.message = message + self.query = query + super().__init__(self.message) + + def __str__(self) -> str: + return f"{self.query.path} : {self.message}" + + +def ping(url: str, timeout: int) -> bool: + """Test if a URL is reachable. + + Args: + url (str): the URL to test. + timeout (int): the maximum number of seconds to wait for a reply. + + Returns: + bool: True if connectivity established. False otherwise. + """ + try: + with urllib.request.urlopen(url, timeout=timeout) as response: + return response.getcode() == HTTPStatus.OK + except (HTTPError, Exception) as err: + print(f"{type(err).__name__}: {str(err)}", file=sys.stderr) + + return False + + +def all_queries() -> list[QueryFile]: + """All the SPARQL queries in, and below, 'Scribe-Data/'. + + Returns: + list[QueryFile]: the SPARQL query files. + """ + parts = Path(__file__).resolve().parts + prj_root_idx = parts.index(PROJECT_ROOT) + prj_root = str(Path(*parts[: prj_root_idx + 1])) + + queries: list[QueryFile] = [] + + for root, _, fnames in os.walk(prj_root): + for fname in fnames: + fpath = Path(root, fname) + if fpath.suffix == ".sparql": + queries.append(QueryFile(fpath)) + + return queries + + +def changed_queries() -> Optional[list[QueryFile]]: + """Find all the SPARQL queries that have changed. + + Includes new queries. + + Returns: + Optional[list[QueryFile]]: list of changed/new SPARQL queries or None if error. + """ + + result = subprocess.run( + ( + "git", + "status", + "--short", + ), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + check=False, + ) + + if result.returncode != EXIT_SUCCESS: + print(f"ERROR: {result.stderr}", file=sys.stderr) + return None + + # example 'result.stdout' + # + # M a/b/c/changed.sparql + # ?? ../new.sparql + + changed_files = [ + Path(norm_line.split(maxsplit=1)[1]).resolve() + for line in result.stdout.split("\n") + if (norm_line := line.strip()) + ] + + return [QueryFile(fpath) for fpath in changed_files if fpath.suffix == ".sparql"] + + +def sparql_context(url: str) -> SPARQL.SPARQLWrapper: + """Configure a SPARQL context. + + A context allows the execution of SPARQL queries. + + Args: + url (str): a valid URL of a SPARQL endpoint. + + Returns: + SPARQLWrapper: the context. + """ + context = SPARQL.SPARQLWrapper(url) + context.setReturnFormat(SPARQL.JSON) + context.setMethod(SPARQL.POST) + + return context + + +def execute( + query: QueryFile, limit: int, context: SPARQL.SPARQLWrapper, tries: int = 3 +) -> dict: + """Execute a SPARQL query in a given context. + + Args: + query (QueryFile): the SPARQL query to run. + limit (int): the maximum number of results a query should return. + context (SPARQLWrapper): the SPARQL context. + tries (int): the maximum number of times the query should be executed + after failure. + + Returns: + dict: results of the query. + """ + + def delay_in_seconds() -> int: + """How long to wait, in seconds, between executing repeat queries.""" + return int(math.ceil(10.0 / math.sqrt(tries))) + + if tries <= 0: + raise QueryExecutionException("Failed too many times.", query) + + try: + context.setQuery(query.load(limit)) + result = context.query().convert() + return result if result else [] + + except HTTPError: + time.sleep(delay_in_seconds()) + return execute(query, limit, context, tries - 1) + + except SPARQLExceptions.SPARQLWrapperException as err: + raise QueryExecutionException(err.msg, query) from err + + except Exception as err: + raise QueryExecutionException( + f"{type(err).__name__} - {str(err)}", query + ) from err + + +def check_sparql_file(query_file: str) -> Path: + """Check meta information of SPARQL query file. + + Args: + query_file (str): the file to validate. + + Returns: + Path: the validated file. + """ + fpath = Path(query_file) + + if not fpath.is_file(): + raise argparse.ArgumentTypeError(f"Not a valid file path: {fpath}") + + if fpath.suffix != ".sparql": + raise argparse.ArgumentTypeError(f"{fpath} does not have a '.sparql' extension") + + return fpath + + +def check_positive_number(value: str, err_msg: str) -> int: + """Ensure 'value' is a positive number. + + Args: + value (str): the value to be validated. + err_msg (str): used when value fails validation. + + Raises: + argparse.ArgumentTypeError + + Returns: + int: the validated number. + """ + try: + number = int(value) + if number >= 1: + return number + except ValueError: + pass + + raise argparse.ArgumentTypeError(err_msg) + + +def check_limit(limit: str) -> int: + """Validate the 'limit' argument. + + Args: + limit (str): the LIMIT to be validated. + + Raises: + argparse.ArgumentTypeError + + Returns: + int: the validated LIMIT + """ + return check_positive_number( + limit, "LIMIT must be an integer of value 1 or greater." + ) + + +def check_timeout(timeout: str) -> int: + """Validate the 'timeout' argument. + + Args: + timeout (str): the timeout to be validated. + + Raises: + argparse.ArgumentTypeError + + Returns: + int: the validated timeout. + """ + return check_positive_number( + timeout, "timeout must be an integer of value 1 or greater." + ) + + +def main(argv=None) -> int: + """The main function. + + Args: + argv : If set to None then argparse will use sys.argv as the arguments. + + Returns: + int: the exit status - 0: success, any other value - failure. + """ + cli = argparse.ArgumentParser( + description=f"run SPARQL queries from the '{PROJECT_ROOT}' project", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + group = cli.add_mutually_exclusive_group(required=True) + + group.add_argument( + "-c", + "--changed", + action="store_true", + help="run only changed/new SPARQL queries", + ) + + group.add_argument( + "-a", "--all", action="store_true", help="run all SPARQL queries" + ) + + group.add_argument( + "-f", + "--file", + help="path to a file containing a valid SPARQL query", + type=check_sparql_file, + ) + + group.add_argument( + "-p", + "--ping", + action="store_true", + default=False, + help="check responsiveness of endpoint", + ) + + cli.add_argument( + "--timeout", + type=check_timeout, + default=10, + help="maximum number of seconds to wait for a response from the endpoint when 'pinging'", + ) + + cli.add_argument( + "-e", + "--endpoint", + type=str, + default="https://query.wikidata.org/sparql", + help="URL of the SPARQL endpoint", + ) + + cli.add_argument( + "-l", + "--limit", + type=check_limit, + default=5, + help="the maximum number or results a query should return", + ) + + cli.add_argument( + "-v", + "--verbose", + action="store_true", + default=False, + help="increase output verbosity", + ) + + args = cli.parse_args(argv) + + endpoint = args.endpoint + + if args.ping: + if ping(endpoint, args.timeout): + print(f"Success: pinged '{endpoint}'") + return EXIT_SUCCESS + + print( + f"FAILURE: unable to contact '{endpoint}'. Network problems? " + "Malformed URL? Increase timeout?", + file=sys.stderr, + ) + return EXIT_FAILURE + + queries = None + + if args.all: + queries = all_queries() + elif args.changed: + queries = changed_queries() + elif args.file: + queries = [QueryFile(args.file)] + else: + assert False, "Unknown option" + + if queries is None: + return EXIT_FAILURE + + context = sparql_context(endpoint) + + failures = [] + successes = [] + + for query in tqdm(queries, position=0): + try: + results = execute(query, args.limit, context) + successes.append((query, results)) + except QueryExecutionException as err: + failures.append(err) + + success_report(successes, display=args.verbose) + error_report(failures) + + print("\nSummary") + print( + f"\tQueries run: {len(queries)}, passed: {len(successes)}, failed: {len(failures)}\n" + ) + + return EXIT_FAILURE if failures else EXIT_SUCCESS + + +def error_report(failures: list[QueryExecutionException]) -> None: + """Report failed queries. + + Args: + failures (list[QueryExecutionException]): failed queries. + """ + if not failures: + return + + qword = "query" if len(failures) == 1 else "queries" + print(f"\nFollowing {qword} failed:\n", file=sys.stderr) + for failed_query in failures: + print(str(failed_query), file=sys.stderr) + + +def success_report(successes: list[tuple[QueryFile, dict]], display: bool) -> None: + """Report successful queries. + + Args: + successes (list[tuple[QueryFile, dict]]): successful queries. + display (bool): should there be output? + """ + if not (display and successes): + return + + qword = "query" if len(successes) == 1 else "queries" + print(f"\nFollowing {qword} ran successfully:\n") + for query, results in successes: + print(f"{query.path} returned: {results}") + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/load/test_checkquery.py b/tests/load/test_checkquery.py new file mode 100755 index 000000000..ad69c93c0 --- /dev/null +++ b/tests/load/test_checkquery.py @@ -0,0 +1,359 @@ +import argparse +from http import HTTPStatus +from urllib.error import HTTPError + +from unittest.mock import MagicMock, mock_open, patch +from pathlib import Path + +import pytest + +from scribe_data.checkquery import ( + all_queries, + changed_queries, + check_limit, + check_timeout, + check_sparql_file, + error_report, + execute, + main, + QueryExecutionException, + QueryFile, + ping, + success_report, +) + +S_PATH = "/root/project/src/dir/query.sparql" +A_PATH = Path(S_PATH) + + +@pytest.fixture +def a_query(): + return QueryFile(A_PATH) + + +# Query +def test_full_path(a_query): + assert a_query.path == A_PATH + + +@patch("builtins.open", new_callable=mock_open, read_data="QUERY") +def test_query_load(_, a_query): + assert a_query.load(12) == "QUERY\nLIMIT 12\n" + + +def test_query_equals(a_query): + assert a_query == QueryFile(A_PATH) + + +def test_query_not_equals(a_query): + assert a_query != QueryFile("/root/project/src/Dir/query.sparql") + + +def test_query_not_equals_object(a_query): + assert a_query != object() + + +def test_query_str(a_query): + assert str(a_query) == "QueryFile(path=/root/project/src/dir/query.sparql)" + + +def test_query_repr(a_query): + assert repr(a_query) == "QueryFile(path=/root/project/src/dir/query.sparql)" + + +def test_query_execution_exception(a_query): + exception = QueryExecutionException("failure", a_query) + assert str(exception) == f"{S_PATH} : failure" + + +# ping +@patch("urllib.request.urlopen") +def test_ping_pass(mock_urlopen): + mock_urlopen.return_value.__enter__.return_value.getcode.return_value = ( + HTTPStatus.OK + ) + + assert ping("http://www.python.org", 0) + + +@patch("urllib.request.urlopen") +def test_ping_httperror_fail(mock_urlopen): + mock_urlopen.return_value.__enter__.side_effect = HTTPError + + assert not ping("http://www.python.org", 0) + + +@patch("urllib.request.urlopen") +def test_ping_exception_fail(mock_urlopen): + mock_urlopen.return_value.__enter__.side_effect = Exception + + assert not ping("http://www.python.org", 0) + + +@patch("urllib.request.urlopen") +def test_ping_fail(mock_urlopen): + mock_urlopen.return_value.__enter__.return_value.getcode.return_value = ( + HTTPStatus.BAD_REQUEST + ) + assert not ping("http://www.python.org", 0) + + +# check_sparql_file +@patch.object(Path, "is_file", return_value=True) +def test_check_sparql_file_exists(_): + assert check_sparql_file(S_PATH) == A_PATH + + +@patch.object(Path, "is_file", return_value=False) +def test_check_sparql_file_not_exists(_): + with pytest.raises(argparse.ArgumentTypeError) as err: + _ = check_sparql_file(S_PATH) + + assert str(err.value) == f"Not a valid file path: {S_PATH}" + + +@patch.object(Path, "is_file", return_value=True) +def test_check_sparql_file_not_sparql_extension(_): + fpath = Path("/root/query.txt") + with pytest.raises(argparse.ArgumentTypeError) as err: + _ = check_sparql_file(fpath) + + assert str(err.value) == f"{fpath} does not have a '.sparql' extension" + + +# changed_queries +@pytest.mark.parametrize( + "git_status, expected", + [ + ("", []), + ("\n", []), + ("?? README.md \n M .gitignore", []), + (" M /a/b/a.sparql", [QueryFile(Path("/a/b/a.sparql"))]), + ( + " M /a/with space/a.sparql \n ", + [QueryFile(Path("/a/with space/a.sparql"))], + ), + (" M /a/a.sparql\nM a.txt", [QueryFile(Path("/a/a.sparql"))]), + ( + " M /a/a.sparql\nM /a/a.txt\n ?? /a/b.sparql", + [QueryFile(Path("/a/a.sparql")), QueryFile(Path("/a/b.sparql"))], + ), + ], +) +@patch("subprocess.run") +def test_changed_queries(mock_run, git_status, expected): + mock_result = MagicMock() + mock_result.configure_mock(**{"returncode": 0, "stdout": git_status}) + + mock_run.return_value = mock_result + assert changed_queries() == expected + + +@patch("subprocess.run") +def test_changed_queries_failure(mock_run, capsys): + mock_result = MagicMock() + mock_result.configure_mock(**{"returncode": 1, "stderr": "no git"}) + + mock_run.return_value = mock_result + assert changed_queries() is None + + err_out = capsys.readouterr().err + assert "ERROR: no git" == err_out.strip() + + +# all_queries +@pytest.mark.parametrize( + "tree, expected", + [ + ( + [ + ("/root", ("src",), ("README.txt",)), + ("/root/src", (), ("spam.sh", "eggs.py")), + ], + [], + ), + ( + [ + ("/root", ("src",), ("a.sparql",)), + ("/root/src", (), ("sparql.pdf", "b.sparql")), + ], + [ + QueryFile(Path("/root/a.sparql")), + QueryFile(Path("/root/src/b.sparql")), + ], + ), + ], +) +def test_all_queries(tree, expected): + with patch("os.walk") as mock_walk: + mock_walk.return_value = tree + assert all_queries() == expected + + +# execute + + +def test_execute(a_query): + with pytest.raises(QueryExecutionException) as err: + _ = execute(a_query, 1, None, 0) + + assert str(err) == f"{a_query.path} : Failed too many times." + + +# check_limit +@pytest.mark.parametrize( + "candidate, limit", + [ + ("1", 1), + ("34", 34), + ("1000", 1000), + ], +) +def test_check_limit_pos(candidate, limit): + assert check_limit(candidate) == limit + + +@pytest.mark.parametrize( + "candidate", + [ + "0", + "-1", + "a", + "word", + ], +) +def test_check_limit_neg(candidate): + with pytest.raises(argparse.ArgumentTypeError) as err: + _ = check_limit(candidate) + + assert str(err.value) == "LIMIT must be an integer of value 1 or greater." + + +# check_timeout +@pytest.mark.parametrize( + "candidate, timeout", + [ + ("1", 1), + ("9", 9), + ("8888", 8888), + ], +) +def test_check_timeout_pos(candidate, timeout): + assert check_timeout(candidate) == timeout + + +@pytest.mark.parametrize( + "candidate", + [ + "0", + "-1", + "O", + "ten", + ], +) +def test_check_timeout_neg(candidate): + with pytest.raises(argparse.ArgumentTypeError) as err: + _ = check_timeout(candidate) + + assert str(err.value) == "timeout must be an integer of value 1 or greater." + + +# main + + +@pytest.mark.parametrize("arg", ["-h", "--help"]) +def test_main_help(arg): + with pytest.raises(SystemExit) as err: + _ = main(arg) + assert err.code == 0 + + +@pytest.mark.parametrize( + "args", + [ + ["-a", "-c"], + ["-p", "-f"], + ["-p", "-c"], + ["-p", "-a"], + ["-c", "-p", "-a"], + ["-c", "-f", "-a"], + ], +) +def test_main_mutex_opts(args): + """some options cannot be used together""" + with pytest.raises(SystemExit) as err: + _ = main([args]) + assert err.code == 2 + + +def test_error_report_single(a_query, capsys): + failures = [QueryExecutionException("timeout", a_query)] + error_report(failures) + err_out = capsys.readouterr().err + + assert ( + err_out == "\nFollowing query failed:\n\n" + "/root/project/src/dir/query.sparql : timeout\n" + ) + + +def test_error_report_multiple(a_query, capsys): + failures = [ + QueryExecutionException("timeout", a_query), + QueryExecutionException("bad format", a_query), + ] + error_report(failures) + err_out = capsys.readouterr().err + + assert ( + err_out == "\nFollowing queries failed:\n\n" + "/root/project/src/dir/query.sparql : timeout\n" + "/root/project/src/dir/query.sparql : bad format\n" + ) + + +def test_error_report_no_errors(capsys): + error_report([]) + assert capsys.readouterr().err == "" + + +def test_success_report_single_display_set(a_query, capsys): + successes = [(a_query, {"a": 23})] + success_report(successes, display=True) + + out = capsys.readouterr().out + + assert ( + out == "\nFollowing query ran successfully:\n\n" + "/root/project/src/dir/query.sparql returned: {'a': 23}\n" + ) + + +def test_success_report_no_success_display_set(capsys): + success_report([], display=True) + + assert capsys.readouterr().out == "" + + +@pytest.mark.parametrize( + "successes", + [[], [(a_query, {"a": 23})], [(a_query, {"a": 23}), (a_query, {"b": 53})]], +) +def test_success_report_display_not_set(successes, capsys): + success_report(successes, display=False) + + out = capsys.readouterr().out + + assert out == "" + + +def test_success_report_multiple_display_set(a_query, capsys): + successes = [(a_query, {"a": 23}), (a_query, {"b": 57})] + success_report(successes, display=True) + out = capsys.readouterr().out + + assert ( + out == "\nFollowing queries ran successfully:\n\n" + "/root/project/src/dir/query.sparql returned: {'a': 23}\n" + "/root/project/src/dir/query.sparql returned: {'b': 57}\n" + ) From 2680a894ecccb62019062b15631e6a4ebd5d1f86 Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Fri, 3 Nov 2023 14:54:26 +0000 Subject: [PATCH 2/5] feat(checkquery.py): address review comments (resolves #48) --- src/scribe_data/checkquery.py | 79 ++++++++++++++++++++++------------- tests/load/test_checkquery.py | 4 +- 2 files changed, 52 insertions(+), 31 deletions(-) diff --git a/src/scribe_data/checkquery.py b/src/scribe_data/checkquery.py index bef1e448e..1d27f0293 100755 --- a/src/scribe_data/checkquery.py +++ b/src/scribe_data/checkquery.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 -"""Command line tool for testing SPARQl queries against an endpoint.""" +""" +Command line tool for testing SPARQl queries against an endpoint. +""" import argparse import math @@ -29,12 +31,15 @@ @dataclass(repr=False, frozen=True) class QueryFile: - """Holds a reference to a file containing a SPARQL query.""" + """ + Holds a reference to a file containing a SPARQL query. + """ path: Path def load(self, limit: int) -> str: - """Load the SPARQL query from 'path' into a string. + """ + Load the SPARQL query from 'path' into a string. Args: limit (int): the maximum number of results a query should return. @@ -50,7 +55,9 @@ def __repr__(self) -> str: class QueryExecutionException(Exception): - """Raised when execution of a query fails.""" + """ + Raised when execution of a query fails. + """ def __init__(self, message: str, query: QueryFile) -> None: """ @@ -67,7 +74,8 @@ def __str__(self) -> str: def ping(url: str, timeout: int) -> bool: - """Test if a URL is reachable. + """ + Test if a URL is reachable. Args: url (str): the URL to test. @@ -86,7 +94,8 @@ def ping(url: str, timeout: int) -> bool: def all_queries() -> list[QueryFile]: - """All the SPARQL queries in, and below, 'Scribe-Data/'. + """ + All the SPARQL queries in, and below, 'Scribe-Data/' . Returns: list[QueryFile]: the SPARQL query files. @@ -107,7 +116,8 @@ def all_queries() -> list[QueryFile]: def changed_queries() -> Optional[list[QueryFile]]: - """Find all the SPARQL queries that have changed. + """ + Find all the SPARQL queries that have changed. Includes new queries. @@ -146,7 +156,8 @@ def changed_queries() -> Optional[list[QueryFile]]: def sparql_context(url: str) -> SPARQL.SPARQLWrapper: - """Configure a SPARQL context. + """ + Configure a SPARQL context. A context allows the execution of SPARQL queries. @@ -166,7 +177,8 @@ def sparql_context(url: str) -> SPARQL.SPARQLWrapper: def execute( query: QueryFile, limit: int, context: SPARQL.SPARQLWrapper, tries: int = 3 ) -> dict: - """Execute a SPARQL query in a given context. + """ + Execute a SPARQL query in a given context. Args: query (QueryFile): the SPARQL query to run. @@ -180,7 +192,9 @@ def execute( """ def delay_in_seconds() -> int: - """How long to wait, in seconds, between executing repeat queries.""" + """ + How long to wait, in seconds, between executing repeat queries. + """ return int(math.ceil(10.0 / math.sqrt(tries))) if tries <= 0: @@ -204,28 +218,30 @@ def delay_in_seconds() -> int: ) from err -def check_sparql_file(query_file: str) -> Path: - """Check meta information of SPARQL query file. +def check_sparql_file(fpath: str) -> Path: + """ + Check meta information of SPARQL query file. Args: - query_file (str): the file to validate. + fpath (str): the file to validate. Returns: Path: the validated file. """ - fpath = Path(query_file) + path = Path(fpath) - if not fpath.is_file(): - raise argparse.ArgumentTypeError(f"Not a valid file path: {fpath}") + if not path.is_file(): + raise argparse.ArgumentTypeError(f"Not a valid file path: {path}") - if fpath.suffix != ".sparql": - raise argparse.ArgumentTypeError(f"{fpath} does not have a '.sparql' extension") + if path.suffix != ".sparql": + raise argparse.ArgumentTypeError(f"{path} does not have a '.sparql' extension") - return fpath + return path -def check_positive_number(value: str, err_msg: str) -> int: - """Ensure 'value' is a positive number. +def check_positive_int(value: str, err_msg: str) -> int: + """ + Ensure 'value' is a positive number. Args: value (str): the value to be validated. @@ -248,7 +264,8 @@ def check_positive_number(value: str, err_msg: str) -> int: def check_limit(limit: str) -> int: - """Validate the 'limit' argument. + """ + Validate the 'limit' argument. Args: limit (str): the LIMIT to be validated. @@ -259,13 +276,12 @@ def check_limit(limit: str) -> int: Returns: int: the validated LIMIT """ - return check_positive_number( - limit, "LIMIT must be an integer of value 1 or greater." - ) + return check_positive_int(limit, "LIMIT must be an integer of value 1 or greater.") def check_timeout(timeout: str) -> int: - """Validate the 'timeout' argument. + """ + Validate the 'timeout' argument. Args: timeout (str): the timeout to be validated. @@ -276,13 +292,14 @@ def check_timeout(timeout: str) -> int: Returns: int: the validated timeout. """ - return check_positive_number( + return check_positive_int( timeout, "timeout must be an integer of value 1 or greater." ) def main(argv=None) -> int: - """The main function. + """ + The main function. Args: argv : If set to None then argparse will use sys.argv as the arguments. @@ -408,7 +425,8 @@ def main(argv=None) -> int: def error_report(failures: list[QueryExecutionException]) -> None: - """Report failed queries. + """ + Report failed queries. Args: failures (list[QueryExecutionException]): failed queries. @@ -423,7 +441,8 @@ def error_report(failures: list[QueryExecutionException]) -> None: def success_report(successes: list[tuple[QueryFile, dict]], display: bool) -> None: - """Report successful queries. + """ + Report successful queries. Args: successes (list[tuple[QueryFile, dict]]): successful queries. diff --git a/tests/load/test_checkquery.py b/tests/load/test_checkquery.py index ad69c93c0..6ca22a3e2 100755 --- a/tests/load/test_checkquery.py +++ b/tests/load/test_checkquery.py @@ -280,7 +280,9 @@ def test_main_help(arg): ], ) def test_main_mutex_opts(args): - """some options cannot be used together""" + """ + Some options cannot be used together. + """ with pytest.raises(SystemExit) as err: _ = main([args]) assert err.code == 2 From f4f992be42ecb17ebd49e387dfcbad9a4f65c265 Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Fri, 3 Nov 2023 14:57:34 +0000 Subject: [PATCH 3/5] Revert "feat(checkquery.py): address review comments (resolves #48)" This reverts commit 2680a894ecccb62019062b15631e6a4ebd5d1f86. --- src/scribe_data/checkquery.py | 79 +++++++++++++---------------------- tests/load/test_checkquery.py | 4 +- 2 files changed, 31 insertions(+), 52 deletions(-) diff --git a/src/scribe_data/checkquery.py b/src/scribe_data/checkquery.py index 1d27f0293..bef1e448e 100755 --- a/src/scribe_data/checkquery.py +++ b/src/scribe_data/checkquery.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 -""" -Command line tool for testing SPARQl queries against an endpoint. -""" +"""Command line tool for testing SPARQl queries against an endpoint.""" import argparse import math @@ -31,15 +29,12 @@ @dataclass(repr=False, frozen=True) class QueryFile: - """ - Holds a reference to a file containing a SPARQL query. - """ + """Holds a reference to a file containing a SPARQL query.""" path: Path def load(self, limit: int) -> str: - """ - Load the SPARQL query from 'path' into a string. + """Load the SPARQL query from 'path' into a string. Args: limit (int): the maximum number of results a query should return. @@ -55,9 +50,7 @@ def __repr__(self) -> str: class QueryExecutionException(Exception): - """ - Raised when execution of a query fails. - """ + """Raised when execution of a query fails.""" def __init__(self, message: str, query: QueryFile) -> None: """ @@ -74,8 +67,7 @@ def __str__(self) -> str: def ping(url: str, timeout: int) -> bool: - """ - Test if a URL is reachable. + """Test if a URL is reachable. Args: url (str): the URL to test. @@ -94,8 +86,7 @@ def ping(url: str, timeout: int) -> bool: def all_queries() -> list[QueryFile]: - """ - All the SPARQL queries in, and below, 'Scribe-Data/' . + """All the SPARQL queries in, and below, 'Scribe-Data/'. Returns: list[QueryFile]: the SPARQL query files. @@ -116,8 +107,7 @@ def all_queries() -> list[QueryFile]: def changed_queries() -> Optional[list[QueryFile]]: - """ - Find all the SPARQL queries that have changed. + """Find all the SPARQL queries that have changed. Includes new queries. @@ -156,8 +146,7 @@ def changed_queries() -> Optional[list[QueryFile]]: def sparql_context(url: str) -> SPARQL.SPARQLWrapper: - """ - Configure a SPARQL context. + """Configure a SPARQL context. A context allows the execution of SPARQL queries. @@ -177,8 +166,7 @@ def sparql_context(url: str) -> SPARQL.SPARQLWrapper: def execute( query: QueryFile, limit: int, context: SPARQL.SPARQLWrapper, tries: int = 3 ) -> dict: - """ - Execute a SPARQL query in a given context. + """Execute a SPARQL query in a given context. Args: query (QueryFile): the SPARQL query to run. @@ -192,9 +180,7 @@ def execute( """ def delay_in_seconds() -> int: - """ - How long to wait, in seconds, between executing repeat queries. - """ + """How long to wait, in seconds, between executing repeat queries.""" return int(math.ceil(10.0 / math.sqrt(tries))) if tries <= 0: @@ -218,30 +204,28 @@ def delay_in_seconds() -> int: ) from err -def check_sparql_file(fpath: str) -> Path: - """ - Check meta information of SPARQL query file. +def check_sparql_file(query_file: str) -> Path: + """Check meta information of SPARQL query file. Args: - fpath (str): the file to validate. + query_file (str): the file to validate. Returns: Path: the validated file. """ - path = Path(fpath) + fpath = Path(query_file) - if not path.is_file(): - raise argparse.ArgumentTypeError(f"Not a valid file path: {path}") + if not fpath.is_file(): + raise argparse.ArgumentTypeError(f"Not a valid file path: {fpath}") - if path.suffix != ".sparql": - raise argparse.ArgumentTypeError(f"{path} does not have a '.sparql' extension") + if fpath.suffix != ".sparql": + raise argparse.ArgumentTypeError(f"{fpath} does not have a '.sparql' extension") - return path + return fpath -def check_positive_int(value: str, err_msg: str) -> int: - """ - Ensure 'value' is a positive number. +def check_positive_number(value: str, err_msg: str) -> int: + """Ensure 'value' is a positive number. Args: value (str): the value to be validated. @@ -264,8 +248,7 @@ def check_positive_int(value: str, err_msg: str) -> int: def check_limit(limit: str) -> int: - """ - Validate the 'limit' argument. + """Validate the 'limit' argument. Args: limit (str): the LIMIT to be validated. @@ -276,12 +259,13 @@ def check_limit(limit: str) -> int: Returns: int: the validated LIMIT """ - return check_positive_int(limit, "LIMIT must be an integer of value 1 or greater.") + return check_positive_number( + limit, "LIMIT must be an integer of value 1 or greater." + ) def check_timeout(timeout: str) -> int: - """ - Validate the 'timeout' argument. + """Validate the 'timeout' argument. Args: timeout (str): the timeout to be validated. @@ -292,14 +276,13 @@ def check_timeout(timeout: str) -> int: Returns: int: the validated timeout. """ - return check_positive_int( + return check_positive_number( timeout, "timeout must be an integer of value 1 or greater." ) def main(argv=None) -> int: - """ - The main function. + """The main function. Args: argv : If set to None then argparse will use sys.argv as the arguments. @@ -425,8 +408,7 @@ def main(argv=None) -> int: def error_report(failures: list[QueryExecutionException]) -> None: - """ - Report failed queries. + """Report failed queries. Args: failures (list[QueryExecutionException]): failed queries. @@ -441,8 +423,7 @@ def error_report(failures: list[QueryExecutionException]) -> None: def success_report(successes: list[tuple[QueryFile, dict]], display: bool) -> None: - """ - Report successful queries. + """Report successful queries. Args: successes (list[tuple[QueryFile, dict]]): successful queries. diff --git a/tests/load/test_checkquery.py b/tests/load/test_checkquery.py index 6ca22a3e2..ad69c93c0 100755 --- a/tests/load/test_checkquery.py +++ b/tests/load/test_checkquery.py @@ -280,9 +280,7 @@ def test_main_help(arg): ], ) def test_main_mutex_opts(args): - """ - Some options cannot be used together. - """ + """some options cannot be used together""" with pytest.raises(SystemExit) as err: _ = main([args]) assert err.code == 2 From 3daa99130be6b66e76e957d14703b95f2fa7f6a0 Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Fri, 3 Nov 2023 15:22:11 +0000 Subject: [PATCH 4/5] feat(checkquery.py): address review comments (resolves #48) --- src/scribe_data/checkquery.py | 74 +++++++++++++++++++++-------------- tests/load/test_checkquery.py | 4 +- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/src/scribe_data/checkquery.py b/src/scribe_data/checkquery.py index bef1e448e..0a18d5e42 100755 --- a/src/scribe_data/checkquery.py +++ b/src/scribe_data/checkquery.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 -"""Command line tool for testing SPARQl queries against an endpoint.""" +""" +Command line tool for testing SPARQl queries against an endpoint. +""" import argparse import math @@ -29,7 +31,8 @@ @dataclass(repr=False, frozen=True) class QueryFile: - """Holds a reference to a file containing a SPARQL query.""" + """ + Holds a reference to a file containing a SPARQL query.""" path: Path @@ -50,7 +53,9 @@ def __repr__(self) -> str: class QueryExecutionException(Exception): - """Raised when execution of a query fails.""" + """ + Raised when execution of a query fails. + """ def __init__(self, message: str, query: QueryFile) -> None: """ @@ -67,7 +72,8 @@ def __str__(self) -> str: def ping(url: str, timeout: int) -> bool: - """Test if a URL is reachable. + """ + Test if a URL is reachable. Args: url (str): the URL to test. @@ -86,7 +92,8 @@ def ping(url: str, timeout: int) -> bool: def all_queries() -> list[QueryFile]: - """All the SPARQL queries in, and below, 'Scribe-Data/'. + """ + All the SPARQL queries in, and below, 'Scribe-Data/'. Returns: list[QueryFile]: the SPARQL query files. @@ -107,7 +114,8 @@ def all_queries() -> list[QueryFile]: def changed_queries() -> Optional[list[QueryFile]]: - """Find all the SPARQL queries that have changed. + """ + Find all the SPARQL queries that have changed. Includes new queries. @@ -146,7 +154,8 @@ def changed_queries() -> Optional[list[QueryFile]]: def sparql_context(url: str) -> SPARQL.SPARQLWrapper: - """Configure a SPARQL context. + """ + Configure a SPARQL context. A context allows the execution of SPARQL queries. @@ -166,7 +175,8 @@ def sparql_context(url: str) -> SPARQL.SPARQLWrapper: def execute( query: QueryFile, limit: int, context: SPARQL.SPARQLWrapper, tries: int = 3 ) -> dict: - """Execute a SPARQL query in a given context. + """ + Execute a SPARQL query in a given context. Args: query (QueryFile): the SPARQL query to run. @@ -188,8 +198,7 @@ def delay_in_seconds() -> int: try: context.setQuery(query.load(limit)) - result = context.query().convert() - return result if result else [] + return context.queryAndConvert() except HTTPError: time.sleep(delay_in_seconds()) @@ -204,28 +213,30 @@ def delay_in_seconds() -> int: ) from err -def check_sparql_file(query_file: str) -> Path: - """Check meta information of SPARQL query file. +def check_sparql_file(fpath: str) -> Path: + """ + Check meta information of SPARQL query file. Args: - query_file (str): the file to validate. + fpath (str): the file to validate. Returns: Path: the validated file. """ - fpath = Path(query_file) + path = Path(fpath) - if not fpath.is_file(): - raise argparse.ArgumentTypeError(f"Not a valid file path: {fpath}") + if not path.is_file(): + raise argparse.ArgumentTypeError(f"Not a valid file path: {path}") - if fpath.suffix != ".sparql": - raise argparse.ArgumentTypeError(f"{fpath} does not have a '.sparql' extension") + if path.suffix != ".sparql": + raise argparse.ArgumentTypeError(f"{path} does not have a '.sparql' extension") - return fpath + return path -def check_positive_number(value: str, err_msg: str) -> int: - """Ensure 'value' is a positive number. +def check_positive_int(value: str, err_msg: str) -> int: + """ + Ensure 'value' is a positive number. Args: value (str): the value to be validated. @@ -248,7 +259,8 @@ def check_positive_number(value: str, err_msg: str) -> int: def check_limit(limit: str) -> int: - """Validate the 'limit' argument. + """ + Validate the 'limit' argument. Args: limit (str): the LIMIT to be validated. @@ -259,13 +271,12 @@ def check_limit(limit: str) -> int: Returns: int: the validated LIMIT """ - return check_positive_number( - limit, "LIMIT must be an integer of value 1 or greater." - ) + return check_positive_int(limit, "LIMIT must be an integer of value 1 or greater.") def check_timeout(timeout: str) -> int: - """Validate the 'timeout' argument. + """ + Validate the 'timeout' argument. Args: timeout (str): the timeout to be validated. @@ -276,13 +287,14 @@ def check_timeout(timeout: str) -> int: Returns: int: the validated timeout. """ - return check_positive_number( + return check_positive_int( timeout, "timeout must be an integer of value 1 or greater." ) def main(argv=None) -> int: - """The main function. + """ + The main function. Args: argv : If set to None then argparse will use sys.argv as the arguments. @@ -408,7 +420,8 @@ def main(argv=None) -> int: def error_report(failures: list[QueryExecutionException]) -> None: - """Report failed queries. + """ + Report failed queries. Args: failures (list[QueryExecutionException]): failed queries. @@ -423,7 +436,8 @@ def error_report(failures: list[QueryExecutionException]) -> None: def success_report(successes: list[tuple[QueryFile, dict]], display: bool) -> None: - """Report successful queries. + """ + Report successful queries. Args: successes (list[tuple[QueryFile, dict]]): successful queries. diff --git a/tests/load/test_checkquery.py b/tests/load/test_checkquery.py index ad69c93c0..6ca22a3e2 100755 --- a/tests/load/test_checkquery.py +++ b/tests/load/test_checkquery.py @@ -280,7 +280,9 @@ def test_main_help(arg): ], ) def test_main_mutex_opts(args): - """some options cannot be used together""" + """ + Some options cannot be used together. + """ with pytest.raises(SystemExit) as err: _ = main([args]) assert err.code == 2 From 1763a4dbcd507c1850aaddf49e0da0b893dbba21 Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Fri, 3 Nov 2023 20:04:00 +0000 Subject: [PATCH 5/5] Update checkquery.py - edit docstring Final tripple quote should be on new line. --- src/scribe_data/checkquery.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scribe_data/checkquery.py b/src/scribe_data/checkquery.py index 0a18d5e42..b2cf49602 100755 --- a/src/scribe_data/checkquery.py +++ b/src/scribe_data/checkquery.py @@ -32,7 +32,8 @@ @dataclass(repr=False, frozen=True) class QueryFile: """ - Holds a reference to a file containing a SPARQL query.""" + Holds a reference to a file containing a SPARQL query. + """ path: Path