From 334acdb188fff09f0fb8d54b7400e77aa2388ce7 Mon Sep 17 00:00:00 2001 From: Vincent Privat Date: Mon, 2 Dec 2024 11:03:55 +0100 Subject: [PATCH] Allow to provide HTTP headers --- CHANGELOG.md | 1 + README.md | 8 +++++ requirements-dev.txt | 1 + setup.py | 3 +- stac_validator/stac_validator.py | 9 ++++++ stac_validator/utilities.py | 17 ++++++----- stac_validator/validate.py | 21 +++++++++----- tests/test_header.py | 50 ++++++++++++++++++++++++++++++++ tox.ini | 4 ++- tox/Dockerfile-tox | 2 +- 10 files changed, 98 insertions(+), 18 deletions(-) create mode 100644 tests/test_header.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 77b4843c..31a6811d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) - Added publish.yml to automatically publish new releases to PyPI [#236](https://github.com/stac-utils/stac-validator/pull/236) - Configure whether to open URLs when validating assets [#238](https://github.com/stac-utils/stac-validator/pull/238) +- Allow to provide HTTP headers [#239](https://github.com/stac-utils/stac-validator/pull/239) ## [v3.4.0] - 2024-10-08 diff --git a/README.md b/README.md index 48d3b0ab..761d5eda 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,8 @@ Options: with --pages. Defaults to one page. --no-assets-urls Disables the opening of href links when validating assets (enabled by default). + --header KEY VALUE HTTP header to include in the requests. Can be used + multiple times. -p, --pages INTEGER Maximum number of pages to validate via --item- collection. Defaults to one page. -v, --verbose Enables verbose output for recursive mode. 
@@ -332,3 +334,9 @@ stac-validator https://spot-canada-ortho.s3.amazonaws.com/catalog.json --recursi ```bash stac-validator https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items --item-collection --pages 2 ``` + +**--header** + +```bash +stac-validator https://stac-catalog.eu/collections/sentinel-s2-l2a/items --header x-api-key $MY_API_KEY --header foo bar +``` diff --git a/requirements-dev.txt b/requirements-dev.txt index d5516dd8..aec90522 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,4 +2,5 @@ black pytest pytest-mypy pre-commit +requests-mock types-jsonschema diff --git a/setup.py b/setup.py index d8ef30da..700549f4 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ extras_require={ "dev": [ "pytest", + "requests-mock", "types-setuptools", ], }, @@ -41,5 +42,5 @@ "console_scripts": ["stac-validator = stac_validator.stac_validator:main"] }, python_requires=">=3.8", - tests_require=["pytest"], + tests_require=["pytest", "requests-mock"], ) diff --git a/stac_validator/stac_validator.py b/stac_validator/stac_validator.py index 2e8becd2..48c69be9 100644 --- a/stac_validator/stac_validator.py +++ b/stac_validator/stac_validator.py @@ -114,6 +114,12 @@ def collections_summary(message: List[Dict[str, Any]]) -> None: is_flag=True, help="Disables the opening of href links when validating assets (enabled by default).", ) +@click.option( + "--header", + type=(str, str), + multiple=True, + help="HTTP header to include in the requests. Can be used multiple times.", +) @click.option( "--pages", "-p", @@ -134,6 +140,7 @@ def main( collections: bool, item_collection: bool, no_assets_urls: bool, + header: list, pages: int, recursive: bool, max_depth: int, @@ -154,6 +161,7 @@ def main( collections (bool): Validate response from /collections endpoint. item_collection (bool): Whether to validate item collection responses. no_assets_urls (bool): Whether to open href links when validating assets (enabled by default). 
+ header (list): HTTP headers to include in the requests, as (KEY, VALUE) pairs. pages (int): Maximum number of pages to validate via `item_collection`. recursive (bool): Whether to recursively validate all related STAC objects. max_depth (int): Maximum depth to traverse when recursing. @@ -185,6 +193,7 @@ def main( links=links, assets=assets, assets_open_urls=not no_assets_urls, + headers=dict(header), extensions=extensions, custom=custom, verbose=verbose, diff --git a/stac_validator/utilities.py b/stac_validator/utilities.py index 1c70e516..8d10e6e6 100644 --- a/stac_validator/utilities.py +++ b/stac_validator/utilities.py @@ -3,7 +3,7 @@ import ssl from typing import Dict from urllib.parse import urlparse -from urllib.request import urlopen +from urllib.request import Request, urlopen import requests # type: ignore @@ -77,7 +77,7 @@ def get_stac_type(stac_content: Dict) -> str: return str(e) -def fetch_and_parse_file(input_path: str) -> Dict: +def fetch_and_parse_file(input_path: str, headers: Dict = {}) -> Dict: """Fetches and parses a JSON file from a URL or local file. Given a URL or local file path to a JSON file, this function fetches the file, @@ -87,6 +87,7 @@ def fetch_and_parse_file(input_path: str) -> Dict: Args: input_path: A string representing the URL or local file path to the JSON file. + headers: For URLs: HTTP headers to include in the request Returns: A dictionary containing the parsed contents of the JSON file. 
@@ -97,7 +98,7 @@ def fetch_and_parse_file(input_path: str) -> Dict: """ try: if is_url(input_path): - resp = requests.get(input_path) + resp = requests.get(input_path, headers=headers) resp.raise_for_status() data = resp.json() else: @@ -150,9 +151,7 @@ def set_schema_addr(version: str, stac_type: str) -> str: def link_request( - link: Dict, - initial_message: Dict, - open_urls: bool = True, + link: Dict, initial_message: Dict, open_urls: bool = True, headers: Dict = {} ) -> None: """Makes a request to a URL and appends it to the relevant field of the initial message. @@ -161,6 +160,7 @@ def link_request( initial_message: A dictionary containing lists for "request_valid", "request_invalid", "format_valid", and "format_invalid" URLs. open_urls: Whether to open link href URL + headers: HTTP headers to include in the request Returns: None @@ -169,11 +169,12 @@ def link_request( if is_url(link["href"]): try: if open_urls: + request = Request(link["href"], headers=headers) if "s3" in link["href"]: context = ssl._create_unverified_context() - response = urlopen(link["href"], context=context) + response = urlopen(request, context=context) else: - response = urlopen(link["href"]) + response = urlopen(request) status_code = response.getcode() if status_code == 200: initial_message["request_valid"].append(link["href"]) diff --git a/stac_validator/validate.py b/stac_validator/validate.py index b48d3c06..030a1cf2 100644 --- a/stac_validator/validate.py +++ b/stac_validator/validate.py @@ -34,6 +34,7 @@ class StacValidate: links (bool): Whether to additionally validate links (only works in default mode). assets (bool): Whether to additionally validate assets (only works in default mode). assets_open_urls (bool): Whether to open assets URLs when validating assets. + headers (dict): HTTP headers to include in the requests. extensions (bool): Whether to only validate STAC object extensions. 
custom (str): The local filepath or remote URL of a custom JSON schema to validate the STAC object. verbose (bool): Whether to enable verbose output in recursive mode. @@ -56,6 +57,7 @@ def __init__( links: bool = False, assets: bool = False, assets_open_urls: bool = True, + headers: dict = {}, extensions: bool = False, custom: str = "", verbose: bool = False, @@ -70,6 +72,7 @@ def __init__( self.links = links self.assets = assets self.assets_open_urls = assets_open_urls + self.headers: Dict = headers self.recursive = recursive self.max_depth = max_depth self.extensions = extensions @@ -125,7 +128,9 @@ def assets_validator(self) -> Dict: assets = self.stac_content.get("assets") if assets: for asset in assets.values(): - link_request(asset, initial_message, self.assets_open_urls) + link_request( + asset, initial_message, self.assets_open_urls, self.headers + ) return initial_message def links_validator(self) -> Dict: @@ -145,7 +150,7 @@ def links_validator(self) -> Dict: for link in self.stac_content["links"]: if not is_valid_url(link["href"]): link["href"] = root_url + link["href"][1:] - link_request(link, initial_message) + link_request(link, initial_message, True, self.headers) return initial_message @@ -345,7 +350,9 @@ def recursive_validator(self, stac_type: str) -> bool: self.stac_file = st + "/" + address else: self.stac_file = address - self.stac_content = fetch_and_parse_file(str(self.stac_file)) + self.stac_content = fetch_and_parse_file( + str(self.stac_file), self.headers + ) self.stac_content["stac_version"] = self.version stac_type = get_stac_type(self.stac_content).lower() @@ -414,7 +421,7 @@ def validate_collections(self) -> None: Returns: None """ - collections = fetch_and_parse_file(str(self.stac_file)) + collections = fetch_and_parse_file(str(self.stac_file), self.headers) for collection in collections["collections"]: self.schema = "" self.validate_dict(collection) @@ -437,7 +444,7 @@ def validate_item_collection(self) -> None: """ page = 1 
print(f"processing page {page}") - item_collection = fetch_and_parse_file(str(self.stac_file)) + item_collection = fetch_and_parse_file(str(self.stac_file), self.headers) self.validate_item_collection_dict(item_collection) try: if self.pages is not None: @@ -450,7 +457,7 @@ def validate_item_collection(self) -> None: next_link = link["href"] self.stac_file = next_link item_collection = fetch_and_parse_file( - str(self.stac_file) + str(self.stac_file), self.headers ) self.validate_item_collection_dict(item_collection) break @@ -489,7 +496,7 @@ def run(self) -> bool: and not self.item_collection and not self.collections ): - self.stac_content = fetch_and_parse_file(self.stac_file) + self.stac_content = fetch_and_parse_file(self.stac_file, self.headers) stac_type = get_stac_type(self.stac_content).upper() self.version = self.stac_content["stac_version"] diff --git a/tests/test_header.py b/tests/test_header.py new file mode 100644 index 00000000..4a9a63e4 --- /dev/null +++ b/tests/test_header.py @@ -0,0 +1,50 @@ +""" +Description: Test --header option + +""" + +import json + +import requests_mock + +from stac_validator import stac_validator + + +def test_header(): + stac_file = "tests/test_data/v110/simple-item.json" + url = "https://localhost/" + stac_file + + no_headers = {} + valid_headers = {"x-api-key": "a-valid-api-key"} + + with requests_mock.Mocker(real_http=True) as mock, open(stac_file) as json_data: + mock.get(url, request_headers=no_headers, status_code=403) + mock.get(url, request_headers=valid_headers, json=json.load(json_data)) + + stac = stac_validator.StacValidate(url, core=True, headers=valid_headers) + stac.run() + assert stac.message == [ + { + "version": "1.1.0", + "path": "https://localhost/tests/test_data/v110/simple-item.json", + "schema": [ + "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/item.json" + ], + "valid_stac": True, + "asset_type": "ITEM", + "validation_method": "core", + } + ] + + stac = stac_validator.StacValidate(url, 
core=True, headers=no_headers) + stac.run() + assert stac.message == [ + { + "version": "", + "path": "https://localhost/tests/test_data/v110/simple-item.json", + "schema": [""], + "valid_stac": False, + "error_type": "HTTPError", + "error_message": "403 Client Error: None for url: https://localhost/tests/test_data/v110/simple-item.json", + } + ] diff --git a/tox.ini b/tox.ini index db9f8c70..c7534888 100644 --- a/tox.ini +++ b/tox.ini @@ -2,5 +2,7 @@ envlist = py38,py39,py310,py311 [testenv] -deps = pytest +deps = + pytest + requests-mock commands = pytest \ No newline at end of file diff --git a/tox/Dockerfile-tox b/tox/Dockerfile-tox index a24ddcfb..08ace408 100644 --- a/tox/Dockerfile-tox +++ b/tox/Dockerfile-tox @@ -4,5 +4,5 @@ COPY . /code/ RUN export LC_ALL=C.UTF-8 && \ export LANG=C.UTF-8 && \ pip3 install . && \ - pip3 install tox==4.0.11 && \ + pip3 install tox==4.23.2 && \ tox \ No newline at end of file