From 6336bc7f7b85a597edd5976239ca95d59b6119c6 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 27 Dec 2023 13:30:18 -0700 Subject: [PATCH 01/27] add functionality to compare func. --- harvester/compare.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/harvester/compare.py b/harvester/compare.py index 9d1bff7a..3a8fb55d 100644 --- a/harvester/compare.py +++ b/harvester/compare.py @@ -3,9 +3,25 @@ logger = logging.getLogger("harvester") -# stub, TODO complete -def compare(compare_obj): +def compare(harvest_source, ckan_source): """Compares records""" logger.info("Hello from harvester.compare()") - return compare_obj + output = { + "create": [], + "update": [], + "delete": [], + } + + harvest_ids = set(harvest_source.keys()) + ckan_ids = set(ckan_source.keys()) + same_ids = harvest_ids & ckan_ids + + output["create"] += list(harvest_ids - ckan_ids) + output["delete"] += list(ckan_ids - harvest_ids) + + for i in same_ids: + if harvest_source[i] != ckan_source[i]: + output["update"].append(i) + + return output From 636ded52676f87c378a3e94b03745cbcc4572524 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 27 Dec 2023 13:30:28 -0700 Subject: [PATCH 02/27] add compare test --- tests/unit/compare/test_compare.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/unit/compare/test_compare.py b/tests/unit/compare/test_compare.py index b7a0560c..315e2cf4 100644 --- a/tests/unit/compare/test_compare.py +++ b/tests/unit/compare/test_compare.py @@ -4,8 +4,20 @@ def test_compare(): """tests compare""" - # stub, TODO complete - test_compare = "some test messsage" - compare_response = compare(test_compare) + harvest_source = { + "1": "de955c1b-fa16-4b84-ad6c-f891ba276056", # update + "2": "6d500ebc-19f8-4541-82b0-f02ad24c82e3", # do nothing + "3": "9aeef506-fbc4-42e4-ad27-c2e7e9f0d1c5", # create + } - assert test_compare == compare_response + ckan_source = { + "1": 
"fcd3428b-0ba7-48da-951d-fe44606be556", + "2": "6d500ebc-19f8-4541-82b0-f02ad24c82e3", + "4": "dae9b42c-cfc5-4f71-ae97-a5b75234b14f", # delete + } + + compare_res = compare(harvest_source, ckan_source) + + assert len(compare_res["create"]) == 1 + assert len(compare_res["update"]) == 1 + assert len(compare_res["delete"]) == 1 From bc4cfb95f7abc012a6f6944d08b76f5b07c439f3 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 27 Dec 2023 14:32:57 -0700 Subject: [PATCH 03/27] refactor to one line --- harvester/compare.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/harvester/compare.py b/harvester/compare.py index 3a8fb55d..feebcd6b 100644 --- a/harvester/compare.py +++ b/harvester/compare.py @@ -19,9 +19,6 @@ def compare(harvest_source, ckan_source): output["create"] += list(harvest_ids - ckan_ids) output["delete"] += list(ckan_ids - harvest_ids) - - for i in same_ids: - if harvest_source[i] != ckan_source[i]: - output["update"].append(i) + output["update"] += [i for i in same_ids if harvest_source[i] != ckan_source[i]] return output From 661225bcbea4297d053b5f42f422720e8ee9b070 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 27 Dec 2023 15:06:07 -0700 Subject: [PATCH 04/27] move data to fixture and refactor. 
--- tests/unit/compare/conftest.py | 18 ++++++++++++++++++ tests/unit/compare/test_compare.py | 16 ++-------------- 2 files changed, 20 insertions(+), 14 deletions(-) create mode 100644 tests/unit/compare/conftest.py diff --git a/tests/unit/compare/conftest.py b/tests/unit/compare/conftest.py new file mode 100644 index 00000000..ebc07305 --- /dev/null +++ b/tests/unit/compare/conftest.py @@ -0,0 +1,18 @@ +import pytest + + +@pytest.fixture +def data_sources(): + harvest_source = { + "1": "de955c1b-fa16-4b84-ad6c-f891ba276056", # update + "2": "6d500ebc-19f8-4541-82b0-f02ad24c82e3", # do nothing + "3": "9aeef506-fbc4-42e4-ad27-c2e7e9f0d1c5", # create + } + + ckan_source = { + "1": "fcd3428b-0ba7-48da-951d-fe44606be556", + "2": "6d500ebc-19f8-4541-82b0-f02ad24c82e3", + "4": "dae9b42c-cfc5-4f71-ae97-a5b75234b14f", # delete + } + + return harvest_source, ckan_source diff --git a/tests/unit/compare/test_compare.py b/tests/unit/compare/test_compare.py index 315e2cf4..712aaffe 100644 --- a/tests/unit/compare/test_compare.py +++ b/tests/unit/compare/test_compare.py @@ -1,22 +1,10 @@ from harvester.compare import compare -def test_compare(): +def test_compare(data_sources): """tests compare""" - harvest_source = { - "1": "de955c1b-fa16-4b84-ad6c-f891ba276056", # update - "2": "6d500ebc-19f8-4541-82b0-f02ad24c82e3", # do nothing - "3": "9aeef506-fbc4-42e4-ad27-c2e7e9f0d1c5", # create - } - - ckan_source = { - "1": "fcd3428b-0ba7-48da-951d-fe44606be556", - "2": "6d500ebc-19f8-4541-82b0-f02ad24c82e3", - "4": "dae9b42c-cfc5-4f71-ae97-a5b75234b14f", # delete - } - - compare_res = compare(harvest_source, ckan_source) + compare_res = compare(*data_sources) assert len(compare_res["create"]) == 1 assert len(compare_res["update"]) == 1 From 5e2dbe764aa5a52db76a95307c8f725747eb6e2b Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 27 Dec 2023 15:24:57 -0700 Subject: [PATCH 05/27] add assert on `do nothing` --- tests/unit/compare/test_compare.py | 1 + 1 file changed, 1 insertion(+) 
diff --git a/tests/unit/compare/test_compare.py b/tests/unit/compare/test_compare.py index 712aaffe..e9da74d1 100644 --- a/tests/unit/compare/test_compare.py +++ b/tests/unit/compare/test_compare.py @@ -9,3 +9,4 @@ def test_compare(data_sources): assert len(compare_res["create"]) == 1 assert len(compare_res["update"]) == 1 assert len(compare_res["delete"]) == 1 + assert "2" not in compare_res From a544ec09227a806d6cd269cd5bd577f39e54b45a Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:44:15 -0700 Subject: [PATCH 06/27] ignore integration tests --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 264bef9c..cc7ce6ed 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ clean-dist: ## Cleans dist dir rm -rf dist/* test: up ## Runs poetry tests, ignores ckan load - poetry run pytest --ignore=./tests/load/ckan + poetry run pytest --ignore=./tests/integration up: ## Sets up local docker environment docker compose up -d From 991166142b083341373aeb0b23920841f53c1816 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:44:27 -0700 Subject: [PATCH 07/27] add sansjson --- poetry.lock | 15 +++++++++++++-- pyproject.toml | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 70e127a8..437dbbcd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. 
[[package]] name = "attrs" @@ -622,6 +622,17 @@ files = [ {file = "ruff-0.0.261.tar.gz", hash = "sha256:c1c715b0d1e18f9c509d7c411ca61da3543a4aa459325b1b1e52b8301d65c6d2"}, ] +[[package]] +name = "sansjson" +version = "0.3.0" +description = "Your friendly neighborhood JSON sorter helper" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sansjson-0.3.0-py3-none-any.whl", hash = "sha256:d7acfc6fdbe1a5cb9ccff21ae114ba8c8d3f081e6884a282d014a5ed5af28958"}, + {file = "sansjson-0.3.0.tar.gz", hash = "sha256:d0dbaf53a2b412e474c58e9097819020aec2c572fb973539f10590e322d2dfd7"}, +] + [[package]] name = "setuptools" version = "69.0.2" @@ -808,4 +819,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = ">=3.10" -content-hash = "05bad374bbff6faf2eb4b3899d561c2e84516799c192ceb398d3ccde90edc41c" +content-hash = "a39784d88a5c1d6d7cf4f9e4b7f0ee98289ccedacc7b9f4e0d4daf72d54a0348" diff --git a/pyproject.toml b/pyproject.toml index 6ca5445b..85d1cbe9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ deepdiff = ">=6" pytest = ">=7.3.2" ckanapi = ">=4.7" beautifulsoup4 = "^4.12.2" +sansjson = "^0.3.0" [tool.poetry.group.dev.dependencies] pytest = "^7.3.0" From cb92509dcc4ea5eaa45d3b6c71a37a12d812f19b Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:45:09 -0700 Subject: [PATCH 08/27] add section on comparison. --- README.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/README.md b/README.md index f977fd4a..46fa7321 100644 --- a/README.md +++ b/README.md @@ -37,3 +37,36 @@ Once installed, `poetry install` installs dependencies into a local virtual envi If you followed the instructions for `CKAN load testing` and `Harvester testing` you can simply run `poetry run pytest` to run all tests. 
+ +## Comparison +- ./tests/harvest_sources/ckan_datasets_resp.json + - represents what ckan would respond with after querying for the harvest source name +- ./tests/harvest_sources/dcatus_compare.json + - represents a changed harvest source + - what has been created? + - datasets[0] + - "identifier" = "cftc-dc10" + - what has been deleted? + - datasets[0] + - "identifier" = "cftc-dc1" + - what has been updated? + - datasets[1] + - from "modified": "R/P1M" to "modified": "R/P1M Update" + - datasets[2] + - from "keyword": ["cotton on call", "cotton on-call"] + - to "keyword": ["cotton on call", "cotton on-call", "update keyword"] + - datasets[3] + - from "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. Government" + } + } + - to "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "Changed Value" + } + } +- ./tests/harvest_sources/dcatus.json + - represents an original harvest source prior to change occurring. \ No newline at end of file From d19bfb55ebe39e3148a142ae3ccafbab51b0eba5 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:46:41 -0700 Subject: [PATCH 09/27] general fixes, var updates, add more extras, and add ckan search. 
--- harvester/load.py | 62 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/harvester/load.py b/harvester/load.py index 794f51f4..0edab523 100644 --- a/harvester/load.py +++ b/harvester/load.py @@ -1,7 +1,7 @@ import logging import re - import ckanapi +from harvester.utils.util import sort_dataset logger = logging.getLogger("harvester") @@ -21,7 +21,7 @@ def create_ckan_extra_base(*args): return [{"key": d[0], "value": d[1]} for d in data] -def create_ckan_extras_additions(dcatus_catalog, additions): +def create_ckan_extras_additions(dcatus_dataset, additions): extras = [ "accessLevel", "bureauCode", @@ -35,10 +35,13 @@ def create_ckan_extras_additions(dcatus_catalog, additions): for extra in extras: data = {"key": extra, "value": None} + val = dcatus_dataset[extra] if extra == "publisher": - data["value"] = dcatus_catalog[extra]["name"] + data["value"] = val["name"] else: - data["value"] = dcatus_catalog[extra] + if isinstance(val, list): # TODO: confirm this is what we want. + val = val[0] + data["value"] = val output.append(data) return output + additions @@ -70,21 +73,28 @@ def get_email_from_str(in_str): return res.group(0) -def create_ckan_resources(dists): +def create_ckan_resources(dcatus_dataset): output = [] - for dist in dists: + if "distribution" not in dcatus_dataset: + return output + + for dist in dcatus_dataset["distribution"]: url_key = "downloadURL" if "downloadURL" in dist else "accessURL" - resource = {"url": dist[url_key], "mimetype": dist["mediaType"]} + resource = {"url": dist[url_key]} + if "mimetype" in dist: + resource["mimetype"] = dist["mediaType"] + output.append(resource) return output -def simple_transform(dcatus_catalog): +def simple_transform(dcatus_dataset): output = { - "name": "-".join(dcatus_catalog["title"].lower().split()), - "owner_org": "test", + "name": "-".join(dcatus_dataset["title"].lower().split()), + "owner_org": "test", # TODO: CHANGE THIS! 
+ "identifier": dcatus_dataset["identifier"], } mapping = { @@ -93,14 +103,17 @@ def simple_transform(dcatus_catalog): "title": "title", } - for k, v in dcatus_catalog.items(): + for k, v in dcatus_dataset.items(): if k not in mapping: continue if isinstance(mapping[k], dict): temp = {} + to_skip = ["@type"] for k2, v2 in v.items(): if k2 == "hasEmail": v2 = get_email_from_str(v2) + if k2 in to_skip: + continue temp[mapping[k][k2]] = v2 output = {**output, **temp} else: @@ -116,7 +129,7 @@ def create_defaults(): } -def dcatus_to_ckan(dcatus_catalog): +def dcatus_to_ckan(dcatus_dataset, harvest_source_name): """ example: - from this: @@ -126,23 +139,32 @@ def dcatus_to_ckan(dcatus_catalog): """ - output = simple_transform(dcatus_catalog) + output = simple_transform(dcatus_dataset) - resources = create_ckan_resources(dcatus_catalog["distribution"]) - tags = create_ckan_tags(dcatus_catalog["keyword"]) - pubisher_hierarchy = create_ckan_publisher_hierarchy(dcatus_catalog["publisher"]) + resources = create_ckan_resources(dcatus_dataset) + tags = create_ckan_tags(dcatus_dataset["keyword"]) + pubisher_hierarchy = create_ckan_publisher_hierarchy(dcatus_dataset["publisher"]) extras_base = create_ckan_extra_base( - pubisher_hierarchy, "Dataset", dcatus_catalog["publisher"]["name"] + pubisher_hierarchy, "Dataset", dcatus_dataset["publisher"]["name"] ) - extras = create_ckan_extras_additions(dcatus_catalog, extras_base) + extras = create_ckan_extras_additions(dcatus_dataset, extras_base) defaults = create_defaults() output["resources"] = resources output["tags"] = tags + output["extras"] = extras_base output["extras"] += extras + output["extras"] += [ + { + "key": "dcat_metadata", + "value": str(sort_dataset(dcatus_dataset)), + } + ] + + output["extras"] += [{"key": "harvest_source_name", "value": harvest_source_name}] return {**output, **defaults} @@ -167,3 +189,7 @@ def update_ckan_package(ckan, update_data): def purge_ckan_package(ckan, package_data): return 
ckan.action.dataset_purge(**package_data) + + +def search_ckan(ckan, query): + return ckan.action.package_search(**query) From 6dc9787cf29c371fe8559d2a99eb26024a4b20ac Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:46:54 -0700 Subject: [PATCH 10/27] add util module --- harvester/utils/__init__.py | 3 ++- harvester/utils/util.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 harvester/utils/util.py diff --git a/harvester/utils/__init__.py b/harvester/utils/__init__.py index 5e7c7ea5..7437333e 100644 --- a/harvester/utils/__init__.py +++ b/harvester/utils/__init__.py @@ -1,3 +1,4 @@ from . import json +from . import util -__all__ = ["json"] +__all__ = ["json", "util"] diff --git a/harvester/utils/util.py b/harvester/utils/util.py new file mode 100644 index 00000000..6721adfd --- /dev/null +++ b/harvester/utils/util.py @@ -0,0 +1,11 @@ +import hashlib +import sansjson +import json + + +def sort_dataset(d): + return sansjson.sort_pyobject(d) + + +def dataset_to_hash(d): + return hashlib.sha256(json.dumps(d, sort_keys=True).encode("utf-8")).hexdigest() From 88eafc832c22fbd27640795aea11f4be954b2b85 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:47:05 -0700 Subject: [PATCH 11/27] change dcatus catalog --- tests/harvest-sources/dcatus/dcatus.json | 379 +++++++++-------------- 1 file changed, 140 insertions(+), 239 deletions(-) diff --git a/tests/harvest-sources/dcatus/dcatus.json b/tests/harvest-sources/dcatus/dcatus.json index 1a52086b..fc5f55e3 100644 --- a/tests/harvest-sources/dcatus/dcatus.json +++ b/tests/harvest-sources/dcatus/dcatus.json @@ -1,301 +1,202 @@ { - "conformsTo": "https://project-open-data.cio.gov/v1.1/schema", - "describedBy": "https://project-open-data.cio.gov/v1.1/schema/catalog.json", "@context": "https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld", + "@id": "http://www.cftc.gov/data.json", "@type": "dcat:Catalog", + "conformsTo": 
"https://project-open-data.cio.gov/v1.1/schema", + "describedBy": "https://project-open-data.cio.gov/v1.1/schema/catalog.json", "dataset": [ { - "@type": "dcat:Dataset", - "title": "2015 GSA Common Baseline Implementation Plan and CIO Assignment Plan", - "description": "This is GSA's 2015 Common Baseline Implementation Plan and its CIO Assignment Plan per the requirements set forth in FITARA legislation.", - "modified": "2017-05-15", "accessLevel": "public", - "identifier": "GSA-2016-01-22-01", - "dataQuality": true, - "license": "https://creativecommons.org/publicdomain/zero/1.0/", - "publisher": { - "@type": "org:Organization", - "name": "General Services Administration" - }, - "accrualPeriodicity": "R/P1Y", + "bureauCode": ["339:00"], "contactPoint": { - "@type": "vcard:Contact", - "fn": "Mick Harris", - "hasEmail": "mailto:michael.harris@gsa.gov" + "fn": "Harold W. Hild", + "hasEmail": "mailto:hhild@CFTC.GOV" }, + "describedBy": "https://www.cftc.gov/MarketReports/CommitmentsofTraders/ExplanatoryNotes/index.htm", + "description": "COT reports provide a breakdown of each Tuesday's open interest for futures and options on futures market in which 20 or more traders hold positions equal to or above the reporting levels established by CFTC", "distribution": [ { - "@type": "dcat:Distribution", - "mediaType": "application/pdf", - "format": "pdf", - "title": "2015 GSA Common Baseline Implementation Plan and CIO Assignment Plan", - "description": "This is GSA's 2015 Common Baseline Implementation Plan and its CIO Assignment Plan per the requirements set forth in FITARA legislation. Updated April 2017. Last Major Change to version updated on March 4, 2019. 
Last Major change to version update don 8/5/2020.", - "downloadURL": "https://inventory.data.gov/dataset/64c56cec-4b8f-44c7-ba69-090517f9f32e/resource/87e53999-aff1-4560-8bf0-42d9dc8e4a69/download/2015gsafitaraimplementationandcioassignmentplan.pdf" + "accessURL": "https://www.cftc.gov/MarketReports/CommitmentsofTraders/index.htm" } ], - "keyword": ["Assignment Plan", "CIO", "Common Baseline", "FITARA", "GSA IT", "Implementation Plan"], - "bureauCode": ["023:00"], - "programCode": ["023:000"], - "theme": ["IT Initiatives"] - }, - { - "@type": "dcat:Dataset", - "title": "Concur - Reporting Voucher Model", - "description": "The data dictionary for the reporting voucher model within Concur.", - "modified": "2016-02-23", - "accessLevel": "non-public", - "identifier": "GSA - 139048", - "dataQuality": true, - "license": "http://www.usa.gov/publicdomain/label/1.0/", - "rights": "Trade secrets & commercial/financial info obtained from a person and privileged or confidential.", + "identifier": "cftc-dc1", + "keyword": ["commitment of traders", "cot", "open interest"], + "modified": "R/P1W", + "programCode": ["000:000"], "publisher": { - "@type": "org:Organization", - "name": "General Services Administration" - }, - "accrualPeriodicity": "R/P1Y", - "isPartOf": "GSA-2015-09-11-01", - "contactPoint": { - "@type": "vcard:Contact", - "fn": "Norma H Tolson", - "hasEmail": "mailto:norma.tolson@gsa.gov" + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. 
Government" + } }, - "keyword": ["Credit Card", "Travel Card"], - "bureauCode": ["023:00"], - "programCode": ["023:010"], - "language": ["en-us"], - "theme": ["Travel and Transportation"] + "title": "Commitment of Traders" }, { - "@type": "dcat:Dataset", - "title": "Concur - Reporting Travel Model", - "description": "The data dictionary for the reporting travel model within Concur.", - "modified": "2016-01-20", - "accessLevel": "non-public", - "identifier": "GSA - 139046", - "dataQuality": true, - "license": "http://www.usa.gov/publicdomain/label/1.0/", - "rights": "Trade secrets & commercial/financial info obtained from a person and privileged or confidential.", - "publisher": { - "@type": "org:Organization", - "name": "General Services Administration" - }, - "accrualPeriodicity": "R/P1Y", - "isPartOf": "GSA-2015-09-11-01", + "accessLevel": "public", + "bureauCode": ["339:00"], "contactPoint": { - "@type": "vcard:Contact", - "fn": "Norma H Tolson", - "hasEmail": "mailto:norma.tolson@gsa.gov" + "fn": "Harold W. 
Hild", + "hasEmail": "mailto:hhild@CFTC.GOV" }, - "keyword": ["Credit Card", "travel card"], - "bureauCode": ["023:00"], - "programCode": ["023:010"], - "language": ["en-us"], - "theme": ["Travel and Transportation"] - }, - { - "@type": "dcat:Dataset", - "title": "Concur Travel Parent", - "description": "This is the Parent folder for Concur datasets reporting on; Closed-Paid Vouchers in Concur Government Edition (CGE), Authorization Model, Travel Model, User Profile, and Voucher Model.", - "modified": "2016-02-23", - "accessLevel": "non-public", - "identifier": "GSA-2015-09-11-01", - "dataQuality": true, - "license": "http://www.usa.gov/publicdomain/label/1.0/", - "rights": "Trade secrets & commercial/financial info obtained from a person and privileged or confidential.", + "describedBy": "https://www.cftc.gov/MarketReports/BankParticipationReports/ExplanatoryNotes/index.htm", + "description": "The Bank Participation Report (BPR), developed by the Division of Market Oversight to provide the U.S. banking authorities and the Bank for International Settlements (BIS, located in Basel, Switzerland) aggregate large-trader positions of banks participating in various financial and non-financial commodity futures.", + "distribution": [ + { + "accessURL": "https://www.cftc.gov/MarketReports/BankParticipationReports/index.htm" + } + ], + "identifier": "cftc-dc2", + "keyword": ["bank participation report", "bpr", "banking"], + "modified": "R/P1M", + "programCode": ["000:000"], "publisher": { - "@type": "org:Organization", - "name": "General Services Administration" - }, - "accrualPeriodicity": "R/P1Y", - "contactPoint": { - "@type": "vcard:Contact", - "fn": "Norma H Tolson", - "hasEmail": "mailto:norma.tolson@gsa.gov" + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. 
Government" + } }, - "keyword": ["Authorization", "Closed", "Concur", "Paid", "Travel", "Voucher"], - "bureauCode": ["023:00"], - "programCode": ["023:010"], - "language": ["en-us"], - "theme": ["Travel and Transportation"] + "title": "Bank Participation Reports" }, { - "@type": "dcat:Dataset", - "title": "Data.gov Daily Sessions", - "description": "Data.gov Daily Sessions 20120101-20151231", - "modified": "2016-08-01", "accessLevel": "public", - "identifier": "GSA - DATA.GOVMETRICS1", - "license": "https://creativecommons.org/publicdomain/zero/1.0/", - "publisher": { - "@type": "org:Organization", - "name": "General Services Administration" - }, - "isPartOf": "GSA-2015-09-14-01", + "bureauCode": ["339:00"], "contactPoint": { - "@type": "vcard:Contact", - "fn": "Hyon Joo Kim", - "hasEmail": "mailto:hyon.kim@gsa.gov" + "fn": "Harold W. Hild", + "hasEmail": "mailto:hhild@CFTC.gov" }, + "describedBy": "https://www.cftc.gov/MarketReports/CottonOnCall/index.htm", + "description": "Cotton On-Call Report shows the quantity of call cotton bought or sold on which the price has not been fixed, together with the respective futures on which the purchase or sale is based on.", "distribution": [ { - "@type": "dcat:Distribution", - "mediaType": "text/csv", - "title": "Data.gov Daily Sessions", - "description": "Data.gov Daily Sessions 20120101-20151231", - "downloadURL": "https://inventory.data.gov/dataset/e0de4198-eaaa-423a-9154-7af76ab8d822/resource/a78ae43e-0ceb-4f2d-83ac-99c61b249afa/download/analytics-www.data.gov-data.gov-daily-sessions-20120101-20151231-analytics-www.data.gov-data.g.csv" - }, - { - "@type": "dcat:Distribution", - "mediaType": "text/csv", - "title": "Data.gov Daily Sessions Day Index", - "description": "Data.gov Daily Sessions 20120101-20151231 Day Index", - "downloadURL": 
"https://inventory.data.gov/dataset/e0de4198-eaaa-423a-9154-7af76ab8d822/resource/925f6204-5240-4dd1-8604-c279843c8974/download/analytics-www.data.gov-data.gov-daily-sessions-20120101-20151231b-sheet1.csv" + "accessURL": "https://www.cftc.gov/MarketReports/CottonOnCall/index.htm" } ], - "keyword": ["Data.gov Analytics", "metrics"], - "bureauCode": ["023:00"], - "programCode": ["023:019"] + "identifier": "cftc-dc3", + "keyword": ["cotton on call", "cotton on-call"], + "modified": "R/P1W", + "programCode": ["000:000"], + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. Government" + } + }, + "title": "Cotton On Call" }, { - "@type": "dcat:Dataset", - "title": "Data.gov Statistics Parent", - "description": "Various reports regarding the Data.gov sites, from Daily Visitors, to Top 10 Countries, and States.", - "modified": "2015-09-14", "accessLevel": "public", - "identifier": "GSA-2015-09-14-01", - "dataQuality": true, - "issued": "2013-04-11", - "license": "https://creativecommons.org/publicdomain/zero/1.0/", - "spatial": "Worldwide", - "publisher": { - "@type": "org:Organization", - "name": "General Services Administration" - }, + "bureauCode": ["339:00"], "contactPoint": { - "@type": "vcard:Contact", - "fn": "Hyon Joo Kim", - "hasEmail": "mailto:hyon.kim@gsa.gov" + "fn": "Carrie L Coffin", + "hasEmail": "mailto:ccoffin@CFTC.gov" + }, + "describedBy": "https://www.cftc.gov/MarketReports/financialfcmdata/DescriptionofReportDataFields/indesx.htm", + "description": "Futures commission merchants (FCMs) and retail foreign exchange dealers (RFEDs) must file monthly financial reports with the CFTC's Market Participants Division (MPD) within 17 business days after the end of the month. 
Selected financial information from these reports is published.", + "distribution": [ + { + "accessURL": "https://www.cftc.gov/MarketReports/financialfcmdata/index.htm" + } + ], + "identifier": "cftc-dc4", + "keyword": [ + "fcm", + "retail foreign exchange", + "dealer", + "market participants", + "mpd", + "futures commission merchant" + ], + "modified": "R/P1M", + "programCode": ["000:000"], + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. Government" + } }, - "keyword": ["Countries", "States", "Statistics", "Visitors", "data.gov"], - "bureauCode": ["023:00"], - "programCode": ["023:019"], - "language": ["en-us"], - "theme": ["Data.gov Site"] + "title": "Financial Data for FCMS" }, { - "@type": "dcat:Dataset", "accessLevel": "public", - "bureauCode": ["010:12"], - "programCode": ["010:012"], + "bureauCode": ["339:00"], "contactPoint": { - "@type": "vcard:Contact", - "fn": "David C. Twichell", - "hasEmail": "mailto:dtwichell@usgs.gov" + "fn": "Byung-IL Seo", + "hasEmail": "mailto:BSeo@CFTC.gov" }, - "description": "In 2010, the U.S. Geological Survey in Woods Hole, MA and St. Petersburg, FL, in partnership with the U.S. Army Corps of Engineers, Mobile District conducted geologic mapping to characterize the seafloor and shallow subsurface stratigraphy offshore of the Gulf Islands of Mississippi. The mapping was carried out during two cruises in March, 2010 on the R/V Tommy Munro of Biloxi, MS. Data were acquired with the following equipment: an SEA Ltd SwathPlus interferometric sonar (both 234 kHz and 468 kHz systems), a Klein 3000 and a Klein 3900 dual frequency sidescan-sonar, and an Edgetech 512i chirp subbottom profiling system. 
The long-term goal of this mapping effort is to produce high-quality, high-resolution geologic maps and geophysical interpretations that can be utilized to identify sand resources within the region and better understand the Holocene evolution and anticipate future changes in this coastal system. More information on the field work can be accessed from the Woods Hole Coastal and Marine Science Center Field Activity webpage https://cmgds.marine.usgs.gov/fan_info.php?fan=2010-012-FA or the St. Petersburg Coastal and Marine Geology InfoBank https://walrus.wr.usgs.gov/infobank/m/m210gm/html/m-2-10-gm.meta.html.", + "describedBy": "https://www.cftc.gov/MarketReports/LgTraderExplanatory.html", + "description": "The Large Trader Net Position Changes and the Trading Account Net Position Changes data provides the public with a view of the amount of trading that results in net changes to positions at the trader level and at the account level. The data reflects trading that changes or creates an end-of-day position, as contrasted with trading that does not change a trader’s end-of-day net position, such as spread or day trading.", "distribution": [ { - "@type": "dcat:Distribution", - "conformsTo": "https://www.fgdc.gov/schemas/metadata/", - "description": "The metadata original format", - "downloadURL": "https://data.usgs.gov/datacatalog/metadata/USGS.0000a76f-c6be-4366-8be3-6f8487442e8a.xml", - "format": "XML", - "mediaType": "text/xml", - "title": "Original Metadata" - }, - { - "@type": "dcat:Distribution", - "accessURL": "https://doi.org/10.5066/P9KM5FT2", - "description": "Landing page for access to the data", - "format": "XML", - "mediaType": "application/http", - "title": "Digital Data" + "accessURL": "https://www.cftc.gov/MarketReports/NetPositionChangesData/index.htm" } ], - "identifier": "USGS:0000a76f-c6be-4366-8be3-6f8487442e8a", + "identifier": "cftc-dc5", "keyword": [ - "USGS:0000a76f-c6be-4366-8be3-6f8487442e8a", - "U.S. 
Geological Survey", - "USGS", - "Woods Hole Coastal and Marine Science Center", - "WHCMSC", - "St. Petersburg Coastal and Marine Science Center", - "Coastal and Marine Geology Program", - "CMGP", - "Global Positioning", - "Navigation", - "Hypack Hydrographic Survey Software", - "R/V Tommy Munro", - "St. Petersburg field activity serial number 10cct02", - "oceans", - "location", - "navigational data", - "marine geophysics", - "Mississippi", - "Gulf Islands", - "North Central Gulf Coast", - "United States", - "West Ship Island", - "East Ship Island", - "Horn Island", - "Dog Key Pass", - "Camille Cut", - "Petit Bois Island", - "Gulfport Ship Channel", - "Gulf of Mexico", - "Cat Island" + "net positions", + "larger trader net position", + "trading account net positions" ], - "modified": "20200908", + "modified": "2011-06-30", + "programCode": ["000:000"], "publisher": { - "@type": "org:Organization", - "name": "U.S. Geological Survey", + "name": "U.S. Commodity Futures Trading Commission", "subOrganizationOf": { - "@type": "org:Organization", - "name": "Department of the Interior" + "name": "U.S. Government" } }, - "spatial": "-179.231086,-14.601813,+179.859681,+71.441059", - "theme": ["geospatial"], - "title": "Raw HYPACK navigation logs (text) collected by the U.S. Geological Survey - St. Petersburg Coastal and Marine Science Center offshore of the Gulf Islands, MS, 2010" + "title": "Net Positions Changes Data" }, { - "@type": "dcat:Dataset", - "title": "ConformsTo ISO Example: TIGER/Line Shapefile, 2013, nation, U.S., Current County and Equivalent National Shapefile", - "description": "The TIGER/Line shapefiles and related database files (.dbf) are an extract of selected geographic and cartographic information from the U.S. Census Bureau's Master Address File / Topologically Integrated Geographic Encoding and Referencing (MAF/TIGER) Database (MTDB). 
The MTDB represents a seamless national file with no overlaps or gaps between parts, however, each TIGER/Line shapefile is designed to stand alone as an independent data set, or they can be combined to cover the entire nation. The primary legal divisions of most states are termed counties. In Louisiana, these divisions are known as parishes. In Alaska, which has no counties, the equivalent entities are the organized boroughs, city and boroughs, municipalities, and for the unorganized area, census areas. The latter are delineated cooperatively for statistical purposes by the State of Alaska and the Census Bureau. In four states (Maryland, Missouri, Nevada, and Virginia), there are one or more incorporated places that are independent of any county organization and thus constitute primary divisions of their states. These incorporated places are known as independent cities and are treated as equivalent entities for purposes of data presentation. The District of Columbia and Guam have no primary divisions, and each area is considered an equivalent entity for purposes of data presentation. The Census Bureau treats the following entities as equivalents of counties for purposes of data presentation: Municipios in Puerto Rico, Districts and Islands in American Samoa, Municipalities in the Commonwealth of the Northern Mariana Islands, and Islands in the U.S. Virgin Islands. The entire area of the United States, Puerto Rico, and the Island Areas is covered by counties or equivalent entities. 
The boundaries for counties and equivalent entities are as of January 1of the shapefile release year, primarily as reported through the Census Bureau's Boundary and Annexation Survey (BAS).", - "modified": "2013", - "bureauCode": ["006:07"], - "programCode": ["006:012"], - "keyword": ["Nation", "Polygon", "United States", "U.S."], - "theme": ["geospatial"], - "identifier": "tl_2013_us_county.shp.xml", "accessLevel": "public", - "spatial": "[[-14.601813, -179.231086], [71.441059, 179.859681]]", + "bureauCode": ["339:00"], + "contactPoint": { + "fn": "Richard Haynes", + "hasEmail": "mailto:RHaynes@CFTC.gov" + }, + "describedBy": "https://www.cftc.gov/MarketReports/SwapsReports/DataDictionary/index.htm", + "description": "The CFTC Swaps Report aggregates a comprehensive body of swap market data that was not previously reported to regulators or regulated entities, and makes that information freely available in a form that is readily usable by both market participants and the general public. The swaps market data included in publications produced by entities such as the BIS, ISDA, and the Office of the Comptroller of the Currency vary in scope and granularity, but none corresponds directly to the data stored in the CFTC's SDRs.", + "distribution": [ + { + "accessURL": "https://www.cftc.gov/MarketReports/SwapsReports/index.htm" + } + ], + "identifier": "cftc-dc6", + "keyword": ["swaps report", "swaps market"], + "modified": "R/P1W", + "programCode": ["000:000"], "publisher": { - "@type": "org:Organization", - "name": "U.S. Department of Commerce, U.S. Census Bureau, Geography Division" + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. Government" + } }, + "title": "Weekly Swaps Report" + }, + { + "accessLevel": "public", + "bureauCode": ["339:00"], "contactPoint": { - "@type": "vcard:Contact", - "fn": "pointOfContact - U.S. Department of Commerce, U.S. 
Census Bureau, Geography Division", - "hasEmail": "mailto:ask@census.gov" + "fn": "Byung-IL Seo", + "hasEmail": "mailto:BSeo@CFTC.gov" }, + "describedBy": "https://www.cftc.gov/MarketReports/ClearedMarginReports/index.htm", + "description": "Derivatives clearing organizations (DCOs) are required to file daily reports on initial margin with the CFTC's Division of Clearing and Risk (DCR). Aggregate initial margin summary information for Chicago Mercantile Exchange (CME), ICE Clear Credit (ICC), ICE Clear US (ICUS), ICE Clear Europe (ICEU), LCH Ltd., and LCH SA is published below. The information will generally be updated within ten business days of the end of each month.", "distribution": [ { - "@type": "dcat:Distribution", - "title": "Census Tiger County Lines for 2013", - "downloadURL": "http://www2.census.gov/geo/tiger/TIGER2013/COUNTY/tl_2013_us_county.zip", - "mediaType": "application/zip" - }, - { - "@type": "dcat:Distribution", - "title": "[Anything valid here] Original Metadata", - "downloadURL": "https://meta.geo.census.gov/data/existing/decennial/GEO/GPMB/TIGERline/TIGER2013/county/tl_2013_us_county.shp.iso.xml", - "conformsTo": "http://www.isotc211.org/2005/gmi", - "description": "[Not required] The metadata original format", - "mediaType": "text/xml", - "format": "XML" + "accessURL": "https://www.cftc.gov/MarketReports/ClearedMarginReports/index.htm" } - ] + ], + "identifier": "cftc-dc7", + "keyword": ["margin", "exchange"], + "modified": "R/P1M", + "programCode": ["000:000"], + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. 
Government" + } + }, + "title": "Cleared Margin Reports" } ] } From 8e43022faec4f5f3ec9aeeb5e6402534b2751e6f Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:47:22 -0700 Subject: [PATCH 12/27] add ckan mock return --- .../dcatus/ckan_datasets_resp.json | 775 ++++++++++++++++++ 1 file changed, 775 insertions(+) create mode 100644 tests/harvest-sources/dcatus/ckan_datasets_resp.json diff --git a/tests/harvest-sources/dcatus/ckan_datasets_resp.json b/tests/harvest-sources/dcatus/ckan_datasets_resp.json new file mode 100644 index 00000000..36138998 --- /dev/null +++ b/tests/harvest-sources/dcatus/ckan_datasets_resp.json @@ -0,0 +1,775 @@ +{ + "help": "https://catalog-dev.data.gov/api/3/action/help_show?name=package_search", + "success": true, + "result": { + "count": 7, + "facets": {}, + "results": [ + { + "author": null, + "author_email": null, + "creator_user_id": "fe35904a-4956-478b-8d2b-3f25e6217600", + "id": "d82bf353-5ed0-4a95-a643-ce15aafefd56", + "isopen": false, + "license_id": null, + "license_title": null, + "maintainer": "Harold W. 
Hild", + "maintainer_email": "hhild@CFTC.GOV", + "metadata_created": "2024-01-03T17:45:28.179200", + "metadata_modified": "2024-01-03T17:45:28.179206", + "name": "commitment-of-traders", + "notes": "COT reports provide a breakdown of each Tuesday's open interest for futures and options on futures market in which 20 or more traders hold positions equal to or above the reporting levels established by CFTC", + "num_resources": 1, + "num_tags": 3, + "organization": { + "id": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "name": "test", + "title": "test", + "type": "organization", + "description": ".", + "image_url": "", + "created": "2023-04-14T13:44:38.462827", + "is_organization": true, + "approval_status": "approved", + "state": "active" + }, + "owner_org": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "private": false, + "state": "active", + "title": "Commitment of Traders", + "type": "dataset", + "url": null, + "version": null, + "extras": [ + { + "key": "publisher_hierarchy", + "value": "U.S. Government > U.S. Commodity Futures Trading Commission" + }, + { "key": "resource-type", "value": "Dataset" }, + { + "key": "publisher", + "value": "U.S. Commodity Futures Trading Commission" + }, + { "key": "accessLevel", "value": "public" }, + { "key": "bureauCode", "value": "339:00" }, + { "key": "identifier", "value": "cftc-dc1" }, + { "key": "modified", "value": "R/P1W" }, + { "key": "programCode", "value": "000:000" }, + { + "key": "dcat_metadata", + "value": "{'accessLevel': 'public', 'bureauCode': ['339:00'], 'contactPoint': {'fn': 'Harold W. 
Hild', 'hasEmail': 'mailto:hhild@CFTC.GOV'}, 'describedBy': 'https://www.cftc.gov/MarketReports/CommitmentsofTraders/ExplanatoryNotes/index.htm', 'description': \"COT reports provide a breakdown of each Tuesday's open interest for futures and options on futures market in which 20 or more traders hold positions equal to or above the reporting levels established by CFTC\", 'distribution': [{'accessURL': 'https://www.cftc.gov/MarketReports/CommitmentsofTraders/index.htm'}], 'identifier': 'cftc-dc1', 'keyword': ['commitment of traders', 'cot', 'open interest'], 'modified': 'R/P1W', 'programCode': ['000:000'], 'publisher': {'name': 'U.S. Commodity Futures Trading Commission', 'subOrganizationOf': {'name': 'U.S. Government'}}, 'title': 'Commitment of Traders'}" + }, + { + "key": "harvest_source_name", + "value": "test_harvest_source_name" + } + ], + "resources": [ + { + "cache_last_updated": null, + "cache_url": null, + "created": "2024-01-03T17:45:28.183169", + "description": "index.htm", + "format": "HTML", + "hash": "", + "id": "1aaa75c4-08ad-44a8-857c-fda511654832", + "last_modified": null, + "metadata_modified": "2024-01-03T17:45:28.168338", + "mimetype": null, + "mimetype_inner": null, + "name": "Web Page", + "no_real_name": true, + "package_id": "d82bf353-5ed0-4a95-a643-ce15aafefd56", + "position": 0, + "resource_type": null, + "size": null, + "state": "active", + "url": "https://www.cftc.gov/MarketReports/CommitmentsofTraders/index.htm", + "url_type": null + } + ], + "tags": [ + { + "display_name": "commitment-of-traders", + "id": "a2879a8a-4a5f-4ecb-adf9-ffa79c19fa03", + "name": "commitment-of-traders", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "cot", + "id": "a728a985-ddb7-48be-b32f-7cda4a693b29", + "name": "cot", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "open-interest", + "id": "2ecd13e3-8c4d-4bd8-9df6-8ac3db2ca230", + "name": "open-interest", + "state": "active", + "vocabulary_id": null + } + ], + 
"groups": [], + "relationships_as_subject": [], + "relationships_as_object": [] + }, + { + "author": null, + "author_email": null, + "creator_user_id": "fe35904a-4956-478b-8d2b-3f25e6217600", + "id": "37452477-7c82-4fac-8363-0062435ffdbd", + "isopen": false, + "license_id": null, + "license_title": null, + "maintainer": "Harold W. Hild", + "maintainer_email": "hhild@CFTC.GOV", + "metadata_created": "2024-01-03T17:45:32.878623", + "metadata_modified": "2024-01-03T17:45:32.878629", + "name": "bank-participation-reports", + "notes": "The Bank Participation Report (BPR), developed by the Division of Market Oversight to provide the U.S. banking authorities and the Bank for International Settlements (BIS, located in Basel, Switzerland) aggregate large-trader positions of banks participating in various financial and non-financial commodity futures.", + "num_resources": 1, + "num_tags": 3, + "organization": { + "id": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "name": "test", + "title": "test", + "type": "organization", + "description": ".", + "image_url": "", + "created": "2023-04-14T13:44:38.462827", + "is_organization": true, + "approval_status": "approved", + "state": "active" + }, + "owner_org": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "private": false, + "state": "active", + "title": "Bank Participation Reports", + "type": "dataset", + "url": null, + "version": null, + "extras": [ + { + "key": "publisher_hierarchy", + "value": "U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission" + }, + { "key": "resource-type", "value": "Dataset" }, + { + "key": "publisher", + "value": "U.S. 
Commodity Futures Trading Commission" + }, + { "key": "accessLevel", "value": "public" }, + { "key": "bureauCode", "value": "339:00" }, + { "key": "identifier", "value": "cftc-dc2" }, + { "key": "modified", "value": "R/P1M" }, + { "key": "programCode", "value": "000:000" }, + { + "key": "dcat_metadata", + "value": "{'accessLevel': 'public', 'bureauCode': ['339:00'], 'contactPoint': {'fn': 'Harold W. Hild', 'hasEmail': 'mailto:hhild@CFTC.GOV'}, 'describedBy': 'https://www.cftc.gov/MarketReports/BankParticipationReports/ExplanatoryNotes/index.htm', 'description': 'The Bank Participation Report (BPR), developed by the Division of Market Oversight to provide the U.S. banking authorities and the Bank for International Settlements (BIS, located in Basel, Switzerland) aggregate large-trader positions of banks participating in various financial and non-financial commodity futures.', 'distribution': [{'accessURL': 'https://www.cftc.gov/MarketReports/BankParticipationReports/index.htm'}], 'identifier': 'cftc-dc2', 'keyword': ['bank participation report', 'banking', 'bpr'], 'modified': 'R/P1M', 'programCode': ['000:000'], 'publisher': {'name': 'U.S. Commodity Futures Trading Commission', 'subOrganizationOf': {'name': 'U.S. 
Government'}}, 'title': 'Bank Participation Reports'}" + }, + { + "key": "harvest_source_name", + "value": "test_harvest_source_name" + } + ], + "resources": [ + { + "cache_last_updated": null, + "cache_url": null, + "created": "2024-01-03T17:45:32.880277", + "description": "index.htm", + "format": "HTML", + "hash": "", + "id": "d6d227f5-973c-47a8-8e2a-00a9c1846d1a", + "last_modified": null, + "metadata_modified": "2024-01-03T17:45:32.870910", + "mimetype": null, + "mimetype_inner": null, + "name": "Web Page", + "no_real_name": true, + "package_id": "37452477-7c82-4fac-8363-0062435ffdbd", + "position": 0, + "resource_type": null, + "size": null, + "state": "active", + "url": "https://www.cftc.gov/MarketReports/BankParticipationReports/index.htm", + "url_type": null + } + ], + "tags": [ + { + "display_name": "bank-participation-report", + "id": "8f13c9c2-58b6-458f-bdbf-ead4f48db9d4", + "name": "bank-participation-report", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "banking", + "id": "e80c3c36-f71f-4a5d-a141-4460d6739656", + "name": "banking", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "bpr", + "id": "1ce56b61-dcb2-4d4f-a4f3-2f9383a53470", + "name": "bpr", + "state": "active", + "vocabulary_id": null + } + ], + "groups": [], + "relationships_as_subject": [], + "relationships_as_object": [] + }, + { + "author": null, + "author_email": null, + "creator_user_id": "fe35904a-4956-478b-8d2b-3f25e6217600", + "id": "116078aa-94f0-4965-b0d7-95520d74aa38", + "isopen": false, + "license_id": null, + "license_title": null, + "maintainer": "Harold W. 
Hild", + "maintainer_email": "hhild@CFTC.gov", + "metadata_created": "2024-01-03T17:45:35.968805", + "metadata_modified": "2024-01-03T17:45:35.968810", + "name": "cotton-on-call", + "notes": "Cotton On-Call Report shows the quantity of call cotton bought or sold on which the price has not been fixed, together with the respective futures on which the purchase or sale is based on.", + "num_resources": 1, + "num_tags": 1, + "organization": { + "id": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "name": "test", + "title": "test", + "type": "organization", + "description": ".", + "image_url": "", + "created": "2023-04-14T13:44:38.462827", + "is_organization": true, + "approval_status": "approved", + "state": "active" + }, + "owner_org": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "private": false, + "state": "active", + "title": "Cotton On Call", + "type": "dataset", + "url": null, + "version": null, + "extras": [ + { + "key": "publisher_hierarchy", + "value": "U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission" + }, + { "key": "resource-type", "value": "Dataset" }, + { + "key": "publisher", + "value": "U.S. Commodity Futures Trading Commission" + }, + { "key": "accessLevel", "value": "public" }, + { "key": "bureauCode", "value": "339:00" }, + { "key": "identifier", "value": "cftc-dc3" }, + { "key": "modified", "value": "R/P1W" }, + { "key": "programCode", "value": "000:000" }, + { + "key": "dcat_metadata", + "value": "{'accessLevel': 'public', 'bureauCode': ['339:00'], 'contactPoint': {'fn': 'Harold W. 
Hild', 'hasEmail': 'mailto:hhild@CFTC.gov'}, 'describedBy': 'https://www.cftc.gov/MarketReports/CottonOnCall/index.htm', 'description': 'Cotton On-Call Report shows the quantity of call cotton bought or sold on which the price has not been fixed, together with the respective futures on which the purchase or sale is based on.', 'distribution': [{'accessURL': 'https://www.cftc.gov/MarketReports/CottonOnCall/index.htm'}], 'identifier': 'cftc-dc3', 'keyword': ['cotton on call', 'cotton on-call'], 'modified': 'R/P1W', 'programCode': ['000:000'], 'publisher': {'name': 'U.S. Commodity Futures Trading Commission', 'subOrganizationOf': {'name': 'U.S. Government'}}, 'title': 'Cotton On Call'}" + }, + { + "key": "harvest_source_name", + "value": "test_harvest_source_name" + } + ], + "resources": [ + { + "cache_last_updated": null, + "cache_url": null, + "created": "2024-01-03T17:45:35.970377", + "description": "index.htm", + "format": "HTML", + "hash": "", + "id": "0fe824a4-43e9-4459-a471-9993f5dc7443", + "last_modified": null, + "metadata_modified": "2024-01-03T17:45:35.964304", + "mimetype": null, + "mimetype_inner": null, + "name": "Web Page", + "no_real_name": true, + "package_id": "116078aa-94f0-4965-b0d7-95520d74aa38", + "position": 0, + "resource_type": null, + "size": null, + "state": "active", + "url": "https://www.cftc.gov/MarketReports/CottonOnCall/index.htm", + "url_type": null + } + ], + "tags": [ + { + "display_name": "cotton-on-call", + "id": "12298da4-748e-4ab2-af27-221145fb517e", + "name": "cotton-on-call", + "state": "active", + "vocabulary_id": null + } + ], + "groups": [], + "relationships_as_subject": [], + "relationships_as_object": [] + }, + { + "author": null, + "author_email": null, + "creator_user_id": "fe35904a-4956-478b-8d2b-3f25e6217600", + "id": "fa1c3904-373f-4573-aaad-41b8e1de1143", + "isopen": false, + "license_id": null, + "license_title": null, + "maintainer": "Carrie L Coffin", + "maintainer_email": "ccoffin@CFTC.gov", + "metadata_created": 
"2024-01-03T17:45:39.315356", + "metadata_modified": "2024-01-03T17:45:39.315362", + "name": "financial-data-for-fcms", + "notes": "Futures commission merchants (FCMs) and retail foreign exchange dealers (RFEDs) must file monthly financial reports with the CFTC's Market Participants Division (MPD) within 17 business days after the end of the month. Selected financial information from these reports is published.", + "num_resources": 1, + "num_tags": 6, + "organization": { + "id": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "name": "test", + "title": "test", + "type": "organization", + "description": ".", + "image_url": "", + "created": "2023-04-14T13:44:38.462827", + "is_organization": true, + "approval_status": "approved", + "state": "active" + }, + "owner_org": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "private": false, + "state": "active", + "title": "Financial Data for FCMS", + "type": "dataset", + "url": null, + "version": null, + "extras": [ + { + "key": "publisher_hierarchy", + "value": "U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission" + }, + { "key": "resource-type", "value": "Dataset" }, + { + "key": "publisher", + "value": "U.S. 
Commodity Futures Trading Commission" + }, + { "key": "accessLevel", "value": "public" }, + { "key": "bureauCode", "value": "339:00" }, + { "key": "identifier", "value": "cftc-dc4" }, + { "key": "modified", "value": "R/P1M" }, + { "key": "programCode", "value": "000:000" }, + { + "key": "dcat_metadata", + "value": "{'accessLevel': 'public', 'bureauCode': ['339:00'], 'contactPoint': {'fn': 'Carrie L Coffin', 'hasEmail': 'mailto:ccoffin@CFTC.gov'}, 'describedBy': 'https://www.cftc.gov/MarketReports/financialfcmdata/DescriptionofReportDataFields/indesx.htm', 'description': \"Futures commission merchants (FCMs) and retail foreign exchange dealers (RFEDs) must file monthly financial reports with the CFTC's Market Participants Division (MPD) within 17 business days after the end of the month. Selected financial information from these reports is published.\", 'distribution': [{'accessURL': 'https://www.cftc.gov/MarketReports/financialfcmdata/index.htm'}], 'identifier': 'cftc-dc4', 'keyword': ['dealer', 'fcm', 'futures commission merchant', 'market participants', 'mpd', 'retail foreign exchange'], 'modified': 'R/P1M', 'programCode': ['000:000'], 'publisher': {'name': 'U.S. Commodity Futures Trading Commission', 'subOrganizationOf': {'name': 'U.S. 
Government'}}, 'title': 'Financial Data for FCMS'}" + }, + { + "key": "harvest_source_name", + "value": "test_harvest_source_name" + } + ], + "resources": [ + { + "cache_last_updated": null, + "cache_url": null, + "created": "2024-01-03T17:45:39.317107", + "description": "index.htm", + "format": "HTML", + "hash": "", + "id": "64f8fbab-8ad1-48f0-adb6-9d8d09ade7e8", + "last_modified": null, + "metadata_modified": "2024-01-03T17:45:39.302853", + "mimetype": null, + "mimetype_inner": null, + "name": "Web Page", + "no_real_name": true, + "package_id": "fa1c3904-373f-4573-aaad-41b8e1de1143", + "position": 0, + "resource_type": null, + "size": null, + "state": "active", + "url": "https://www.cftc.gov/MarketReports/financialfcmdata/index.htm", + "url_type": null + } + ], + "tags": [ + { + "display_name": "dealer", + "id": "d50fff3b-f83e-40f2-a373-ea2d356de9cb", + "name": "dealer", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "fcm", + "id": "fbce5bc9-3400-46ae-a67c-5f74bf54b55a", + "name": "fcm", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "futures-commission-merchant", + "id": "4d24f60d-5eb9-40e9-adcb-0355aa981521", + "name": "futures-commission-merchant", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "market-participants", + "id": "25411659-e740-4ecf-befa-7360bda6d158", + "name": "market-participants", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "mpd", + "id": "56cb8509-dacf-474e-bfc5-02e160edc2c6", + "name": "mpd", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "retail-foreign-exchange", + "id": "3a1c2a0f-63f6-4871-9309-aad1fb088aa5", + "name": "retail-foreign-exchange", + "state": "active", + "vocabulary_id": null + } + ], + "groups": [], + "relationships_as_subject": [], + "relationships_as_object": [] + }, + { + "author": null, + "author_email": null, + "creator_user_id": "fe35904a-4956-478b-8d2b-3f25e6217600", + "id": 
"2cbbdabf-1a45-476a-9cf8-168a39d8225f", + "isopen": false, + "license_id": null, + "license_title": null, + "maintainer": "Byung-IL Seo", + "maintainer_email": "BSeo@CFTC.gov", + "metadata_created": "2024-01-03T17:45:42.689760", + "metadata_modified": "2024-01-03T17:45:42.689767", + "name": "net-positions-changes-data", + "notes": "The Large Trader Net Position Changes and the Trading Account Net Position Changes data provides the public with a view of the amount of trading that results in net changes to positions at the trader level and at the account level. The data reflects trading that changes or creates an end-of-day position, as contrasted with trading that does not change a trader\u2019s end-of-day net position, such as spread or day trading.", + "num_resources": 1, + "num_tags": 3, + "organization": { + "id": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "name": "test", + "title": "test", + "type": "organization", + "description": ".", + "image_url": "", + "created": "2023-04-14T13:44:38.462827", + "is_organization": true, + "approval_status": "approved", + "state": "active" + }, + "owner_org": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "private": false, + "state": "active", + "title": "Net Positions Changes Data", + "type": "dataset", + "url": null, + "version": null, + "extras": [ + { + "key": "publisher_hierarchy", + "value": "U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission" + }, + { "key": "resource-type", "value": "Dataset" }, + { + "key": "publisher", + "value": "U.S. 
Commodity Futures Trading Commission" + }, + { "key": "accessLevel", "value": "public" }, + { "key": "bureauCode", "value": "339:00" }, + { "key": "identifier", "value": "cftc-dc5" }, + { "key": "modified", "value": "2011-06-30" }, + { "key": "programCode", "value": "000:000" }, + { + "key": "dcat_metadata", + "value": "{'accessLevel': 'public', 'bureauCode': ['339:00'], 'contactPoint': {'fn': 'Byung-IL Seo', 'hasEmail': 'mailto:BSeo@CFTC.gov'}, 'describedBy': 'https://www.cftc.gov/MarketReports/LgTraderExplanatory.html', 'description': 'The Large Trader Net Position Changes and the Trading Account Net Position Changes data provides the public with a view of the amount of trading that results in net changes to positions at the trader level and at the account level. The data reflects trading that changes or creates an end-of-day position, as contrasted with trading that does not change a trader\u2019s end-of-day net position, such as spread or day trading.', 'distribution': [{'accessURL': 'https://www.cftc.gov/MarketReports/NetPositionChangesData/index.htm'}], 'identifier': 'cftc-dc5', 'keyword': ['larger trader net position', 'net positions', 'trading account net positions'], 'modified': '2011-06-30', 'programCode': ['000:000'], 'publisher': {'name': 'U.S. Commodity Futures Trading Commission', 'subOrganizationOf': {'name': 'U.S. 
Government'}}, 'title': 'Net Positions Changes Data'}" + }, + { + "key": "harvest_source_name", + "value": "test_harvest_source_name" + } + ], + "resources": [ + { + "cache_last_updated": null, + "cache_url": null, + "created": "2024-01-03T17:45:42.691834", + "description": "index.htm", + "format": "HTML", + "hash": "", + "id": "ad9d9510-37a1-4a48-9f17-9b00ce488323", + "last_modified": null, + "metadata_modified": "2024-01-03T17:45:42.682258", + "mimetype": null, + "mimetype_inner": null, + "name": "Web Page", + "no_real_name": true, + "package_id": "2cbbdabf-1a45-476a-9cf8-168a39d8225f", + "position": 0, + "resource_type": null, + "size": null, + "state": "active", + "url": "https://www.cftc.gov/MarketReports/NetPositionChangesData/index.htm", + "url_type": null + } + ], + "tags": [ + { + "display_name": "larger-trader-net-position", + "id": "0c18826b-244e-4093-9513-d1febab5372d", + "name": "larger-trader-net-position", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "net-positions", + "id": "87b349d9-b4aa-4601-8239-1fba47f50191", + "name": "net-positions", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "trading-account-net-positions", + "id": "c42822f4-78ec-4791-aaad-08de83dfd9ac", + "name": "trading-account-net-positions", + "state": "active", + "vocabulary_id": null + } + ], + "groups": [], + "relationships_as_subject": [], + "relationships_as_object": [] + }, + { + "author": null, + "author_email": null, + "creator_user_id": "fe35904a-4956-478b-8d2b-3f25e6217600", + "id": "44954eb7-076e-45e6-b8cc-cf9119e04f6d", + "isopen": false, + "license_id": null, + "license_title": null, + "maintainer": "Richard Haynes", + "maintainer_email": "RHaynes@CFTC.gov", + "metadata_created": "2024-01-03T17:45:46.219897", + "metadata_modified": "2024-01-03T17:45:46.219902", + "name": "weekly-swaps-report", + "notes": "The CFTC Swaps Report aggregates a comprehensive body of swap market data that was not previously reported to 
regulators or regulated entities, and makes that information freely available in a form that is readily usable by both market participants and the general public. The swaps market data included in publications produced by entities such as the BIS, ISDA, and the Office of the Comptroller of the Currency vary in scope and granularity, but none corresponds directly to the data stored in the CFTC's SDRs.", + "num_resources": 1, + "num_tags": 2, + "organization": { + "id": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "name": "test", + "title": "test", + "type": "organization", + "description": ".", + "image_url": "", + "created": "2023-04-14T13:44:38.462827", + "is_organization": true, + "approval_status": "approved", + "state": "active" + }, + "owner_org": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "private": false, + "state": "active", + "title": "Weekly Swaps Report", + "type": "dataset", + "url": null, + "version": null, + "extras": [ + { + "key": "publisher_hierarchy", + "value": "U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission" + }, + { "key": "resource-type", "value": "Dataset" }, + { + "key": "publisher", + "value": "U.S. 
Commodity Futures Trading Commission" + }, + { "key": "accessLevel", "value": "public" }, + { "key": "bureauCode", "value": "339:00" }, + { "key": "identifier", "value": "cftc-dc6" }, + { "key": "modified", "value": "R/P1W" }, + { "key": "programCode", "value": "000:000" }, + { + "key": "dcat_metadata", + "value": "{'accessLevel': 'public', 'bureauCode': ['339:00'], 'contactPoint': {'fn': 'Richard Haynes', 'hasEmail': 'mailto:RHaynes@CFTC.gov'}, 'describedBy': 'https://www.cftc.gov/MarketReports/SwapsReports/DataDictionary/index.htm', 'description': \"The CFTC Swaps Report aggregates a comprehensive body of swap market data that was not previously reported to regulators or regulated entities, and makes that information freely available in a form that is readily usable by both market participants and the general public. The swaps market data included in publications produced by entities such as the BIS, ISDA, and the Office of the Comptroller of the Currency vary in scope and granularity, but none corresponds directly to the data stored in the CFTC's SDRs.\", 'distribution': [{'accessURL': 'https://www.cftc.gov/MarketReports/SwapsReports/index.htm'}], 'identifier': 'cftc-dc6', 'keyword': ['swaps market', 'swaps report'], 'modified': 'R/P1W', 'programCode': ['000:000'], 'publisher': {'name': 'U.S. Commodity Futures Trading Commission', 'subOrganizationOf': {'name': 'U.S. 
Government'}}, 'title': 'Weekly Swaps Report'}" + }, + { + "key": "harvest_source_name", + "value": "test_harvest_source_name" + } + ], + "resources": [ + { + "cache_last_updated": null, + "cache_url": null, + "created": "2024-01-03T17:45:46.221358", + "description": "index.htm", + "format": "HTML", + "hash": "", + "id": "8d2a2b14-157c-4e5a-b274-365610d4176d", + "last_modified": null, + "metadata_modified": "2024-01-03T17:45:46.214194", + "mimetype": null, + "mimetype_inner": null, + "name": "Web Page", + "no_real_name": true, + "package_id": "44954eb7-076e-45e6-b8cc-cf9119e04f6d", + "position": 0, + "resource_type": null, + "size": null, + "state": "active", + "url": "https://www.cftc.gov/MarketReports/SwapsReports/index.htm", + "url_type": null + } + ], + "tags": [ + { + "display_name": "swaps-market", + "id": "23d2277b-c098-4b34-a5db-f3670863795e", + "name": "swaps-market", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "swaps-report", + "id": "f1f77e07-57f0-4e77-a931-e6822a46ce72", + "name": "swaps-report", + "state": "active", + "vocabulary_id": null + } + ], + "groups": [], + "relationships_as_subject": [], + "relationships_as_object": [] + }, + { + "author": null, + "author_email": null, + "creator_user_id": "fe35904a-4956-478b-8d2b-3f25e6217600", + "id": "73912fa4-062c-4d0e-899c-3f0924c17552", + "isopen": false, + "license_id": null, + "license_title": null, + "maintainer": "Byung-IL Seo", + "maintainer_email": "BSeo@CFTC.gov", + "metadata_created": "2024-01-03T17:45:50.286869", + "metadata_modified": "2024-01-03T17:45:50.286874", + "name": "cleared-margin-reports", + "notes": "Derivatives clearing organizations (DCOs) are required to file daily reports on initial margin with the CFTC's Division of Clearing and Risk (DCR). Aggregate initial margin summary information for Chicago Mercantile Exchange (CME), ICE Clear Credit (ICC), ICE Clear US (ICUS), ICE Clear Europe (ICEU), LCH Ltd., and LCH SA is published below. 
The information will generally be updated within ten business days of the end of each month.", + "num_resources": 1, + "num_tags": 2, + "organization": { + "id": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "name": "test", + "title": "test", + "type": "organization", + "description": ".", + "image_url": "", + "created": "2023-04-14T13:44:38.462827", + "is_organization": true, + "approval_status": "approved", + "state": "active" + }, + "owner_org": "1c7ba64d-27d4-458c-bd6a-5a6dcbed5d81", + "private": false, + "state": "active", + "title": "Cleared Margin Reports", + "type": "dataset", + "url": null, + "version": null, + "extras": [ + { + "key": "publisher_hierarchy", + "value": "U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission > U.S. Government > U.S. Commodity Futures Trading Commission" + }, + { "key": "resource-type", "value": "Dataset" }, + { + "key": "publisher", + "value": "U.S. Commodity Futures Trading Commission" + }, + { "key": "accessLevel", "value": "public" }, + { "key": "bureauCode", "value": "339:00" }, + { "key": "identifier", "value": "cftc-dc7" }, + { "key": "modified", "value": "R/P1M" }, + { "key": "programCode", "value": "000:000" }, + { + "key": "dcat_metadata", + "value": "{'accessLevel': 'public', 'bureauCode': ['339:00'], 'contactPoint': {'fn': 'Byung-IL Seo', 'hasEmail': 'mailto:BSeo@CFTC.gov'}, 'describedBy': 'https://www.cftc.gov/MarketReports/ClearedMarginReports/index.htm', 'description': \"Derivatives clearing organizations (DCOs) are required to file daily reports on initial margin with the CFTC's Division of Clearing and Risk (DCR). 
Aggregate initial margin summary information for Chicago Mercantile Exchange (CME), ICE Clear Credit (ICC), ICE Clear US (ICUS), ICE Clear Europe (ICEU), LCH Ltd., and LCH SA is published below. The information will generally be updated within ten business days of the end of each month.\", 'distribution': [{'accessURL': 'https://www.cftc.gov/MarketReports/ClearedMarginReports/index.htm'}], 'identifier': 'cftc-dc7', 'keyword': ['exchange', 'margin'], 'modified': 'R/P1M', 'programCode': ['000:000'], 'publisher': {'name': 'U.S. Commodity Futures Trading Commission', 'subOrganizationOf': {'name': 'U.S. Government'}}, 'title': 'Cleared Margin Reports'}" + }, + { + "key": "harvest_source_name", + "value": "test_harvest_source_name" + } + ], + "resources": [ + { + "cache_last_updated": null, + "cache_url": null, + "created": "2024-01-03T17:45:50.288362", + "description": "index.htm", + "format": "HTML", + "hash": "", + "id": "30dfcaf9-233e-412b-9f48-3c2552cce409", + "last_modified": null, + "metadata_modified": "2024-01-03T17:45:50.280896", + "mimetype": null, + "mimetype_inner": null, + "name": "Web Page", + "no_real_name": true, + "package_id": "73912fa4-062c-4d0e-899c-3f0924c17552", + "position": 0, + "resource_type": null, + "size": null, + "state": "active", + "url": "https://www.cftc.gov/MarketReports/ClearedMarginReports/index.htm", + "url_type": null + } + ], + "tags": [ + { + "display_name": "exchange", + "id": "bc369889-4ee2-4b6f-ac3e-dad1edd43e7b", + "name": "exchange", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "margin", + "id": "2b36390c-df68-48bc-9219-f3d215363e10", + "name": "margin", + "state": "active", + "vocabulary_id": null + } + ], + "groups": [], + "relationships_as_subject": [], + "relationships_as_object": [] + } + ], + "sort": "views_recent desc", + "search_facets": {} + } +} From 7bc029f0072347c65dd5a197548d98f24f0bd074 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:47:34 -0700 Subject: [PATCH 
13/27] add dcatus comparison delta file. --- .../dcatus/dcatus_compare.json | 202 ++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 tests/harvest-sources/dcatus/dcatus_compare.json diff --git a/tests/harvest-sources/dcatus/dcatus_compare.json b/tests/harvest-sources/dcatus/dcatus_compare.json new file mode 100644 index 00000000..f2978071 --- /dev/null +++ b/tests/harvest-sources/dcatus/dcatus_compare.json @@ -0,0 +1,202 @@ +{ + "@context": "https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld", + "@id": "http://www.cftc.gov/data.json", + "@type": "dcat:Catalog", + "conformsTo": "https://project-open-data.cio.gov/v1.1/schema", + "describedBy": "https://project-open-data.cio.gov/v1.1/schema/catalog.json", + "dataset": [ + { + "accessLevel": "public", + "bureauCode": ["339:00"], + "contactPoint": { + "fn": "Harold W. Hild", + "hasEmail": "mailto:hhild@CFTC.GOV" + }, + "describedBy": "https://www.cftc.gov/MarketReports/CommitmentsofTraders/ExplanatoryNotes/index.htm", + "description": "COT reports provide a breakdown of each Tuesday's open interest for futures and options on futures market in which 20 or more traders hold positions equal to or above the reporting levels established by CFTC", + "distribution": [ + { + "accessURL": "https://www.cftc.gov/MarketReports/CommitmentsofTraders/index.htm" + } + ], + "identifier": "cftc-dc10", + "keyword": ["commitment of traders", "cot", "open interest"], + "modified": "R/P1W", + "programCode": ["000:000"], + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. Government" + } + }, + "title": "Commitment of Traders" + }, + { + "accessLevel": "public", + "bureauCode": ["339:00"], + "contactPoint": { + "fn": "Harold W. 
Hild", + "hasEmail": "mailto:hhild@CFTC.GOV" + }, + "describedBy": "https://www.cftc.gov/MarketReports/BankParticipationReports/ExplanatoryNotes/index.htm", + "description": "The Bank Participation Report (BPR), developed by the Division of Market Oversight to provide the U.S. banking authorities and the Bank for International Settlements (BIS, located in Basel, Switzerland) aggregate large-trader positions of banks participating in various financial and non-financial commodity futures.", + "distribution": [ + { + "accessURL": "https://www.cftc.gov/MarketReports/BankParticipationReports/index.htm" + } + ], + "identifier": "cftc-dc2", + "keyword": ["bank participation report", "bpr", "banking"], + "modified": "R/P1M Update", + "programCode": ["000:000"], + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. Government" + } + }, + "title": "Bank Participation Reports" + }, + { + "accessLevel": "public", + "bureauCode": ["339:00"], + "contactPoint": { + "fn": "Harold W. Hild", + "hasEmail": "mailto:hhild@CFTC.gov" + }, + "describedBy": "https://www.cftc.gov/MarketReports/CottonOnCall/index.htm", + "description": "Cotton On-Call Report shows the quantity of call cotton bought or sold on which the price has not been fixed, together with the respective futures on which the purchase or sale is based on.", + "distribution": [ + { + "accessURL": "https://www.cftc.gov/MarketReports/CottonOnCall/index.htm" + } + ], + "identifier": "cftc-dc3", + "keyword": ["cotton on call", "cotton on-call", "update keyword"], + "modified": "R/P1W", + "programCode": ["000:000"], + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. 
Government" + } + }, + "title": "Cotton On Call" + }, + { + "accessLevel": "public", + "bureauCode": ["339:00"], + "contactPoint": { + "fn": "Carrie L Coffin", + "hasEmail": "mailto:ccoffin@CFTC.gov" + }, + "describedBy": "https://www.cftc.gov/MarketReports/financialfcmdata/DescriptionofReportDataFields/indesx.htm", + "description": "Futures commission merchants (FCMs) and retail foreign exchange dealers (RFEDs) must file monthly financial reports with the CFTC's Market Participants Division (MPD) within 17 business days after the end of the month. Selected financial information from these reports is published.", + "distribution": [ + { + "accessURL": "https://www.cftc.gov/MarketReports/financialfcmdata/index.htm" + } + ], + "identifier": "cftc-dc4", + "keyword": [ + "fcm", + "retail foreign exchange", + "dealer", + "market participants", + "mpd", + "futures commission merchant" + ], + "modified": "R/P1M", + "programCode": ["000:000"], + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "Changed Value" + } + }, + "title": "Financial Data for FCMS" + }, + { + "accessLevel": "public", + "bureauCode": ["339:00"], + "contactPoint": { + "fn": "Byung-IL Seo", + "hasEmail": "mailto:BSeo@CFTC.gov" + }, + "describedBy": "https://www.cftc.gov/MarketReports/LgTraderExplanatory.html", + "description": "The Large Trader Net Position Changes and the Trading Account Net Position Changes data provides the public with a view of the amount of trading that results in net changes to positions at the trader level and at the account level. 
The data reflects trading that changes or creates an end-of-day position, as contrasted with trading that does not change a trader’s end-of-day net position, such as spread or day trading.", + "distribution": [ + { + "accessURL": "https://www.cftc.gov/MarketReports/NetPositionChangesData/index.htm" + } + ], + "identifier": "cftc-dc5", + "keyword": [ + "net positions", + "larger trader net position", + "trading account net positions" + ], + "modified": "2011-06-30", + "programCode": ["000:000"], + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. Government" + } + }, + "title": "Net Positions Changes Data" + }, + { + "accessLevel": "public", + "bureauCode": ["339:00"], + "contactPoint": { + "fn": "Richard Haynes", + "hasEmail": "mailto:RHaynes@CFTC.gov" + }, + "describedBy": "https://www.cftc.gov/MarketReports/SwapsReports/DataDictionary/index.htm", + "description": "The CFTC Swaps Report aggregates a comprehensive body of swap market data that was not previously reported to regulators or regulated entities, and makes that information freely available in a form that is readily usable by both market participants and the general public. The swaps market data included in publications produced by entities such as the BIS, ISDA, and the Office of the Comptroller of the Currency vary in scope and granularity, but none corresponds directly to the data stored in the CFTC's SDRs.", + "distribution": [ + { + "accessURL": "https://www.cftc.gov/MarketReports/SwapsReports/index.htm" + } + ], + "identifier": "cftc-dc6", + "keyword": ["swaps report", "swaps market"], + "modified": "R/P1W", + "programCode": ["000:000"], + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. 
Government" + } + }, + "title": "Weekly Swaps Report" + }, + { + "accessLevel": "public", + "bureauCode": ["339:00"], + "contactPoint": { + "fn": "Byung-IL Seo", + "hasEmail": "mailto:BSeo@CFTC.gov" + }, + "describedBy": "https://www.cftc.gov/MarketReports/ClearedMarginReports/index.htm", + "description": "Derivatives clearing organizations (DCOs) are required to file daily reports on initial margin with the CFTC's Division of Clearing and Risk (DCR). Aggregate initial margin summary information for Chicago Mercantile Exchange (CME), ICE Clear Credit (ICC), ICE Clear US (ICUS), ICE Clear Europe (ICEU), LCH Ltd., and LCH SA is published below. The information will generally be updated within ten business days of the end of each month.", + "distribution": [ + { + "accessURL": "https://www.cftc.gov/MarketReports/ClearedMarginReports/index.htm" + } + ], + "identifier": "cftc-dc7", + "keyword": ["margin", "exchange"], + "modified": "R/P1M", + "programCode": ["000:000"], + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. 
Government" + } + }, + "title": "Cleared Margin Reports" + } + ] +} From 068a8663d82647b80e28bb1a1989513d15316495 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:47:57 -0700 Subject: [PATCH 14/27] add integration fixtures and test for comparison --- tests/integration/compare/conftest.py | 54 +++++++++++++++++++ tests/integration/compare/test_compare_int.py | 21 ++++++++ 2 files changed, 75 insertions(+) create mode 100644 tests/integration/compare/conftest.py create mode 100644 tests/integration/compare/test_compare_int.py diff --git a/tests/integration/compare/conftest.py b/tests/integration/compare/conftest.py new file mode 100644 index 00000000..12f39f63 --- /dev/null +++ b/tests/integration/compare/conftest.py @@ -0,0 +1,54 @@ +import pytest +from pathlib import Path +import os +from harvester.utils.json import open_json +from harvester.utils.util import sort_dataset, dataset_to_hash +from harvester.load import search_ckan, create_ckan_entrypoint + +TEST_DIR = Path(__file__).parents[2] +HARVEST_SOURCES = TEST_DIR / "harvest-sources" + + +@pytest.fixture +def ckan_entrypoint(): + catalog_dev_api_key = os.getenv("CKAN_API_TOKEN_DEV") # gha + if catalog_dev_api_key is None: # local + import credentials + + catalog_dev_api_key = credentials.ckan_catalog_dev_api_key + + return create_ckan_entrypoint("https://catalog-dev.data.gov/", catalog_dev_api_key) + + +@pytest.fixture +def data_sources(ckan_entrypoint): + harvest_source_datasets = open_json( + HARVEST_SOURCES / "dcatus" / "dcatus_compare.json" + )["dataset"] + + harvest_source = {} + for d in harvest_source_datasets: + harvest_source[d["identifier"]] = dataset_to_hash( + sort_dataset(d) + ) # the extract needs to be sorted + + ckan_source_datasets = search_ckan( + ckan_entrypoint, {"q": 'harvest_source_name:"test_harvest_source_name"'} + )["results"] + + ckan_source = {} + + for d in ckan_source_datasets: + orig_meta = None + orig_id = None + for e in d["extras"]: + if e["key"] == 
"dcat_metadata": + orig_meta = eval(e["value"], {"__builtins__": {}}) + if e["key"] == "identifier": + orig_id = e["value"] + + ckan_source[orig_id] = dataset_to_hash( + orig_meta + ) # the response is stored sorted + + return harvest_source, ckan_source diff --git a/tests/integration/compare/test_compare_int.py b/tests/integration/compare/test_compare_int.py new file mode 100644 index 00000000..7dfd67ad --- /dev/null +++ b/tests/integration/compare/test_compare_int.py @@ -0,0 +1,21 @@ +from harvester.load import ( + create_ckan_package, + purge_ckan_package, + update_ckan_package, + dcatus_to_ckan, +) +from harvester.compare import compare +from harvester.utils.json import open_json + +from pathlib import Path + +TEST_DIR = Path(__file__).parents[3] +HARVEST_SOURCES = TEST_DIR / "harvest-sources" + + +def test_compare(data_sources): + compare_res = compare(*data_sources) + + assert len(compare_res["create"]) == 1 + assert len(compare_res["update"]) == 3 + assert len(compare_res["delete"]) == 1 From 64c7d16ab97e2553d10c958bb1f543ec30a9f69f Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:48:38 -0700 Subject: [PATCH 15/27] add real comparison fixture and test --- tests/unit/compare/conftest.py | 43 +++++++++++++++++++++++++++++- tests/unit/compare/test_compare.py | 15 ++++++++--- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/tests/unit/compare/conftest.py b/tests/unit/compare/conftest.py index ebc07305..cffbd863 100644 --- a/tests/unit/compare/conftest.py +++ b/tests/unit/compare/conftest.py @@ -1,8 +1,15 @@ import pytest +from pathlib import Path +import json +from harvester.utils.json import open_json +from harvester.utils.util import sort_dataset, dataset_to_hash + +TEST_DIR = Path(__file__).parents[2] +HARVEST_SOURCES = TEST_DIR / "harvest-sources" @pytest.fixture -def data_sources(): +def artificial_data_sources(): harvest_source = { "1": "de955c1b-fa16-4b84-ad6c-f891ba276056", # update "2": 
"6d500ebc-19f8-4541-82b0-f02ad24c82e3", # do nothing @@ -16,3 +23,37 @@ def data_sources(): } return harvest_source, ckan_source + + +@pytest.fixture +def data_sources(): + harvest_source_datasets = open_json( + HARVEST_SOURCES / "dcatus" / "dcatus_compare.json" + )["dataset"] + + harvest_source = {} + for d in harvest_source_datasets: + harvest_source[d["identifier"]] = dataset_to_hash( + sort_dataset(d) + ) # the extract needs to be sorted + + ckan_source_datasets = open_json( + HARVEST_SOURCES / "dcatus" / "ckan_datasets_resp.json" + )["result"]["results"] + + ckan_source = {} + + for d in ckan_source_datasets: + orig_meta = None + orig_id = None + for e in d["extras"]: + if e["key"] == "dcat_metadata": + orig_meta = eval(e["value"], {"__builtins__": {}}) + if e["key"] == "identifier": + orig_id = e["value"] + + ckan_source[orig_id] = dataset_to_hash( + orig_meta + ) # the response is stored sorted + + return harvest_source, ckan_source diff --git a/tests/unit/compare/test_compare.py b/tests/unit/compare/test_compare.py index e9da74d1..d07bc289 100644 --- a/tests/unit/compare/test_compare.py +++ b/tests/unit/compare/test_compare.py @@ -1,12 +1,19 @@ from harvester.compare import compare -def test_compare(data_sources): - """tests compare""" +def test_artificial_compare(artificial_data_sources): + """tests artificial datasets compare""" - compare_res = compare(*data_sources) + compare_res = compare(*artificial_data_sources) assert len(compare_res["create"]) == 1 assert len(compare_res["update"]) == 1 assert len(compare_res["delete"]) == 1 - assert "2" not in compare_res + + +def test_compare(data_sources): + compare_res = compare(*data_sources) + + assert len(compare_res["create"]) == 1 + assert len(compare_res["update"]) == 3 + assert len(compare_res["delete"]) == 1 From 851382f519a7c653756324848545b142819a282c Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:48:53 -0700 Subject: [PATCH 16/27] move fixture to integration test --- 
tests/unit/load/ckan/conftest.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/unit/load/ckan/conftest.py b/tests/unit/load/ckan/conftest.py index 57bdcb87..57d5826b 100644 --- a/tests/unit/load/ckan/conftest.py +++ b/tests/unit/load/ckan/conftest.py @@ -10,15 +10,15 @@ HARVEST_SOURCES = TEST_DIR / "harvest-sources" -@pytest.fixture -def ckan_entrypoint(): - catalog_dev_api_key = os.getenv("CKAN_API_TOKEN_DEV") # gha - if catalog_dev_api_key is None: # local - import credentials +# @pytest.fixture +# def ckan_entrypoint(): +# catalog_dev_api_key = os.getenv("CKAN_API_TOKEN_DEV") # gha +# if catalog_dev_api_key is None: # local +# import credentials - catalog_dev_api_key = credentials.ckan_catalog_dev_api_key +# catalog_dev_api_key = credentials.ckan_catalog_dev_api_key - return create_ckan_entrypoint("https://catalog-dev.data.gov/", catalog_dev_api_key) +# return create_ckan_entrypoint("https://catalog-dev.data.gov/", catalog_dev_api_key) @pytest.fixture @@ -33,7 +33,7 @@ def test_dcatus_catalog(): @pytest.fixture def test_ckan_package(test_ckan_package_id, test_dcatus_catalog): - ckan_dataset = dcatus_to_ckan(test_dcatus_catalog) + ckan_dataset = dcatus_to_ckan(test_dcatus_catalog, "test_harvest_source_name") ckan_dataset["id"] = test_ckan_package_id return ckan_dataset From 67f0592195a5ec58560e0b6dd1b9a506ff342a27 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:49:15 -0700 Subject: [PATCH 17/27] update expected result. 
--- tests/unit/load/ckan/test_ckan_cud.py | 34 ++++++++++++++++----------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/tests/unit/load/ckan/test_ckan_cud.py b/tests/unit/load/ckan/test_ckan_cud.py index 5f5e4415..daaffee8 100644 --- a/tests/unit/load/ckan/test_ckan_cud.py +++ b/tests/unit/load/ckan/test_ckan_cud.py @@ -9,19 +9,14 @@ def test_dcatus_to_ckan_transform(test_dcatus_catalog): expected_result = { "name": "fdic-failed-bank-list", "owner_org": "test", + "identifier": "https://www.fdic.gov/bank/individual/failed/", "maintainer": "FDIC Public Data Feedback", "maintainer_email": "FDICPublicDataFeedback@fdic.gov", - "notes": "The FDIC is often appointed as receiver for failed banks. This list includes banks which have failed since October 1, 2000.", # noqa E501 + "notes": "The FDIC is often appointed as receiver for failed banks. This list includes banks which have failed since October 1, 2000.", "title": "FDIC Failed Bank List", "resources": [ - { - "url": "https://www.fdic.gov/bank/individual/failed/banklist.csv", - "mimetype": "text/csv", - }, - { - "url": "https://www.fdic.gov/bank/individual/failed/index.html", - "mimetype": "text/html", - }, + {"url": "https://www.fdic.gov/bank/individual/failed/banklist.csv"}, + {"url": "https://www.fdic.gov/bank/individual/failed/index.html"}, ], "tags": [ {"name": "financial-institution"}, @@ -32,32 +27,43 @@ def test_dcatus_to_ckan_transform(test_dcatus_catalog): "extras": [ { "key": "publisher_hierarchy", - "value": "U.S. Government > Federal Deposit Insurance Corporation > Division of Insurance and Research", # noqa E501 + "value": "U.S. 
Government > Federal Deposit Insurance Corporation > Division of Insurance and Research", }, {"key": "resource-type", "value": "Dataset"}, {"key": "publisher", "value": "Division of Insurance and Research"}, {"key": "accessLevel", "value": "public"}, - {"key": "bureauCode", "value": ["357:20"]}, + {"key": "bureauCode", "value": "357:20"}, { "key": "identifier", "value": "https://www.fdic.gov/bank/individual/failed/", }, {"key": "modified", "value": "R/P1W"}, - {"key": "programCode", "value": ["000:000"]}, + {"key": "programCode", "value": "000:000"}, {"key": "publisher", "value": "Division of Insurance and Research"}, { "key": "publisher_hierarchy", - "value": "U.S. Government > Federal Deposit Insurance Corporation > Division of Insurance and Research", # noqa E501 + "value": "U.S. Government > Federal Deposit Insurance Corporation > Division of Insurance and Research", }, {"key": "resource-type", "value": "Dataset"}, {"key": "publisher", "value": "Division of Insurance and Research"}, + { + "key": "dcat_metadata", + "value": "{'accessLevel': 'public', 'bureauCode': ['357:20'], 'contactPoint': {'fn': 'FDIC Public Data Feedback', 'hasEmail': 'mailto:FDICPublicDataFeedback@fdic.gov'}, 'description': 'The FDIC is often appointed as receiver for failed banks. This list includes banks which have failed since October 1, 2000.', 'distribution': [{'accessURL': 'https://www.fdic.gov/bank/individual/failed/index.html', 'mediaType': 'text/html'}, {'downloadURL': 'https://www.fdic.gov/bank/individual/failed/banklist.csv', 'mediaType': 'text/csv'}], 'identifier': 'https://www.fdic.gov/bank/individual/failed/', 'keyword': ['assistance transactions', 'banks', 'failures', 'financial institution'], 'modified': 'R/P1W', 'programCode': ['000:000'], 'publisher': {'name': 'Division of Insurance and Research', 'subOrganizationOf': {'name': 'Federal Deposit Insurance Corporation', 'subOrganizationOf': {'name': 'U.S. 
Government'}}}, 'title': 'FDIC Failed Bank List'}", + }, + {"key": "harvest_source_name", "value": "example_harvest_source_name"}, ], "author": None, "author_email": None, } assert ( - DeepDiff(harvester.dcatus_to_ckan(test_dcatus_catalog), expected_result) == {} + DeepDiff( + harvester.dcatus_to_ckan( + test_dcatus_catalog, "example_harvest_source_name" + ), + expected_result, + ) + == {} ) From 89614490fa98b807b8957f36772f52000c621752 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:53:08 -0700 Subject: [PATCH 18/27] fix lint issues --- tests/integration/compare/test_compare_int.py | 12 ------------ tests/unit/compare/conftest.py | 1 - tests/unit/load/ckan/conftest.py | 16 +--------------- tests/unit/load/ckan/test_ckan_cud.py | 1 + 4 files changed, 2 insertions(+), 28 deletions(-) diff --git a/tests/integration/compare/test_compare_int.py b/tests/integration/compare/test_compare_int.py index 7dfd67ad..4c65d917 100644 --- a/tests/integration/compare/test_compare_int.py +++ b/tests/integration/compare/test_compare_int.py @@ -1,16 +1,4 @@ -from harvester.load import ( - create_ckan_package, - purge_ckan_package, - update_ckan_package, - dcatus_to_ckan, -) from harvester.compare import compare -from harvester.utils.json import open_json - -from pathlib import Path - -TEST_DIR = Path(__file__).parents[3] -HARVEST_SOURCES = TEST_DIR / "harvest-sources" def test_compare(data_sources): diff --git a/tests/unit/compare/conftest.py b/tests/unit/compare/conftest.py index cffbd863..a104793a 100644 --- a/tests/unit/compare/conftest.py +++ b/tests/unit/compare/conftest.py @@ -1,6 +1,5 @@ import pytest from pathlib import Path -import json from harvester.utils.json import open_json from harvester.utils.util import sort_dataset, dataset_to_hash diff --git a/tests/unit/load/ckan/conftest.py b/tests/unit/load/ckan/conftest.py index 57d5826b..c6067abc 100644 --- a/tests/unit/load/ckan/conftest.py +++ b/tests/unit/load/ckan/conftest.py @@ -1,26 +1,12 @@ -import os 
from pathlib import Path - import pytest - -from harvester.load import create_ckan_entrypoint, dcatus_to_ckan +from harvester.load import dcatus_to_ckan from harvester.utils.json import open_json TEST_DIR = Path(__file__).parents[3] HARVEST_SOURCES = TEST_DIR / "harvest-sources" -# @pytest.fixture -# def ckan_entrypoint(): -# catalog_dev_api_key = os.getenv("CKAN_API_TOKEN_DEV") # gha -# if catalog_dev_api_key is None: # local -# import credentials - -# catalog_dev_api_key = credentials.ckan_catalog_dev_api_key - -# return create_ckan_entrypoint("https://catalog-dev.data.gov/", catalog_dev_api_key) - - @pytest.fixture def test_ckan_package_id(): return "e875348b-a7c3-47eb-b0c3-168d978b0c0f" diff --git a/tests/unit/load/ckan/test_ckan_cud.py b/tests/unit/load/ckan/test_ckan_cud.py index daaffee8..ec0b9d94 100644 --- a/tests/unit/load/ckan/test_ckan_cud.py +++ b/tests/unit/load/ckan/test_ckan_cud.py @@ -6,6 +6,7 @@ def test_dcatus_to_ckan_transform(test_dcatus_catalog): + # ruff: noqa: E501 expected_result = { "name": "fdic-failed-bank-list", "owner_org": "test", From d419c50444e2e9488a9b6ed2e5006260f336fea5 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:56:02 -0700 Subject: [PATCH 19/27] bump pypi version to next minor. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 85d1cbe9..05eb58f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "datagov-harvesting-logic" -version = "0.0.4" +version = "0.1.0" description = "" # authors = [ # {name = "Jin Sun", email = "jin.sun@gsa.gov"}, From 9661590944f725cb1383228b5aeca490a94e54c7 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 12:58:27 -0700 Subject: [PATCH 20/27] add back ckan entrypoint fixture --- tests/unit/load/ckan/conftest.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/unit/load/ckan/conftest.py b/tests/unit/load/ckan/conftest.py index c6067abc..a71f28d2 100644 --- a/tests/unit/load/ckan/conftest.py +++ b/tests/unit/load/ckan/conftest.py @@ -1,12 +1,24 @@ from pathlib import Path import pytest -from harvester.load import dcatus_to_ckan +import os +from harvester.load import dcatus_to_ckan, create_ckan_entrypoint from harvester.utils.json import open_json TEST_DIR = Path(__file__).parents[3] HARVEST_SOURCES = TEST_DIR / "harvest-sources" +@pytest.fixture +def ckan_entrypoint(): + catalog_dev_api_key = os.getenv("CKAN_API_TOKEN_DEV") # gha + if catalog_dev_api_key is None: # local + import credentials + + catalog_dev_api_key = credentials.ckan_catalog_dev_api_key + + return create_ckan_entrypoint("https://catalog-dev.data.gov/", catalog_dev_api_key) + + @pytest.fixture def test_ckan_package_id(): return "e875348b-a7c3-47eb-b0c3-168d978b0c0f" From f2dcc75c70da5e387b77fa76ce5421327487897e Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 13:42:28 -0700 Subject: [PATCH 21/27] reorder keys in all datasets. 
--- tests/harvest-sources/dcatus/dcatus.json | 48 ++++++++++++------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/harvest-sources/dcatus/dcatus.json b/tests/harvest-sources/dcatus/dcatus.json index fc5f55e3..01f89791 100644 --- a/tests/harvest-sources/dcatus/dcatus.json +++ b/tests/harvest-sources/dcatus/dcatus.json @@ -6,8 +6,6 @@ "describedBy": "https://project-open-data.cio.gov/v1.1/schema/catalog.json", "dataset": [ { - "accessLevel": "public", - "bureauCode": ["339:00"], "contactPoint": { "fn": "Harold W. Hild", "hasEmail": "mailto:hhild@CFTC.GOV" @@ -19,8 +17,6 @@ "accessURL": "https://www.cftc.gov/MarketReports/CommitmentsofTraders/index.htm" } ], - "identifier": "cftc-dc1", - "keyword": ["commitment of traders", "cot", "open interest"], "modified": "R/P1W", "programCode": ["000:000"], "publisher": { @@ -29,7 +25,11 @@ "name": "U.S. Government" } }, - "title": "Commitment of Traders" + "title": "Commitment of Traders", + "accessLevel": "public", + "bureauCode": ["339:00"], + "identifier": "cftc-dc1", + "keyword": ["commitment of traders", "cot", "open interest"] }, { "accessLevel": "public", @@ -106,17 +106,23 @@ "mpd", "futures commission merchant" ], - "modified": "R/P1M", - "programCode": ["000:000"], "publisher": { "name": "U.S. Commodity Futures Trading Commission", "subOrganizationOf": { "name": "U.S. 
Government" } }, - "title": "Financial Data for FCMS" + "title": "Financial Data for FCMS", + "modified": "R/P1M", + "programCode": ["000:000"] }, { + "identifier": "cftc-dc5", + "keyword": [ + "net positions", + "larger trader net position", + "trading account net positions" + ], "accessLevel": "public", "bureauCode": ["339:00"], "contactPoint": { @@ -130,12 +136,6 @@ "accessURL": "https://www.cftc.gov/MarketReports/NetPositionChangesData/index.htm" } ], - "identifier": "cftc-dc5", - "keyword": [ - "net positions", - "larger trader net position", - "trading account net positions" - ], "modified": "2011-06-30", "programCode": ["000:000"], "publisher": { @@ -160,6 +160,7 @@ "accessURL": "https://www.cftc.gov/MarketReports/SwapsReports/index.htm" } ], + "title": "Weekly Swaps Report", "identifier": "cftc-dc6", "keyword": ["swaps report", "swaps market"], "modified": "R/P1W", @@ -169,8 +170,7 @@ "subOrganizationOf": { "name": "U.S. Government" } - }, - "title": "Weekly Swaps Report" + } }, { "accessLevel": "public", @@ -179,6 +179,13 @@ "fn": "Byung-IL Seo", "hasEmail": "mailto:BSeo@CFTC.gov" }, + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + "name": "U.S. Government" + } + }, + "title": "Cleared Margin Reports", "describedBy": "https://www.cftc.gov/MarketReports/ClearedMarginReports/index.htm", "description": "Derivatives clearing organizations (DCOs) are required to file daily reports on initial margin with the CFTC's Division of Clearing and Risk (DCR). Aggregate initial margin summary information for Chicago Mercantile Exchange (CME), ICE Clear Credit (ICC), ICE Clear US (ICUS), ICE Clear Europe (ICEU), LCH Ltd., and LCH SA is published below. 
The information will generally be updated within ten business days of the end of each month.", "distribution": [ @@ -189,14 +196,7 @@ "identifier": "cftc-dc7", "keyword": ["margin", "exchange"], "modified": "R/P1M", - "programCode": ["000:000"], - "publisher": { - "name": "U.S. Commodity Futures Trading Commission", - "subOrganizationOf": { - "name": "U.S. Government" - } - }, - "title": "Cleared Margin Reports" + "programCode": ["000:000"] } ] } From 09de3ed6c8d8c5d052bc2258b920609161bb5a31 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 13:42:53 -0700 Subject: [PATCH 22/27] add raw source fixture and with/without sort test. --- tests/unit/compare/conftest.py | 28 ++++++++++++++++++++++++++++ tests/unit/compare/test_compare.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/tests/unit/compare/conftest.py b/tests/unit/compare/conftest.py index a104793a..205dcc34 100644 --- a/tests/unit/compare/conftest.py +++ b/tests/unit/compare/conftest.py @@ -56,3 +56,31 @@ def data_sources(): ) # the response is stored sorted return harvest_source, ckan_source + + +@pytest.fixture +def data_sources_raw(): + harvest_source_datasets = open_json( + HARVEST_SOURCES / "dcatus" / "dcatus_compare.json" + )["dataset"] + + harvest_source = {d["identifier"]: d for d in harvest_source_datasets} + + ckan_source_datasets = open_json( + HARVEST_SOURCES / "dcatus" / "ckan_datasets_resp.json" + )["result"]["results"] + + ckan_source = {} + + for d in ckan_source_datasets: + orig_meta = None + orig_id = None + for e in d["extras"]: + if e["key"] == "dcat_metadata": + orig_meta = eval(e["value"], {"__builtins__": {}}) + if e["key"] == "identifier": + orig_id = e["value"] + + ckan_source[orig_id] = orig_meta + + return harvest_source, ckan_source diff --git a/tests/unit/compare/test_compare.py b/tests/unit/compare/test_compare.py index d07bc289..e1122294 100644 --- a/tests/unit/compare/test_compare.py +++ b/tests/unit/compare/test_compare.py @@ 
-1,4 +1,5 @@ from harvester.compare import compare +from harvester.utils.util import sort_dataset, dataset_to_hash def test_artificial_compare(artificial_data_sources): @@ -17,3 +18,32 @@ def test_compare(data_sources): assert len(compare_res["create"]) == 1 assert len(compare_res["update"]) == 3 assert len(compare_res["delete"]) == 1 + + +def test_sort(data_sources_raw): + harvest_source, ckan_source = data_sources_raw + + harvest_source_no_sort = harvest_source.copy() + for k, v in harvest_source_no_sort.items(): + harvest_source_no_sort[k] = dataset_to_hash(v) + + for k, v in ckan_source.items(): + ckan_source[k] = dataset_to_hash(v) + + compare_res_no_sort = compare(harvest_source_no_sort, ckan_source) + + # more datasets need to be updated simply because we didn't sort them + assert len(compare_res_no_sort["create"]) == 1 + assert len(compare_res_no_sort["update"]) == 6 + assert len(compare_res_no_sort["delete"]) == 1 + + harvest_source_with_sort = harvest_source.copy() + for k, v in harvest_source_with_sort.items(): + harvest_source_with_sort[k] = dataset_to_hash(sort_dataset(v)) + + compare_res = compare(harvest_source_with_sort, ckan_source) + + # applying the sort lowers us back down to what we expect. 
+ assert len(compare_res["create"]) == 1 + assert len(compare_res["update"]) == 3 + assert len(compare_res["delete"]) == 1 From c259e33ee3de96d2b68b1772b98a3aa9223e9926 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 14:51:58 -0700 Subject: [PATCH 23/27] add notes --- tests/unit/compare/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/compare/conftest.py b/tests/unit/compare/conftest.py index 205dcc34..684e6fa4 100644 --- a/tests/unit/compare/conftest.py +++ b/tests/unit/compare/conftest.py @@ -9,6 +9,8 @@ @pytest.fixture def artificial_data_sources(): + # key = dataset identifier + # value = hash value of the dataset harvest_source = { "1": "de955c1b-fa16-4b84-ad6c-f891ba276056", # update "2": "6d500ebc-19f8-4541-82b0-f02ad24c82e3", # do nothing From 8cfe9876f807cd4eab57fba45bc491194954c063 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Wed, 3 Jan 2024 14:53:10 -0700 Subject: [PATCH 24/27] limit field returns from query --- tests/integration/compare/conftest.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/integration/compare/conftest.py b/tests/integration/compare/conftest.py index 12f39f63..7ab08b4e 100644 --- a/tests/integration/compare/conftest.py +++ b/tests/integration/compare/conftest.py @@ -33,22 +33,22 @@ def data_sources(ckan_entrypoint): ) # the extract needs to be sorted ckan_source_datasets = search_ckan( - ckan_entrypoint, {"q": 'harvest_source_name:"test_harvest_source_name"'} + ckan_entrypoint, + { + "q": 'harvest_source_name:"test_harvest_source_name"', + "fl": [ + "extras_harvest_source_name", + "extras_dcat_metadata", + "extras_identifier", + ], + }, )["results"] ckan_source = {} for d in ckan_source_datasets: - orig_meta = None - orig_id = None - for e in d["extras"]: - if e["key"] == "dcat_metadata": - orig_meta = eval(e["value"], {"__builtins__": {}}) - if e["key"] == "identifier": - orig_id = e["value"] - - ckan_source[orig_id] = dataset_to_hash( 
- orig_meta + ckan_source[d["identifier"]] = dataset_to_hash( + eval(d["dcat_metadata"], {"__builtins__": {}}) ) # the response is stored sorted return harvest_source, ckan_source From b61562cc7fdc803562fc78165179e48de73ceb46 Mon Sep 17 00:00:00 2001 From: robert-bryson Date: Wed, 3 Jan 2024 14:52:51 -0800 Subject: [PATCH 25/27] isort lint --- harvester/__init__.py | 10 ++-------- harvester/load.py | 2 ++ harvester/utils/__init__.py | 3 +-- harvester/utils/util.py | 3 ++- tests/integration/compare/conftest.py | 10 ++++++---- tests/integration/load/ckan/test_ckan_cud_int.py | 8 ++------ tests/unit/compare/conftest.py | 6 ++++-- tests/unit/compare/test_compare.py | 2 +- tests/unit/load/ckan/conftest.py | 6 ++++-- 9 files changed, 24 insertions(+), 26 deletions(-) diff --git a/harvester/__init__.py b/harvester/__init__.py index 8f14b9fe..7723990a 100644 --- a/harvester/__init__.py +++ b/harvester/__init__.py @@ -22,14 +22,8 @@ # TODO these imports will need to be updated to ensure a consistent api from .compare import compare from .extract import download_waf, extract, traverse_waf -from .load import ( - create_ckan_package, - dcatus_to_ckan, - load, - patch_ckan_package, - purge_ckan_package, - update_ckan_package, -) +from .load import (create_ckan_package, dcatus_to_ckan, load, + patch_ckan_package, purge_ckan_package, update_ckan_package) from .transform import transform from .utils import * from .validate import * diff --git a/harvester/load.py b/harvester/load.py index 0edab523..24fb1c59 100644 --- a/harvester/load.py +++ b/harvester/load.py @@ -1,6 +1,8 @@ import logging import re + import ckanapi + from harvester.utils.util import sort_dataset logger = logging.getLogger("harvester") diff --git a/harvester/utils/__init__.py b/harvester/utils/__init__.py index 7437333e..7044a5b5 100644 --- a/harvester/utils/__init__.py +++ b/harvester/utils/__init__.py @@ -1,4 +1,3 @@ -from . import json -from . import util +from . 
import json, util __all__ = ["json", "util"] diff --git a/harvester/utils/util.py b/harvester/utils/util.py index 6721adfd..81174a6c 100644 --- a/harvester/utils/util.py +++ b/harvester/utils/util.py @@ -1,7 +1,8 @@ import hashlib -import sansjson import json +import sansjson + def sort_dataset(d): return sansjson.sort_pyobject(d) diff --git a/tests/integration/compare/conftest.py b/tests/integration/compare/conftest.py index 7ab08b4e..b9114a5b 100644 --- a/tests/integration/compare/conftest.py +++ b/tests/integration/compare/conftest.py @@ -1,9 +1,11 @@ -import pytest -from pathlib import Path import os +from pathlib import Path + +import pytest + +from harvester.load import create_ckan_entrypoint, search_ckan from harvester.utils.json import open_json -from harvester.utils.util import sort_dataset, dataset_to_hash -from harvester.load import search_ckan, create_ckan_entrypoint +from harvester.utils.util import dataset_to_hash, sort_dataset TEST_DIR = Path(__file__).parents[2] HARVEST_SOURCES = TEST_DIR / "harvest-sources" diff --git a/tests/integration/load/ckan/test_ckan_cud_int.py b/tests/integration/load/ckan/test_ckan_cud_int.py index 3d0dffac..a7f25bad 100644 --- a/tests/integration/load/ckan/test_ckan_cud_int.py +++ b/tests/integration/load/ckan/test_ckan_cud_int.py @@ -1,9 +1,5 @@ -from harvester.load import ( - create_ckan_package, - patch_ckan_package, - purge_ckan_package, - update_ckan_package, -) +from harvester.load import (create_ckan_package, patch_ckan_package, + purge_ckan_package, update_ckan_package) def test_create_package(ckan_entrypoint, test_ckan_package): diff --git a/tests/unit/compare/conftest.py b/tests/unit/compare/conftest.py index 684e6fa4..2ccd15cb 100644 --- a/tests/unit/compare/conftest.py +++ b/tests/unit/compare/conftest.py @@ -1,7 +1,9 @@ -import pytest from pathlib import Path + +import pytest + from harvester.utils.json import open_json -from harvester.utils.util import sort_dataset, dataset_to_hash +from harvester.utils.util 
import dataset_to_hash, sort_dataset TEST_DIR = Path(__file__).parents[2] HARVEST_SOURCES = TEST_DIR / "harvest-sources" diff --git a/tests/unit/compare/test_compare.py b/tests/unit/compare/test_compare.py index e1122294..84bca11d 100644 --- a/tests/unit/compare/test_compare.py +++ b/tests/unit/compare/test_compare.py @@ -1,5 +1,5 @@ from harvester.compare import compare -from harvester.utils.util import sort_dataset, dataset_to_hash +from harvester.utils.util import dataset_to_hash, sort_dataset def test_artificial_compare(artificial_data_sources): diff --git a/tests/unit/load/ckan/conftest.py b/tests/unit/load/ckan/conftest.py index a71f28d2..47d5b453 100644 --- a/tests/unit/load/ckan/conftest.py +++ b/tests/unit/load/ckan/conftest.py @@ -1,7 +1,9 @@ +import os from pathlib import Path + import pytest -import os -from harvester.load import dcatus_to_ckan, create_ckan_entrypoint + +from harvester.load import create_ckan_entrypoint, dcatus_to_ckan from harvester.utils.json import open_json TEST_DIR = Path(__file__).parents[3] From c66c4c2fccf4b6d84d86e37072f6ef1daad59ea9 Mon Sep 17 00:00:00 2001 From: robert-bryson Date: Wed, 3 Jan 2024 15:25:54 -0800 Subject: [PATCH 26/27] Readme.md fixup --- README.md | 104 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 46fa7321..dba131f8 100644 --- a/README.md +++ b/README.md @@ -5,68 +5,86 @@ transformation, and loading into the data.gov catalog. ## Features -The datagov-harvesting-logic offers the following features: - - Extract - - general purpose fetching and downloading of web resources. - - catered extraction to the following data formats: + - General purpose fetching and downloading of web resources. + - Catered extraction to the following data formats: - DCAT-US - Validation - DCAT-US - - jsonschema validation using draft 2020-12. + - `jsonschema` validation using draft 2020-12. 
- Load - DCAT-US - - conversion of dcatu-us catalog into ckan dataset schema - - create, delete, update, and patch of ckan package/dataset + - Conversion of dcat-us catalog into ckan dataset schema + - Create, delete, update, and patch of ckan package/dataset ## Requirements -This project is using poetry to manage this project. Install [here](https://python-poetry.org/docs/#installation). +This project is using `poetry` to manage this project. Install [here](https://python-poetry.org/docs/#installation). Once installed, `poetry install` installs dependencies into a local virtual environment. ## Testing + ### CKAN load testing + - CKAN load testing doesn't require the services provided in the `docker-compose.yml`. - [catalog-dev](https://catalog-dev.data.gov/) is used for ckan load testing. -- Create an api-key by signing into catalog-dev. +- Create an api-key by signing into catalog-dev. - Create a `credentials.py` file at the root of the project containing the variable `ckan_catalog_dev_api_key` assigned to the api-key. -- run tests with the command `poetry run pytest ./tests/load/ckan` +- Run tests with the command `poetry run pytest ./tests/load/ckan` + ### Harvester testing -- These tests are found in `extract`, and `validate`. Some of them rely on services in the `docker-compose.yml`. run using docker `docker compose up -d` and with the command `poetry run pytest --ignore=./tests/load/ckan`. + +- These tests are found in `extract`, and `validate`. Some of them rely on services in the `docker-compose.yml`. Run using docker `docker compose up -d` and with the command `poetry run pytest --ignore=./tests/load/ckan`. If you followed the instructions for `CKAN load testing` and `Harvester testing` you can simply run `poetry run pytest` to run all tests. 
+## Comparison + +- `./tests/harvest_sources/ckan_datasets_resp.json` + - Represents what ckan would respond with after querying for the harvest source name +- `./tests/harvest_sources/dcatus_compare.json` + - Represents a changed harvest source + - Created: + - datasets[0] + + ```diff + + "identifier" = "cftc-dc10" + ``` + + - Deleted: + - datasets[0] + + ```diff + - "identifier" = "cftc-dc1" + ``` + + - Updated: + - datasets[1] + + ```diff + - "modified": "R/P1M" + + "modified": "R/P1M Update" + ``` + + - datasets[2] + + ```diff + - "keyword": ["cotton on call", "cotton on-call"] + + "keyword": ["cotton on call", "cotton on-call", "update keyword"] + ``` + + - datasets[3] + + ```diff + "publisher": { + "name": "U.S. Commodity Futures Trading Commission", + "subOrganizationOf": { + - "name": "U.S. Government" + + "name": "Changed Value" + } + } + ``` -## Comparison -- ./tests/harvest_sources/ckan_datasets_resp.json - - represents what ckan would respond with after querying for the harvest source name -- ./tests/harvest_sources/dcatus_compare.json - - represents a changed harvest source - - what has been created? - - datasets[0] - - "identifier" = "cftc-dc10" - - what has been deleted? - - datasets[0] - - "identifier" = "cftc-dc1" - - what has been updated? - - datasets[1] - - from "modified": "R/P1M" to "modified": "R/P1M Update" - - datasets[2] - - from "keyword": ["cotton on call", "cotton on-call"] - - to "keyword": ["cotton on call", "cotton on-call", "update keyword"] - - datasets[3] - - from "publisher": { - "name": "U.S. Commodity Futures Trading Commission", - "subOrganizationOf": { - "name": "U.S. Government" - } - } - - to "publisher": { - "name": "U.S. Commodity Futures Trading Commission", - "subOrganizationOf": { - "name": "Changed Value" - } - } -- ./test/harvest_sources/dcatus.json - - represents an original harvest source prior to change occuring. 
\ No newline at end of file +- `./test/harvest_sources/dcatus.json` + - Represents an original harvest source prior to change occurring. From c31a32ee2f1cad23c39db718001677e698fcd014 Mon Sep 17 00:00:00 2001 From: Reid Hewitt Date: Thu, 4 Jan 2024 10:46:21 -0700 Subject: [PATCH 27/27] add more load tests and fixtures --- harvester/load.py | 4 +- tests/unit/load/ckan/conftest.py | 18 +++++ tests/unit/load/ckan/test_ckan_cud.py | 65 --------------- tests/unit/load/ckan/test_ckan_transform.py | 89 +++++++++++++++++++++ 4 files changed, 110 insertions(+), 66 deletions(-) create mode 100644 tests/unit/load/ckan/test_ckan_transform.py diff --git a/harvester/load.py b/harvester/load.py index 24fb1c59..53a2c846 100644 --- a/harvester/load.py +++ b/harvester/load.py @@ -145,7 +145,9 @@ def dcatus_to_ckan(dcatus_dataset, harvest_source_name): resources = create_ckan_resources(dcatus_dataset) tags = create_ckan_tags(dcatus_dataset["keyword"]) - pubisher_hierarchy = create_ckan_publisher_hierarchy(dcatus_dataset["publisher"]) + pubisher_hierarchy = create_ckan_publisher_hierarchy( + dcatus_dataset["publisher"], [] + ) extras_base = create_ckan_extra_base( pubisher_hierarchy, "Dataset", dcatus_dataset["publisher"]["name"] diff --git a/tests/unit/load/ckan/conftest.py b/tests/unit/load/ckan/conftest.py index 47d5b453..5eab4a22 100644 --- a/tests/unit/load/ckan/conftest.py +++ b/tests/unit/load/ckan/conftest.py @@ -51,3 +51,21 @@ def test_ckan_patch_package(test_ckan_package_id): @pytest.fixture def test_ckan_purge_package(test_ckan_package_id): return {"id": test_ckan_package_id} + + +@pytest.fixture +def test_ckan_transform_catalog(): + return { + "identifier": "test identifier", + "contactPoint": {"fn": "Bob Smith", "hasEmail": "bob.smith@example.com"}, + "description": "test description", + "title": "test title", + } + + +@pytest.fixture +def test_ckan_publisher(): + return { + "name": "U.S.
Test Organization of the Tests", + "subOrganizationOf": {"name": "Test Incorporated"}, + } diff --git a/tests/unit/load/ckan/test_ckan_cud.py b/tests/unit/load/ckan/test_ckan_cud.py index ec0b9d94..fb796467 100644 --- a/tests/unit/load/ckan/test_ckan_cud.py +++ b/tests/unit/load/ckan/test_ckan_cud.py @@ -1,73 +1,8 @@ from unittest.mock import patch -from deepdiff import DeepDiff - import harvester -def test_dcatus_to_ckan_transform(test_dcatus_catalog): - # ruff: noqa: E501 - expected_result = { - "name": "fdic-failed-bank-list", - "owner_org": "test", - "identifier": "https://www.fdic.gov/bank/individual/failed/", - "maintainer": "FDIC Public Data Feedback", - "maintainer_email": "FDICPublicDataFeedback@fdic.gov", - "notes": "The FDIC is often appointed as receiver for failed banks. This list includes banks which have failed since October 1, 2000.", - "title": "FDIC Failed Bank List", - "resources": [ - {"url": "https://www.fdic.gov/bank/individual/failed/banklist.csv"}, - {"url": "https://www.fdic.gov/bank/individual/failed/index.html"}, - ], - "tags": [ - {"name": "financial-institution"}, - {"name": "banks"}, - {"name": "failures"}, - {"name": "assistance-transactions"}, - ], - "extras": [ - { - "key": "publisher_hierarchy", - "value": "U.S. Government > Federal Deposit Insurance Corporation > Division of Insurance and Research", - }, - {"key": "resource-type", "value": "Dataset"}, - {"key": "publisher", "value": "Division of Insurance and Research"}, - {"key": "accessLevel", "value": "public"}, - {"key": "bureauCode", "value": "357:20"}, - { - "key": "identifier", - "value": "https://www.fdic.gov/bank/individual/failed/", - }, - {"key": "modified", "value": "R/P1W"}, - {"key": "programCode", "value": "000:000"}, - {"key": "publisher", "value": "Division of Insurance and Research"}, - { - "key": "publisher_hierarchy", - "value": "U.S. 
Government > Federal Deposit Insurance Corporation > Division of Insurance and Research", - }, - {"key": "resource-type", "value": "Dataset"}, - {"key": "publisher", "value": "Division of Insurance and Research"}, - { - "key": "dcat_metadata", - "value": "{'accessLevel': 'public', 'bureauCode': ['357:20'], 'contactPoint': {'fn': 'FDIC Public Data Feedback', 'hasEmail': 'mailto:FDICPublicDataFeedback@fdic.gov'}, 'description': 'The FDIC is often appointed as receiver for failed banks. This list includes banks which have failed since October 1, 2000.', 'distribution': [{'accessURL': 'https://www.fdic.gov/bank/individual/failed/index.html', 'mediaType': 'text/html'}, {'downloadURL': 'https://www.fdic.gov/bank/individual/failed/banklist.csv', 'mediaType': 'text/csv'}], 'identifier': 'https://www.fdic.gov/bank/individual/failed/', 'keyword': ['assistance transactions', 'banks', 'failures', 'financial institution'], 'modified': 'R/P1W', 'programCode': ['000:000'], 'publisher': {'name': 'Division of Insurance and Research', 'subOrganizationOf': {'name': 'Federal Deposit Insurance Corporation', 'subOrganizationOf': {'name': 'U.S. 
Government'}}}, 'title': 'FDIC Failed Bank List'}", - }, - {"key": "harvest_source_name", "value": "example_harvest_source_name"}, - ], - "author": None, - "author_email": None, - } - - assert ( - DeepDiff( - harvester.dcatus_to_ckan( - test_dcatus_catalog, "example_harvest_source_name" - ), - expected_result, - ) - == {} - ) - - @patch("harvester.create_ckan_package") def test_create_package(mock_create_ckan_package, ckan_entrypoint, test_ckan_package): mock_create_ckan_package.return_value = test_ckan_package.copy() diff --git a/tests/unit/load/ckan/test_ckan_transform.py b/tests/unit/load/ckan/test_ckan_transform.py new file mode 100644 index 00000000..95b2f50c --- /dev/null +++ b/tests/unit/load/ckan/test_ckan_transform.py @@ -0,0 +1,89 @@ +from harvester.load import ( + simple_transform, + create_ckan_publisher_hierarchy, + dcatus_to_ckan, +) +from deepdiff import DeepDiff + + +def test_simple_transform(test_ckan_transform_catalog): + expected_result = { + "name": "test-title", + "owner_org": "test", + "identifier": "test identifier", + "maintainer": "Bob Smith", + "maintainer_email": "bob.smith@example.com", + "notes": "test description", + "title": "test title", + } + + res = simple_transform(test_ckan_transform_catalog) + assert res == expected_result + + +def test_publisher_name(test_ckan_publisher): + res = create_ckan_publisher_hierarchy(test_ckan_publisher, []) + assert res == "Test Incorporated > U.S. Test Organization of the Tests" + + +def test_dcatus_to_ckan_transform(test_dcatus_catalog): + # ruff: noqa: E501 + expected_result = { + "name": "fdic-failed-bank-list", + "owner_org": "test", + "identifier": "https://www.fdic.gov/bank/individual/failed/", + "maintainer": "FDIC Public Data Feedback", + "maintainer_email": "FDICPublicDataFeedback@fdic.gov", + "notes": "The FDIC is often appointed as receiver for failed banks. 
This list includes banks which have failed since October 1, 2000.", + "title": "FDIC Failed Bank List", + "resources": [ + {"url": "https://www.fdic.gov/bank/individual/failed/banklist.csv"}, + {"url": "https://www.fdic.gov/bank/individual/failed/index.html"}, + ], + "tags": [ + {"name": "financial-institution"}, + {"name": "banks"}, + {"name": "failures"}, + {"name": "assistance-transactions"}, + ], + "extras": [ + { + "key": "publisher_hierarchy", + "value": "U.S. Government > Federal Deposit Insurance Corporation > Division of Insurance and Research", + }, + {"key": "resource-type", "value": "Dataset"}, + {"key": "publisher", "value": "Division of Insurance and Research"}, + {"key": "accessLevel", "value": "public"}, + {"key": "bureauCode", "value": "357:20"}, + { + "key": "identifier", + "value": "https://www.fdic.gov/bank/individual/failed/", + }, + {"key": "modified", "value": "R/P1W"}, + {"key": "programCode", "value": "000:000"}, + {"key": "publisher", "value": "Division of Insurance and Research"}, + { + "key": "publisher_hierarchy", + "value": "U.S. Government > Federal Deposit Insurance Corporation > Division of Insurance and Research", + }, + {"key": "resource-type", "value": "Dataset"}, + {"key": "publisher", "value": "Division of Insurance and Research"}, + { + "key": "dcat_metadata", + "value": "{'accessLevel': 'public', 'bureauCode': ['357:20'], 'contactPoint': {'fn': 'FDIC Public Data Feedback', 'hasEmail': 'mailto:FDICPublicDataFeedback@fdic.gov'}, 'description': 'The FDIC is often appointed as receiver for failed banks. 
This list includes banks which have failed since October 1, 2000.', 'distribution': [{'accessURL': 'https://www.fdic.gov/bank/individual/failed/index.html', 'mediaType': 'text/html'}, {'downloadURL': 'https://www.fdic.gov/bank/individual/failed/banklist.csv', 'mediaType': 'text/csv'}], 'identifier': 'https://www.fdic.gov/bank/individual/failed/', 'keyword': ['assistance transactions', 'banks', 'failures', 'financial institution'], 'modified': 'R/P1W', 'programCode': ['000:000'], 'publisher': {'name': 'Division of Insurance and Research', 'subOrganizationOf': {'name': 'Federal Deposit Insurance Corporation', 'subOrganizationOf': {'name': 'U.S. Government'}}}, 'title': 'FDIC Failed Bank List'}", + }, + {"key": "harvest_source_name", "value": "example_harvest_source_name"}, + ], + "author": None, + "author_email": None, + } + + # res = dcatus_to_ckan(test_dcatus_catalog, "example_harvest_source_name") + + assert ( + DeepDiff( + dcatus_to_ckan(test_dcatus_catalog, "example_harvest_source_name"), + expected_result, + ) + == {} + )