Skip to content

Commit

Permalink
add raw source fixture and with/without sort test.
Browse files Browse the repository at this point in the history
  • Loading branch information
rshewitt committed Jan 3, 2024
1 parent f2dcc75 commit 09de3ed
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 0 deletions.
28 changes: 28 additions & 0 deletions tests/unit/compare/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,31 @@ def data_sources():
) # the response is stored sorted

return harvest_source, ckan_source


@pytest.fixture
def data_sources_raw():
    """Provide the harvest-source and CKAN datasets as raw dicts.

    Neither side is sorted or hashed here, so a test can apply (or skip)
    those steps itself before comparing.

    Returns:
        A ``(harvest_source, ckan_source)`` tuple where

        * ``harvest_source`` maps each dataset's ``"identifier"`` to the
          dataset dict read from ``dcatus_compare.json``;
        * ``ckan_source`` maps the value of each CKAN record's
          ``"identifier"`` extra to the parsed value of its
          ``"dcat_metadata"`` extra (``None`` when that extra is absent).
    """
    # Function-scope import so this fixture does not depend on the
    # module's import block.
    import ast

    harvest_source_datasets = open_json(
        HARVEST_SOURCES / "dcatus" / "dcatus_compare.json"
    )["dataset"]

    harvest_source = {d["identifier"]: d for d in harvest_source_datasets}

    ckan_source_datasets = open_json(
        HARVEST_SOURCES / "dcatus" / "ckan_datasets_resp.json"
    )["result"]["results"]

    ckan_source = {}

    for d in ckan_source_datasets:
        orig_meta = None
        orig_id = None
        for e in d["extras"]:
            if e["key"] == "dcat_metadata":
                # The stored value is presumably the repr of a Python dict
                # (TODO confirm against the fixture file). literal_eval
                # parses literals only; unlike eval() it cannot execute
                # arbitrary expressions smuggled into the data.
                orig_meta = ast.literal_eval(e["value"])
            elif e["key"] == "identifier":
                orig_id = e["value"]

        # NOTE(review): a record without an "identifier" extra is stored
        # under the key None, overwriting any earlier such record.
        ckan_source[orig_id] = orig_meta

    return harvest_source, ckan_source
30 changes: 30 additions & 0 deletions tests/unit/compare/test_compare.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from harvester.compare import compare
from harvester.utils.util import sort_dataset, dataset_to_hash


def test_artificial_compare(artificial_data_sources):
Expand All @@ -17,3 +18,32 @@ def test_compare(data_sources):
assert len(compare_res["create"]) == 1
assert len(compare_res["update"]) == 3
assert len(compare_res["delete"]) == 1


def test_sort(data_sources_raw):
    """Check that sorting harvest datasets before hashing avoids spurious updates.

    The comparison runs twice against the same hashed CKAN records: first
    with the harvest datasets hashed as-is, then with each one passed
    through ``sort_dataset`` before hashing.
    """
    harvest_source, ckan_source = data_sources_raw

    # Hash the harvest datasets exactly as they were read (no sorting).
    unsorted_hashes = {
        identifier: dataset_to_hash(dataset)
        for identifier, dataset in harvest_source.items()
    }

    # Replace each CKAN record with its hash, in place.
    ckan_source.update(
        {
            identifier: dataset_to_hash(dataset)
            for identifier, dataset in ckan_source.items()
        }
    )

    unsorted_result = compare(unsorted_hashes, ckan_source)

    # more datasets need to be updated simply because we didn't sort them
    expected_unsorted = {"create": 1, "update": 6, "delete": 1}
    for action, expected_count in expected_unsorted.items():
        assert len(unsorted_result[action]) == expected_count

    # Same harvest datasets, but sorted before hashing this time.
    sorted_hashes = {
        identifier: dataset_to_hash(sort_dataset(dataset))
        for identifier, dataset in harvest_source.items()
    }

    sorted_result = compare(sorted_hashes, ckan_source)

    # applying the sort lowers us back down to what we expect.
    expected_sorted = {"create": 1, "update": 3, "delete": 1}
    for action, expected_count in expected_sorted.items():
        assert len(sorted_result[action]) == expected_count

1 comment on commit 09de3ed

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
File | Stmts | Miss | Cover | Missing
harvester
   __init__.py120100% 
   compare.py120100% 
   extract.py4877 85%
   load.py1001010 90%
   transform.py1377 46%
harvester/utils
   __init__.py30100% 
   json.py40100% 
   util.py70100% 
harvester/validate
   __init__.py20100% 
   dcat_us.py2433 88%
TOTAL | 225 | 27 | 88%

Tests Skipped Failures Errors Time
26 0 💤 0 ❌ 0 🔥 45.147s ⏱️

Please sign in to comment.