diff --git a/tests/unit/compare/conftest.py b/tests/unit/compare/conftest.py
index a104793a..205dcc34 100644
--- a/tests/unit/compare/conftest.py
+++ b/tests/unit/compare/conftest.py
@@ -56,3 +56,47 @@ def data_sources():
         )  # the response is stored sorted
 
     return harvest_source, ckan_source
+
+
+@pytest.fixture
+def data_sources_raw():
+    """Load the harvest and CKAN fixture datasets, keyed by identifier.
+
+    Nothing is sorted or hashed here; tests apply those steps themselves.
+
+    Returns:
+        A ``(harvest_source, ckan_source)`` tuple of dicts. Harvest values are
+        the records under ``dataset`` in ``dcatus_compare.json``. CKAN values
+        are the parsed ``dcat_metadata`` extra of each result in
+        ``ckan_datasets_resp.json``, keyed by that result's ``identifier``
+        extra (either stays ``None`` when the extra is absent).
+    """
+    # Function-scope import so this hunk does not depend on the module header.
+    import ast
+
+    harvest_source_datasets = open_json(
+        HARVEST_SOURCES / "dcatus" / "dcatus_compare.json"
+    )["dataset"]
+
+    harvest_source = {d["identifier"]: d for d in harvest_source_datasets}
+
+    ckan_source_datasets = open_json(
+        HARVEST_SOURCES / "dcatus" / "ckan_datasets_resp.json"
+    )["result"]["results"]
+
+    ckan_source = {}
+
+    for d in ckan_source_datasets:
+        orig_meta = None
+        orig_id = None
+        for e in d["extras"]:
+            if e["key"] == "dcat_metadata":
+                # The value is expected to be a Python literal (it used to be
+                # eval'd); literal_eval parses it without executing code.
+                orig_meta = ast.literal_eval(e["value"])
+            elif e["key"] == "identifier":
+                orig_id = e["value"]
+
+        ckan_source[orig_id] = orig_meta
+
+    return harvest_source, ckan_source
diff --git a/tests/unit/compare/test_compare.py b/tests/unit/compare/test_compare.py
index d07bc289..e1122294 100644
--- a/tests/unit/compare/test_compare.py
+++ b/tests/unit/compare/test_compare.py
@@ -1,4 +1,5 @@
 from harvester.compare import compare
+from harvester.utils.util import sort_dataset, dataset_to_hash
 
 
 def test_artificial_compare(artificial_data_sources):
@@ -17,3 +18,30 @@ def test_compare(data_sources):
     assert len(compare_res["create"]) == 1
     assert len(compare_res["update"]) == 3
     assert len(compare_res["delete"]) == 1
+
+
+def test_sort(data_sources_raw):
+    """Check that sorting datasets before hashing removes spurious updates."""
+    harvest_source, ckan_source = data_sources_raw
+
+    # Build new dicts rather than reassigning values while iterating .items().
+    harvest_no_sort = {k: dataset_to_hash(v) for k, v in harvest_source.items()}
+    ckan_hashes = {k: dataset_to_hash(v) for k, v in ckan_source.items()}
+
+    compare_res_no_sort = compare(harvest_no_sort, ckan_hashes)
+
+    # more datasets need to be updated simply because we didn't sort them
+    assert len(compare_res_no_sort["create"]) == 1
+    assert len(compare_res_no_sort["update"]) == 6
+    assert len(compare_res_no_sort["delete"]) == 1
+
+    harvest_sorted = {
+        k: dataset_to_hash(sort_dataset(v)) for k, v in harvest_source.items()
+    }
+
+    compare_res = compare(harvest_sorted, ckan_hashes)
+
+    # applying the sort lowers us back down to what we expect.
+    assert len(compare_res["create"]) == 1
+    assert len(compare_res["update"]) == 3
+    assert len(compare_res["delete"]) == 1