From cfb7b80d1429e1d90823b7548e96c54ae62140e7 Mon Sep 17 00:00:00 2001 From: Mike Date: Tue, 23 Jul 2024 18:34:53 +1200 Subject: [PATCH] HDXDSYS-898 Refactor org code (#134) * Add uncleaned names as keys to lookups Remove intersectoral check dict() to {} Move branches that have continue higher up in loops in operational presence * Some rearrangement of operational presence and comments to help me understand the org processing * Small reorg of org type if code * store normalised keys as well as non normalised * Use already normalised key * Simplify org lookup code and operational presence * Add to lookup to reduce need to keep normalising * Pass around normalise variables * Remove org lookup only used in test * Remove unnecessary variable * Update CHANGELOG * Can just return value here * Combine ifs * Use named tuples for clarity * Rename value to org_info * Use org_data in populate_multiple * Use OrgData in tests * Make OrgInfo into a data class Add used and complete bools to OrgInfo Correct OrgInfo objects in org_map with corrections from looking up in data member variable * Make separate function * Add debug option to command line Add org_map debug --- CHANGELOG.md | 7 + pyproject.toml | 2 +- requirements.txt | 10 +- src/hapi/pipelines/app/__main__.py | 12 + src/hapi/pipelines/app/pipelines.py | 5 +- src/hapi/pipelines/database/conflict_event.py | 2 +- .../database/operational_presence.py | 181 ++++++-------- src/hapi/pipelines/database/org.py | 196 ++++++++++----- src/hapi/pipelines/database/org_type.py | 5 +- src/hapi/pipelines/database/sector.py | 9 +- src/hapi/pipelines/utilities/mappings.py | 7 +- tests/test_main.py | 228 +++++++++++++++++- 12 files changed, 476 insertions(+), 188 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b1e69e1..bfc5eb82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [0.9.36] - 2024-07-19 + +### Changed + +- Refactor org code +- Also add uncleaned names as keys to lookups + ## [0.9.35] - 2024-07-18 ### Fixed diff --git a/pyproject.toml b/pyproject.toml index ab13da26..40240592 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ requires-python = ">=3.8" dependencies = [ "hapi-schema>=0.8.14", "hdx-python-api>= 6.3.1", - "hdx-python-country>= 3.7.6", + "hdx-python-country>= 3.7.7", "hdx-python-database[postgresql]>= 1.3.1", "hdx-python-scraper>= 2.4.0", "hdx-python-utilities>= 3.7.2", diff --git a/requirements.txt b/requirements.txt index 4dd27bba..09904c26 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ click==8.1.7 # via typer coverage==7.6.0 # via pytest-cov -cryptography==42.0.8 +cryptography==43.0.0 # via pyopenssl defopt==6.4.0 # via hdx-python-api @@ -64,7 +64,7 @@ hdx-python-api==6.3.1 # via # hapi-pipelines (pyproject.toml) # hdx-python-scraper -hdx-python-country==3.7.6 +hdx-python-country==3.7.7 # via # hapi-pipelines (pyproject.toml) # hdx-python-api @@ -172,13 +172,13 @@ pydantic-core==2.20.1 # via pydantic pygments==2.18.0 # via rich -pyopenssl==24.1.0 +pyopenssl==24.2.1 # via # hdx-python-api # ndg-httpsclient pyphonetics==0.5.3 # via hdx-python-country -pytest==8.2.2 +pytest==8.3.1 # via # hapi-pipelines (pyproject.toml) # pytest-check @@ -242,7 +242,7 @@ ruamel-yaml==0.18.6 # via hdx-python-utilities ruamel-yaml-clib==0.2.8 # via ruamel-yaml -setuptools==70.3.0 +setuptools==71.1.0 # via ckanapi shellingham==1.5.4 # via typer diff --git a/src/hapi/pipelines/app/__main__.py b/src/hapi/pipelines/app/__main__.py index e04cc5d7..87c0b7b5 100755 --- a/src/hapi/pipelines/app/__main__.py +++ b/src/hapi/pipelines/app/__main__.py @@ -84,6 +84,13 @@ def parse_args(): action="store_true", help="Use saved data", ) + parser.add_argument( + "-dbg", + "--debug", + default=False, + action="store_true", + help="Debug", + ) return parser.parse_args() @@ -95,6 +102,7 @@ def main( basic_auths: Optional[Dict[str, str]] = None, save: bool = False, use_saved: bool = False, + debug: bool = False, **ignore, ) -> None: """Run HAPI. Either a database connection string (db_uri) or database @@ -110,6 +118,7 @@ def main( basic_auths (Optional[Dict[str, str]]): Basic authorisations save (bool): Whether to save state for testing. Defaults to False. use_saved (bool): Whether to use saved state for testing. Defaults to False. + debug (bool): Whether to output debug info. Defaults to False. Returns: None @@ -156,6 +165,8 @@ def main( ) pipelines.run() pipelines.output() + if debug: + pipelines.debug("debug") logger.info("HAPI pipelines completed!") @@ -233,4 +244,5 @@ def main( basic_auths=basic_auths, save=args.save, use_saved=args.use_saved, + debug=args.debug, ) diff --git a/src/hapi/pipelines/app/pipelines.py b/src/hapi/pipelines/app/pipelines.py index b0499ce2..10576ad8 100644 --- a/src/hapi/pipelines/app/pipelines.py +++ b/src/hapi/pipelines/app/pipelines.py @@ -96,7 +96,7 @@ def __init__( errors_on_exit=errors_on_exit, scrapers_to_run=scrapers_to_run, ) - self.configurable_scrapers = dict() + self.configurable_scrapers = {} self.create_configurable_scrapers() self.metadata = Metadata( runner=self.runner, session=session, today=today @@ -333,3 +333,6 @@ def output(self): self.wfp_commodity.populate() self.wfp_market.populate() self.food_price.populate() + + def debug(self, folder: str) -> None: + self.org.output_org_map(folder) diff --git a/src/hapi/pipelines/database/conflict_event.py b/src/hapi/pipelines/database/conflict_event.py index 351561a3..e0b8919f 100644 --- a/src/hapi/pipelines/database/conflict_event.py +++ b/src/hapi/pipelines/database/conflict_event.py @@ -103,7 +103,7 @@ def populate(self): batch_populate(conflict_event_rows, self._session, DBConflictEvent) for dataset, msg in self._config.get( - "conflict_event_error_messages", dict() + "conflict_event_error_messages", {} ).items(): add_message(errors, dataset, msg) for error in sorted(errors): diff --git a/src/hapi/pipelines/database/operational_presence.py b/src/hapi/pipelines/database/operational_presence.py index 1314d0ad..2b90c648 100644 --- a/src/hapi/pipelines/database/operational_presence.py +++ b/src/hapi/pipelines/database/operational_presence.py @@ -1,12 +1,10 @@ """Functions specific to the operational presence theme.""" from logging import getLogger -from os.path import join -from typing import Dict +from typing import Dict, Optional, Set from hapi_schema.db_operational_presence import DBOperationalPresence from hdx.location.adminlevel import AdminLevel -from hdx.utilities.dictandlist import write_list_to_csv from hdx.utilities.text import normalise from sqlalchemy.orm import Session @@ -15,7 +13,7 @@ from . import admins from .base_uploader import BaseUploader from .metadata import Metadata -from .org import Org +from .org import Org, OrgInfo from .org_type import OrgType from .sector import Sector @@ -47,11 +45,39 @@ def __init__( self._results = results self._config = config - def populate(self, debug=False): + def complete_org_info( + self, + org_info: OrgInfo, + org_acronym: Optional[str], + org_type_name: Optional[str], + errors: Set[str], + dataset_name: str, + ): + if org_info.acronym is None and org_acronym is not None: + if len(org_acronym) > 32: + org_acronym = org_acronym[:32] + org_info.acronym = org_acronym + org_info.normalised_acronym = normalise(org_acronym) + + # * Org type processing + if org_info.type_code is None and org_type_name is not None: + org_type_code = self._org_type.get_org_type_code(org_type_name) + if org_type_code: + org_info.type_code = org_type_code + else: + add_missing_value_message( + errors, + dataset_name, + "org type", + org_type_name, + ) + + # * Org matching + self._org.add_or_match_org(org_info) + + def populate(self): logger.info("Populating operational presence table") operational_presence_rows = [] - if debug: - debug_rows = [] errors = set() for dataset in self._results.values(): dataset_name = dataset["hdx_stub"] @@ -59,8 +85,6 @@ def populate(self, debug=False): time_period_end = dataset["time_period"]["end"] number_duplicates = 0 for admin_level, admin_results in dataset["results"].items(): - resource_id = admin_results["hapi_resource_metadata"]["hdx_id"] - hxl_tags = admin_results["headers"][1] values = admin_results["values"] # Add this check to see if there is no data, otherwise get a confusing # sqlalchemy error @@ -70,6 +94,17 @@ def populate(self, debug=False): f" {dataset_name} has no data, " f"please check configuration" ) + hxl_tags = admin_results["headers"][1] + # If config is missing sector, add to error messages + try: + sector_index = hxl_tags.index("#sector") + except ValueError: + add_message( + errors, + dataset_name, + "missing sector in config, dataset skipped", + ) + continue # Config must contain an org name org_name_index = hxl_tags.index("#org+name") # If config is missing org acronym, use the org name @@ -82,118 +117,68 @@ def populate(self, debug=False): org_type_name_index = hxl_tags.index("#org+type+name") except ValueError: org_type_name_index = None - # If config is missing sector, add to error messages - try: - sector_index = hxl_tags.index("#sector") - except ValueError: - add_message( - errors, - dataset_name, - "missing sector in config, dataset skipped", - ) - continue + resource_id = admin_results["hapi_resource_metadata"]["hdx_id"] for admin_code, org_names in values[org_name_index].items(): - for i, org_name_orig in enumerate(org_names): - admin2_code = admins.get_admin2_code_based_on_level( - admin_code=admin_code, admin_level=admin_level - ) - org_acronym_orig = values[org_acronym_index][ - admin_code - ][i] - if not org_name_orig: - org_name_orig = org_acronym_orig + for i, org_str in enumerate(org_names): + # * Sector processing sector_orig = values[sector_index][admin_code][i] # Skip rows that are missing a sector if not sector_orig: add_message( errors, dataset_name, - f"org {org_name_orig} missing sector", + f"org {org_str} missing sector", ) continue - org_type_orig = None - if org_type_name_index: - org_type_orig = values[org_type_name_index][ - admin_code - ][i] - country_code = admin_code + sector_code = self._sector.get_sector_code(sector_orig) + if not sector_code: + add_missing_value_message( + errors, dataset_name, "sector", sector_orig + ) + continue + + # * Admin processing if admin_level == "admintwo": country_code = self._admintwo.pcode_to_iso3.get( admin_code ) - if admin_level == "adminone": + elif admin_level == "adminone": country_code = self._adminone.pcode_to_iso3.get( admin_code ) - org_info = self._org.get_org_info( - org_name_orig, location=country_code + else: + country_code = admin_code + admin2_code = admins.get_admin2_code_based_on_level( + admin_code=admin_code, admin_level=admin_level ) - org_name = org_info.get("#org+name") - self._org.add_org_to_lookup(org_name_orig, org_name) - org_acronym = org_info.get( - "#org+acronym", - values[org_acronym_index][admin_code][i], + admin2_ref = self._admins.admin2_data[admin2_code] + + # * Org processing + if not org_str: + org_str = values[org_acronym_index][admin_code][i] + org_info = self._org.get_org_info( + org_str, location=country_code ) - if org_acronym is not None and len(org_acronym) > 32: - org_acronym = org_acronym[:32] - org_type_code = org_info.get("#org+type+code") - org_type_name = None - if not org_type_code: + if not org_info.complete: if org_type_name_index: org_type_name = values[org_type_name_index][ admin_code ][i] - if org_type_name: - org_type_code = ( - self._org_type.get_org_type_code( - org_type_name - ) - ) - if org_type_name and not org_type_code: - add_missing_value_message( - errors, dataset_name, "org type", org_type_name - ) - self._org.add_or_match_org( - acronym=org_acronym, - org_name=org_name, - org_type=org_type_code, - ) - org_acronym, org_name, org_type = self._org.data[ - ( - normalise(org_acronym), - normalise(org_name), - ) - ] - sector_code = self._sector.get_sector_code(sector_orig) - if debug: - debug_row = { - "location": country_code, - "org_name_orig": org_name_orig, - "org_acronym_orig": org_acronym_orig, - "org_type_orig": org_type_orig, - "sector_orig": sector_orig, - "org_name": org_name, - "org_acronym": org_acronym, - "org_type": org_type_code, - "sector": sector_code, - } - if debug_row in debug_rows: - continue - debug_rows.append(debug_row) - continue - - if not sector_code: - add_missing_value_message( - errors, dataset_name, "sector", sector_orig + else: + org_type_name = None + self.complete_org_info( + org_info, + values[org_acronym_index][admin_code][i], + org_type_name, + errors, + dataset_name, ) - continue - admin2_ref = self._admins.admin2_data[admin2_code] operational_presence_row = dict( resource_hdx_id=resource_id, admin2_ref=admin2_ref, - org_acronym=org_acronym, - org_name=org_name, + org_acronym=org_info.acronym, + org_name=org_info.canonical_name, sector_code=sector_code, reference_period_start=time_period_start, reference_period_end=time_period_end, @@ -213,12 +198,6 @@ def populate(self, debug=False): dataset_name, f"{number_duplicates} duplicate rows found", ) - if debug: - write_list_to_csv( - join("saved_data", "debug_operational_presence.csv"), - debug_rows, - ) - return logger.info("Writing to org table") self._org.populate_multiple() @@ -228,7 +207,7 @@ def populate(self, debug=False): ) for dataset, msg in self._config.get( - "operational_presence_error_messages", dict() + "operational_presence_error_messages", {} ).items(): add_message(errors, dataset, msg) for error in sorted(errors): diff --git a/src/hapi/pipelines/database/org.py b/src/hapi/pipelines/database/org.py index 91bd7697..fab74d6c 100644 --- a/src/hapi/pipelines/database/org.py +++ b/src/hapi/pipelines/database/org.py @@ -1,11 +1,13 @@ """Populate the org table.""" import logging -from typing import Dict +from dataclasses import dataclass +from os.path import join +from typing import Dict, NamedTuple from hapi_schema.db_org import DBOrg from hdx.scraper.utilities.reader import Read -from hdx.utilities.dictandlist import dict_of_sets_add +from hdx.utilities.dictandlist import write_list_to_csv from hdx.utilities.text import normalise from sqlalchemy.orm import Session @@ -17,6 +19,23 @@ _BATCH_SIZE = 1000 +@dataclass +class OrgInfo: + canonical_name: str + normalised_name: str + acronym: str | None + normalised_acronym: str | None + type_code: str | None + used: bool = False + complete: bool = False + + +class OrgData(NamedTuple): + acronym: str + name: str + type_code: str + + class Org(BaseUploader): def __init__( self, @@ -27,7 +46,6 @@ def __init__( self._datasetinfo = datasetinfo self.data = {} self._org_map = {} - self._org_lookup = {} def populate(self): logger.info("Populating org mapping") @@ -39,74 +57,128 @@ def populate(self): format="csv", file_prefix="org", ) - for row in iterator: - org_name = row.get("#x_pattern") - canonical_org_name = row.get("#org+name") - if not canonical_org_name: + + for i, row in enumerate(iterator): + canonical_name = row["#org+name"] + if not canonical_name: + logger.error(f"Canonical name is empty in row {i}!") continue - self._org_map[org_name] = row - self._org_map[canonical_org_name] = row - org_acronym = row.get("#org+acronym") - if org_acronym: - self._org_map[org_acronym] = row + normalised_name = normalise(canonical_name) + country_code = row["#country+code"] + acronym = row["#org+acronym"] + if acronym: + normalised_acronym = normalise(acronym) + else: + normalised_acronym = None + org_name = row["#x_pattern"] + type_code = row["#org+type+code"] + org_info = OrgInfo( + canonical_name, + normalised_name, + acronym, + normalised_acronym, + type_code, + ) + self._org_map[(country_code, canonical_name)] = org_info + self._org_map[(country_code, normalised_name)] = org_info + self._org_map[(country_code, acronym)] = org_info + self._org_map[(country_code, normalised_acronym)] = org_info + self._org_map[(country_code, org_name)] = org_info + self._org_map[(country_code, normalise(org_name))] = org_info - def add_or_match_org( - self, - acronym, - org_name, - org_type, - ): - key = ( - normalise(acronym), - normalise(org_name), + def get_org_info(self, org_str: str, location: str) -> OrgInfo: + key = (location, org_str) + org_info = self._org_map.get(key) + if org_info: + return org_info + normalised_str = normalise(org_str) + org_info = self._org_map.get((location, normalised_str)) + if org_info: + self._org_map[key] = org_info + return org_info + org_info = self._org_map.get((None, org_str)) + if org_info: + self._org_map[key] = org_info + return org_info + org_info = self._org_map.get((None, normalised_str)) + if org_info: + self._org_map[key] = org_info + return org_info + org_info = OrgInfo( + canonical_name=org_str, + normalised_name=normalised_str, + acronym=None, + normalised_acronym=None, + type_code=None, ) - if key in self.data: - org_type_old = self.data[key][2] - if org_type and not org_type_old: - self.data[key][2] = org_type - # TODO: should we flag orgs if we find more than one org type? - return - self.data[ - ( - normalise(acronym), - normalise(org_name), + self._org_map[key] = org_info + return org_info + + def add_or_match_org(self, org_info: OrgInfo) -> OrgData: + key = (org_info.normalised_acronym, org_info.normalised_name) + org_data = self.data.get(key) + if org_data: + if not org_data.type_code and org_info.type_code: + org_data = OrgData( + org_data.acronym, org_data.name, org_info.type_code + ) + self.data[key] = org_data + # TODO: should we flag orgs if we find more than one org type? + else: + org_info.type_code = org_data.type_code + # Since we're looking up by normalised acronym and normalised name, + # these don't need copying here + org_info.acronym = org_data.acronym + org_info.canonical_name = org_data.name + + else: + org_data = OrgData( + org_info.acronym, org_info.canonical_name, org_info.type_code ) - ] = [acronym, org_name, org_type] + self.data[key] = org_data + if org_info.acronym and org_info.type_code: + org_info.complete = True + org_info.used = True + return org_data def populate_multiple(self): org_rows = [ dict( - acronym=values[0], - name=values[1], - org_type_code=values[2], + acronym=org_data.acronym, + name=org_data.name, + org_type_code=org_data.type_code, ) - for values in self.data.values() + for org_data in self.data.values() ] batch_populate(org_rows, self._session, DBOrg) - def get_org_info(self, org_name: str, location: str) -> Dict[str, str]: - org_name_map = { - on: self._org_map[on] - for on in self._org_map - if self._org_map[on]["#country+code"] in [location, None] - } - org_map_info = org_name_map.get(org_name) - if not org_map_info: - org_name_map_clean = { - normalise(on): org_name_map[on] for on in org_name_map - } - org_name_clean = normalise(org_name) - org_map_info = org_name_map_clean.get(org_name_clean) - if not org_map_info: - return {"#org+name": org_name} - org_info = {"#org+name": org_map_info["#org+name"]} - if not org_info["#org+name"]: - org_info["#org+name"] = org_map_info["#x_pattern"] - if org_map_info["#org+acronym"]: - org_info["#org+acronym"] = org_map_info["#org+acronym"] - if org_map_info["#org+type+code"]: - org_info["#org+type+code"] = org_map_info["#org+type+code"] - return org_info - - def add_org_to_lookup(self, org_name_orig, org_name_official): - dict_of_sets_add(self._org_lookup, org_name_official, org_name_orig) + def output_org_map(self, folder: str) -> None: + rows = [ + ( + "Country Code", + "Lookup", + "Canonical Name", + "Normalised Name", + "Acronym", + "Normalised Acronym", + "Type Code", + "Used", + "Complete", + ) + ] + for key, org_info in self._org_map.items(): + country_code, lookup = key + rows.append( + ( + country_code, + lookup, + org_info.canonical_name, + org_info.normalised_name, + org_info.acronym, + org_info.normalised_acronym, + org_info.type_code, + "Y" if org_info.used else "N", + "Y" if org_info.complete else "N", + ) + ) + write_list_to_csv(join(folder, "org_map.csv"), rows) diff --git a/src/hapi/pipelines/database/org_type.py b/src/hapi/pipelines/database/org_type.py index 4d5a3609..cfa35a48 100644 --- a/src/hapi/pipelines/database/org_type.py +++ b/src/hapi/pipelines/database/org_type.py @@ -29,6 +29,9 @@ def populate(self): logger.info("Populating org type table") def parse_org_type_values(code: str, description: str) -> None: + self.data[code] = code + self.data[description] = code + self.data[normalise(code)] = code self.data[normalise(description)] = code org_type_row = DBOrgType( code=code, @@ -60,7 +63,7 @@ def parse_org_type_values(code: str, description: str) -> None: self._session.commit() - def get_org_type_code(self, org_type: str) -> str: + def get_org_type_code(self, org_type: str) -> str | None: return get_code_from_name( name=org_type, code_lookup=self.data, diff --git a/src/hapi/pipelines/database/sector.py b/src/hapi/pipelines/database/sector.py index 8113a2c5..8f85580d 100644 --- a/src/hapi/pipelines/database/sector.py +++ b/src/hapi/pipelines/database/sector.py @@ -29,9 +29,10 @@ def populate(self): logger.info("Populating sector table") def parse_sector_values(code: str, name: str): - if code != "intersectoral": - self.data[normalise(name)] = code - self.data[normalise(code)] = code + self.data[name] = code + self.data[code] = code + self.data[normalise(name)] = code + self.data[normalise(code)] = code sector_row = DBSector( code=code, name=name, @@ -59,7 +60,7 @@ def parse_sector_values(code: str, name: str): self._session.commit() - def get_sector_code(self, sector: str) -> str: + def get_sector_code(self, sector: str) -> str | None: return get_code_from_name( name=sector, code_lookup=self.data, diff --git a/src/hapi/pipelines/utilities/mappings.py b/src/hapi/pipelines/utilities/mappings.py index 6739914d..fc11e609 100644 --- a/src/hapi/pipelines/utilities/mappings.py +++ b/src/hapi/pipelines/utilities/mappings.py @@ -20,7 +20,7 @@ def get_code_from_name( fuzzy_match (bool): Allow fuzzy matching or not Returns: - str or None: matching code + str or None: Matching code """ code = code_lookup.get(name) if code: @@ -28,6 +28,7 @@ def get_code_from_name( name_clean = normalise(name) code = code_lookup.get(name_clean) if code: + code_lookup[name] = code return code if len(name) <= MATCH_THRESHOLD: return None @@ -41,8 +42,8 @@ def get_code_from_name( ) if name_index is None: return None - name = names[name_index] - code = code_lookup.get(name) + code = code_lookup.get(names[name_index]) if code: + code_lookup[name] = code code_lookup[name_clean] = code return code diff --git a/tests/test_main.py b/tests/test_main.py index 8e31681d..8ff9e7f2 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -37,6 +37,7 @@ from hapi.pipelines.app import load_yamls from hapi.pipelines.app.__main__ import add_defaults from hapi.pipelines.app.pipelines import Pipelines +from hapi.pipelines.database.org import OrgInfo logger = logging.getLogger(__name__) @@ -122,6 +123,8 @@ def test_pipelines(self, configuration, folder): pipelines.run() logger.info("Writing to database") pipelines.output() + logger.info("Writing debug output") + pipelines.debug(temp_folder) count = session.scalar( select(func.count(DBResource.hdx_id)) @@ -195,16 +198,223 @@ def test_pipelines(self, configuration, folder): select(func.count(DBFoodPrice.resource_hdx_id)) ) check.equal(count, 31615) - org_mapping = pipelines.org._org_lookup - assert org_mapping[ - "International Organization for Migration" - ] == { - "Organisation Internationale pour les Migrations", + org_map = pipelines.org._org_map + iom = OrgInfo( "International Organization for Migration", + "international organization for migration", "IOM", - } - assert org_mapping["United Nations Children's Fund"] == { - "Fonds des Nations Unies pour l'Enfance", + "iom", + "447", + True, + True, + ) + assert org_map[(None, "IOM")] == iom + assert org_map[(None, "iom")] == iom + assert ( + org_map[ + ( + None, + "Organisation Internationale pour les Migrations", + ) + ] + == iom + ) + assert ( + org_map[ + ( + None, + "organisation internationale pour les migrations", + ) + ] + == iom + ) + + iom = OrgInfo( + "International Organization for Migration", + "international organization for migration", + "IOM", + "iom", + "447", + False, + False, + ) + assert ( + org_map[ + (None, "International Organisation for Migrations") + ] + == iom + ) + assert ( + org_map[ + (None, "international organisation for migrations") + ] + == iom + ) + assert ( + org_map[ + (None, "INTERNATIONALE ORGANISATION FOR MIGRATION") + ] + == iom + ) + assert ( + org_map[ + (None, "internationale organisation for migration") + ] + == iom + ) + assert ( + org_map[ + ( + None, + "Organisation Internationale des Migrations", + ) + ] + == iom + ) + assert ( + org_map[ + ( + None, + "organisation internationale des migrations", + ) + ] + == iom + ) + assert ( + org_map[ + ( + None, + "OIM - International Organization for Migration", + ) + ] + == iom + ) + assert ( + org_map[ + ( + None, + "oim international organization for migration", + ) + ] + == iom + ) + + unicef = OrgInfo( "United Nations Children's Fund", + "united nations childrens fund", "UNICEF", - } + "unicef", + "447", + True, + True, + ) + assert ( + org_map[(None, "United Nations Children's Fund")] + == unicef + ) + assert ( + org_map[(None, "united nations childrens fund")] + == unicef + ) + assert org_map[(None, "UNICEF")] == unicef + assert org_map[(None, "unicef")] == unicef + assert ( + org_map[ + (None, "Fonds des Nations Unies pour l'Enfance") + ] + == unicef + ) + assert ( + org_map[ + (None, "fonds des nations unies pour lenfance") + ] + == unicef + ) + assert ( + org_map[ + ( + None, + "UNICEF - Fondo de las Naciones Unidas para la Infancia", + ) + ] + == unicef + ) + assert ( + org_map[ + ( + None, + "unicef fondo de las naciones unidas para la infancia", + ) + ] + == unicef + ) + + unicef = OrgInfo( + "United Nations Children's Fund", + "united nations childrens fund", + "UNICEF", + "unicef", + "447", + False, + False, + ) + assert ( + org_map[ + (None, "United Nations Children's Emergency Fund") + ] + == unicef + ) + assert ( + org_map[ + (None, "united nations childrens emergency fund") + ] + == unicef + ) + assert ( + org_map[ + (None, "Fond des Nations Unies pour l'Enfance") + ] + == unicef + ) + assert ( + org_map[(None, "fond des nations unies pour lenfance")] + == unicef + ) + assert ( + org_map[ + ( + None, + "United Nations International Childrens Emergency Fund", + ) + ] + == unicef + ) + assert ( + org_map[ + ( + None, + "united nations international childrens emergency fund", + ) + ] + == unicef + ) + + assert org_map[("AFG", "WEWORLD")] == OrgInfo( + "WEWORLD", + "weworld", + "WEWORLD", + "weworld", + None, + True, + False, + ) + + assert org_map[("NGA", "HECADF")] == OrgInfo( + "HECADF", + "hecadf", + "HECADF", + "hecadf", + "441", + True, + True, + )