diff --git a/documentation/main.md b/documentation/main.md index 4dd66a4..29fc2f9 100644 --- a/documentation/main.md +++ b/documentation/main.md @@ -128,8 +128,8 @@ values of the form: {"iso3": "AFG", "pcode": "AF01", "name": "Kabul"} Method *setup_from_libhxl_dataset* takes a libhxl Dataset object, while -*setup_from_url* takes a URL which defaults to the global p-codes dataset on -HDX. +*setup_from_url* takes a URL which defaults to a resource in the global p-codes +dataset on HDX. These methods also have optional parameter *countryiso3s* which is a tuple or list of country ISO3 codes to be read or None if all countries are desired. @@ -142,6 +142,22 @@ Examples of usage: adminlevel.get_pcode("YEM", "Al Dhale'e / الضالع") # returns ("YE30", False) adminlevel.get_pcode("YEM", "Al Dhale'e / الضالع", fuzzy_match=False) # returns (None, True) +There is basic admin 1 p-code length conversion by default. A more advanced +p-code length conversion can be activated by calling *load_pcode_formats* +which takes a URL that defaults to a resource in the global p-codes dataset on +HDX: + + admintwo.load_pcode_formats() + admintwo.get_pcode("YEM", "YEM30001") # returns ("YE3001", True) + +The length conversion can be further enhanced by supplying either parent +AdminLevel objects in a list or lists of p-codes per parent admin level: + + admintwo.set_parent_admins_from_adminlevels([adminone]) + admintwo.get_pcode("NER", "NE00409") # returns ("NER004009", True) + admintwo.set_parent_admins([adminone.pcodes]) + admintwo.get_pcode("NER", "NE00409") # returns ("NER004009", True) + ## Currencies Various functions support the conversion of monetary amounts to USD. 
Note that the diff --git a/src/hdx/location/adminlevel.py b/src/hdx/location/adminlevel.py index 888baaf..147622e 100755 --- a/src/hdx/location/adminlevel.py +++ b/src/hdx/location/adminlevel.py @@ -65,6 +65,7 @@ def __init__( self.pcode_to_iso3 = {} self.pcode_formats = {} self.zeroes = {} + self.parent_admins = [] self.init_matches_errors() self.phonetics = Phonetics() @@ -217,6 +218,32 @@ def load_pcode_formats(self, formats_url: str = _formats_url) -> None: for x in re.finditer("0", pcode): dict_of_sets_add(self.zeroes, countryiso3, x.start()) + def set_parent_admins(self, parent_admins: List[List]) -> None: + """ + Set parent admins + + Args: + parent_admins (List[List]): List of P-codes per parent admin + + Returns: + None + """ + self.parent_admins = parent_admins + + def set_parent_admins_from_adminlevels( + self, adminlevels: List["AdminLevel"] + ) -> None: + """ + Set parent admins from AdminLevel objects + + Args: + adminlevels (List[AdminLevel]): List of parent AdminLevel objects + + Returns: + None + """ + self.parent_admins = [adminlevel.pcodes for adminlevel in adminlevels] + def get_pcode_list(self) -> List[str]: """Get list of all pcodes @@ -325,11 +352,39 @@ def convert_admin_pcode_length( if len_new_pcode < total_length: if pos in self.zeroes[countryiso3]: pcode_part = f"0{pcode_part}" - admin_changes.append(str(admin_no)) + if self.parent_admins and admin_no < self.admin_level: + parent_pcode = [ + pcode_parts[i] for i in range(admin_no) + ] + parent_pcode.append(pcode_part[:admin_length]) + parent_pcode = "".join(parent_pcode) + if ( + parent_pcode + not in self.parent_admins[admin_no - 1] + ): + pcode_part = pcode_part[1:] + else: + admin_changes.append(str(admin_no)) + else: + admin_changes.append(str(admin_no)) elif len_new_pcode > total_length: if admin_length == 2 and pcode_parts[admin_no][0] == "0": pcode_part = pcode_part[1:] - admin_changes.append(str(admin_no)) + if self.parent_admins and admin_no < self.admin_level: + parent_pcode = 
[ + pcode_parts[i] for i in range(admin_no) + ] + parent_pcode.append(pcode_part[:admin_length]) + parent_pcode = "".join(parent_pcode) + if ( + parent_pcode + not in self.parent_admins[admin_no - 1] + ): + pcode_part = f"0{pcode_part}" + else: + admin_changes.append(str(admin_no)) + else: + admin_changes.append(str(admin_no)) pcode_parts[admin_no] = pcode_part[:admin_length] pcode_parts.append(pcode_part[admin_length:]) new_pcode = "".join(pcode_parts) diff --git a/tests/hdx/location/test_adminlevel.py b/tests/hdx/location/test_adminlevel.py index 71059a9..73c59cc 100755 --- a/tests/hdx/location/test_adminlevel.py +++ b/tests/hdx/location/test_adminlevel.py @@ -385,11 +385,24 @@ def test_adminlevel_pcode_formats(self, config, url, formats_url): True, ) # Algorithm inserts 0 to make NER000409 and hence fails (it has no - # knowledge that NER000 is an invalid admin 1 and could consider - # adding this knowledge if it proves necessary) + # knowledge that NER000 is an invalid admin 1) assert admintwo.get_pcode( "NER", "NE00409", logname="test", fuzzy_match=False ) == ( None, True, ) + admintwo.set_parent_admins_from_adminlevels([adminone]) + # The lookup in admin1 reveals that adding a 0 prefix to the admin1 + # is not a valid admin1 (NER000) so the algorithm tries adding + # the 0 prefix at the admin2 level instead and hence succeeds + assert admintwo.get_pcode("NER", "NE00409", logname="test") == ( + "NER004009", + True, + ) + + admintwo.set_parent_admins([adminone.pcodes]) + assert admintwo.get_pcode("NER", "NE00409", logname="test") == ( + "NER004009", + True, + )