Skip to content

Commit

Permalink
Add facility to compare against parent admin p-codes when adding or r…
Browse files Browse the repository at this point in the history
…emoving 0s
  • Loading branch information
Mike committed Oct 31, 2023
1 parent 8eac94e commit 47edf38
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 6 deletions.
20 changes: 18 additions & 2 deletions documentation/main.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ values of the form:
{"iso3": "AFG", "pcode": "AF01", "name": "Kabul"}

Method *setup_from_libhxl_dataset* takes a libhxl Dataset object, while
*setup_from_url* takes a URL which defaults to the global p-codes dataset on
HDX.
*setup_from_url* takes a URL which defaults to a resource in the global p-codes
dataset on HDX.

These methods also have optional parameter *countryiso3s* which is a tuple or
list of country ISO3 codes to be read or None if all countries are desired.
Expand All @@ -142,6 +142,22 @@ Examples of usage:
adminlevel.get_pcode("YEM", "Al Dhale'e / الضالع") # returns ("YE30", False)
adminlevel.get_pcode("YEM", "Al Dhale'e / الضالع", fuzzy_match=False) # returns (None, True)

There is basic admin 1 p-code length conversion by default. A more advanced
p-code length conversion can be activated by calling *load_pcode_formats*
which takes a URL that defaults to a resource in the global p-codes dataset on
HDX:

admintwo.load_pcode_formats()
admintwo.get_pcode("YEM", "YEM30001") # returns ("YE3001", True)

The length conversion can be further enhanced by supplying either parent
AdminLevel objects in a list or lists of p-codes per parent admin level:

admintwo.set_parent_admins_from_adminlevels([adminone])
admintwo.get_pcode("NER", "NE00409") # returns ("NER004009", True)
admintwo.set_parent_admins([adminone.pcodes])
admintwo.get_pcode("NER", "NE00409") # returns ("NER004009", True)

## Currencies

Various functions support the conversion of monetary amounts to USD. Note that the
Expand Down
59 changes: 57 additions & 2 deletions src/hdx/location/adminlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def __init__(
self.pcode_to_iso3 = {}
self.pcode_formats = {}
self.zeroes = {}
self.parent_admins = []

self.init_matches_errors()
self.phonetics = Phonetics()
Expand Down Expand Up @@ -217,6 +218,32 @@ def load_pcode_formats(self, formats_url: str = _formats_url) -> None:
for x in re.finditer("0", pcode):
dict_of_sets_add(self.zeroes, countryiso3, x.start())

def set_parent_admins(self, parent_admins: List[List]) -> None:
    """Store the p-codes of the parent admin levels on this object.

    The supplied list is kept as given: it is assigned directly to
    ``self.parent_admins`` without being copied.

    Args:
        parent_admins (List[List]): One list of P-codes per parent admin

    Returns:
        None
    """
    self.parent_admins = parent_admins

def set_parent_admins_from_adminlevels(
    self, adminlevels: List["AdminLevel"]
) -> None:
    """Set parent admins from AdminLevel objects.

    Reads the ``pcodes`` attribute of each supplied object and stores the
    results, in the order given, as a new list in ``self.parent_admins``.

    Args:
        adminlevels (List[AdminLevel]): List of parent AdminLevel objects

    Returns:
        None
    """
    # Only each parent's p-codes are retained, not the AdminLevel objects
    self.parent_admins = [adminlevel.pcodes for adminlevel in adminlevels]

def get_pcode_list(self) -> List[str]:
"""Get list of all pcodes
Expand Down Expand Up @@ -325,11 +352,39 @@ def convert_admin_pcode_length(
if len_new_pcode < total_length:
if pos in self.zeroes[countryiso3]:
pcode_part = f"0{pcode_part}"
admin_changes.append(str(admin_no))
if self.parent_admins and admin_no < self.admin_level:
parent_pcode = [
pcode_parts[i] for i in range(admin_no)
]
parent_pcode.append(pcode_part[:admin_length])
parent_pcode = "".join(parent_pcode)
if (
parent_pcode
not in self.parent_admins[admin_no - 1]
):
pcode_part = pcode_part[1:]
else:
admin_changes.append(str(admin_no))
else:
admin_changes.append(str(admin_no))
elif len_new_pcode > total_length:
if admin_length == 2 and pcode_parts[admin_no][0] == "0":
pcode_part = pcode_part[1:]
admin_changes.append(str(admin_no))
if self.parent_admins and admin_no < self.admin_level:
parent_pcode = [
pcode_parts[i] for i in range(admin_no)
]
parent_pcode.append(pcode_part[:admin_length])
parent_pcode = "".join(parent_pcode)
if (
parent_pcode
not in self.parent_admins[admin_no - 1]
):
pcode_part = f"0{pcode_part}"
else:
admin_changes.append(str(admin_no))
else:
admin_changes.append(str(admin_no))
pcode_parts[admin_no] = pcode_part[:admin_length]
pcode_parts.append(pcode_part[admin_length:])
new_pcode = "".join(pcode_parts)
Expand Down
17 changes: 15 additions & 2 deletions tests/hdx/location/test_adminlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,11 +385,24 @@ def test_adminlevel_pcode_formats(self, config, url, formats_url):
True,
)
# Algorithm inserts 0 to make NER000409 and hence fails (it has no
# knowledge that NER000 is an invalid admin 1 and could consider
# adding this knowledge if it proves necessary)
# knowledge that NER000 is an invalid admin 1)
assert admintwo.get_pcode(
"NER", "NE00409", logname="test", fuzzy_match=False
) == (
None,
True,
)
admintwo.set_parent_admins_from_adminlevels([adminone])
# The lookup in admin1 reveals that adding a 0 prefix to the admin1
# is not a valid admin1 (NER000) so the algorithm tries adding
# the 0 prefix at the admin2 level instead and hence succeeds
assert admintwo.get_pcode("NER", "NE00409", logname="test") == (
"NER004009",
True,
)

admintwo.set_parent_admins([adminone.pcodes])
assert admintwo.get_pcode("NER", "NE00409", logname="test") == (
"NER004009",
True,
)

0 comments on commit 47edf38

Please sign in to comment.