From a5b21b6b07c7255b9c906e099a9cd37a76db5e76 Mon Sep 17 00:00:00 2001
From: Michael Wood
Date: Mon, 13 Jan 2025 17:01:45 +0000
Subject: [PATCH] additional_data: codelist_code: Add some more codelist data

Adds some of the more obscure codelists for easier human reading:

* locationScope
* beneficiary geoCodeType
* recipient_organization geoCodeType
* funding_organization geoCodeType

Related: https://github.com/ThreeSixtyGiving/grantnav/issues/1081
---
Review notes (commentary below the three-dash line is not part of the
commit message):
* The three geoCodeType lookups now assign to beneficiary_geoCodeType,
  funding_organization_geoCodeType and recipient_organization_geoCodeType.
  They previously all assigned to locationScope, which overwrote the
  locationScope title and left the "geoCodeType" output values empty.
* Fixed the "accross" typo in the duplicate-code comment.
* NOTE(review): the post-image blob id on the "index" line was not
  regenerated after these edits.

 .../additional_data/sources/codelist_code.py | 92 +++++++++++++++----
 1 file changed, 72 insertions(+), 20 deletions(-)

diff --git a/datastore/additional_data/sources/codelist_code.py b/datastore/additional_data/sources/codelist_code.py
index 9ff0f17..98c4970 100644
--- a/datastore/additional_data/sources/codelist_code.py
+++ b/datastore/additional_data/sources/codelist_code.py
@@ -1,5 +1,6 @@
 import csv
 import requests
+from django.db import transaction
 
 from additional_data.models import CodelistCode
 
@@ -7,12 +8,11 @@
     "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/main/codelists/grantToIndividualsPurpose.csv",
     "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/main/codelists/grantToIndividualsReason.csv",
     "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/main/codelists/regrantType.csv",
-    # These lists aren't yet ready for use in the datastore
-    # https://github.com/ThreeSixtyGiving/standard/issues/348
-    # https://github.com/ThreeSixtyGiving/standard/issues/349
+    "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/main/codelists/locationScope.csv",
+    "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/main/codelists/geoCodeType.csv",
+    # These codelists aren't yet processed
     # "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/main/codelists/countryCode.csv",
     # "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/main/codelists/currency.csv",
-    # "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/main/codelists/geoCodeType.csv",
 ]
 
 
@@ -22,23 +22,33 @@ class CodeListSource(object):
     """
 
     def import_codelists(self):
-        CodelistCode.objects.all().delete()
-
-        for code_list_url in code_lists_urls:
-            # list name = last item in split -4 to remove extension .csv
-            list_name = code_list_url.split("/")[-1:][0][:-4]
-            with requests.get(code_list_url, stream=True) as r:
-                r.raise_for_status()
-                file_data = csv.DictReader(
-                    r.iter_lines(decode_unicode=True), delimiter=","
-                )
-                for value in file_data:
-                    CodelistCode.objects.create(
-                        code=value["Code"],
-                        title=value["Title"],
-                        description=value["Description"],
-                        list_name=list_name,
+        with transaction.atomic():
+            CodelistCode.objects.all().delete()
+
+            for code_list_url in code_lists_urls:
+                # list name = last item in split -4 to remove extension .csv
+                list_name = code_list_url.split("/")[-1:][0][:-4]
+                print(f"fetching codelist: {list_name}")
+                with requests.get(code_list_url, stream=True) as r:
+                    r.raise_for_status()
+                    file_data = csv.DictReader(
+                        r.iter_lines(decode_unicode=True), delimiter=","
                     )
+                    for value in file_data:
+                        # In https://github.com/ThreeSixtyGiving/standard/blob/main/codelists/geoCodeType.csv
+                        # we have non unique codes with differing descriptions. We have to just take the first
+                        # one we come across to avoid an integrity error on the unique constraints.
+                        try:
+                            CodelistCode.objects.get(
+                                code=value["Code"], list_name=list_name
+                            )
+                        except CodelistCode.DoesNotExist:
+                            CodelistCode.objects.create(
+                                code=value["Code"],
+                                title=value["Title"],
+                                description=value["Description"],
+                                list_name=list_name,
+                            )
 
     def update_additional_data(self, grant, additional_data):
         # check All the fields in the grant data that use codelists and make additional data field versions of them
@@ -47,6 +57,10 @@ def update_additional_data(self, grant, additional_data):
         secondaryGrantReason = ""
         grantPurpose = []
         regrantType = ""
+        locationScope = ""
+        beneficiary_geoCodeType = ""
+        recipient_organization_geoCodeType = ""
+        funding_organization_geoCodeType = ""
 
         try:
             code = grant["toIndividualsDetails"]["primaryGrantReason"]
@@ -83,6 +97,38 @@ def update_additional_data(self, grant, additional_data):
         except (KeyError, CodelistCode.DoesNotExist):
             pass
 
+        try:
+            code = grant["locationScope"]
+            locationScope = CodelistCode.objects.get(
+                code=code, list_name="locationScope"
+            ).title
+        except (KeyError, CodelistCode.DoesNotExist):
+            pass
+
+        try:
+            code = grant["beneficiaryLocation"][0]["geoCodeType"]
+            beneficiary_geoCodeType = CodelistCode.objects.get(
+                code=code, list_name="geoCodeType"
+            ).title
+        except (KeyError, IndexError, CodelistCode.DoesNotExist):
+            pass
+
+        try:
+            code = grant["fundingOrganization"][0]["location"][0]["geoCodeType"]
+            funding_organization_geoCodeType = CodelistCode.objects.get(
+                code=code, list_name="geoCodeType"
+            ).title
+        except (KeyError, IndexError, CodelistCode.DoesNotExist):
+            pass
+
+        try:
+            code = grant["recipientOrganization"][0]["location"][0]["geoCodeType"]
+            recipient_organization_geoCodeType = CodelistCode.objects.get(
+                code=code, list_name="geoCodeType"
+            ).title
+        except (KeyError, IndexError, CodelistCode.DoesNotExist):
+            pass
+
         additional_data["codeListLookup"] = {
             "toIndividualsDetails": {
                 "primaryGrantReason": primaryGrantReason,
@@ -90,4 +136,10 @@ def update_additional_data(self, grant, additional_data):
                 "grantPurpose": grantPurpose,
             },
             "regrantType": regrantType,
+            "locationScope": locationScope,
+            "geoCodeType": {
+                "beneficiaryLocation": beneficiary_geoCodeType,
+                "recipientOrganization": recipient_organization_geoCodeType,
+                "fundingOrganization": funding_organization_geoCodeType,
+            },
         }