-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #168 from OCHA-DAP/HDXDSYS-843-add-dtm
Hdxdsys 843 Add DTM data
- Loading branch information
Showing
15 changed files
with
447,575 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#National risk config file | ||
|
||
idps_default: | ||
scrapers_with_defaults: | ||
- "dtm" | ||
format: "csv" | ||
use_hxl: True | ||
admin_exact: True | ||
input: | ||
- "#affected+idps" | ||
- "#date+reported" | ||
- "#round+code" | ||
- "#assessment+type" | ||
- "#operation+name" | ||
list: | ||
- "#affected+idps" | ||
- "#date+reported" | ||
- "#round+code" | ||
- "#assessment+type" | ||
- "#operation+name" | ||
output: | ||
- "number_idps" | ||
- "reporting_date" | ||
- "round_number" | ||
- "assessment_type" | ||
- "operation" | ||
output_hxl: | ||
- "#affected+idps" | ||
- "#date+reported" | ||
- "#round+code" | ||
- "#assessment+type" | ||
- "#operation+name" | ||
|
||
idps_national: | ||
dtm: | ||
dataset: "global-iom-dtm-from-api" | ||
resource: "Global IOM DTM data for admin levels 0-2" | ||
filter_cols: | ||
- "#adm1+code" | ||
prefilter: "#adm1+code is None" | ||
admin: | ||
- "#country+code" | ||
|
||
idps_adminone: | ||
dtm: | ||
dataset: "global-iom-dtm-from-api" | ||
resource: "Global IOM DTM data for admin levels 0-2" | ||
filter_cols: | ||
- "#adm1+code" | ||
- "#adm2+code" | ||
prefilter: "#adm1+code is not None and #adm2+code is None" | ||
admin: | ||
- "#country+code" | ||
- "#adm1+code" | ||
|
||
idps_admintwo: | ||
dtm: | ||
dataset: "global-iom-dtm-from-api" | ||
resource: "Global IOM DTM data for admin levels 0-2" | ||
filter_cols: | ||
- "#adm1+code" | ||
- "#adm2+code" | ||
prefilter: "#adm1+code is not None and #adm2+code is not None" | ||
admin: | ||
- "#country+code" | ||
- "#adm2+code" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
"""Functions specific to the IDPs theme.""" | ||
|
||
from logging import getLogger | ||
from typing import Dict | ||
|
||
from hapi_schema.db_idps import DBIDPs | ||
from sqlalchemy.orm import Session | ||
|
||
from ..utilities.logging_helpers import add_message | ||
from . import admins | ||
from .base_uploader import BaseUploader | ||
from .metadata import Metadata | ||
|
||
logger = getLogger(__name__) | ||
|
||
|
||
class IDPs(BaseUploader):
    """Uploader that populates the HAPI IDPs table from scraper results.

    Takes the output of a configurable scraper run over the DTM dataset and
    writes one DBIDPs row per (admin area, assessment, date, round, operation)
    combination, skipping and reporting duplicates.
    """

    def __init__(
        self,
        session: Session,
        metadata: Metadata,
        admins: admins.Admins,
        results: Dict,
    ):
        super().__init__(session)
        self._metadata = metadata
        self._admins = admins
        self._results = results

    def populate(self) -> None:
        """Insert IDP rows into the database and log duplicate-row errors.

        Consumes (via popitem) the single dataset expected in self._results.
        Commits the session once at the end.
        """
        # TODO: This might be better suited to just work with the DTM resource
        # directly as done with HNO, rather than using a configurable scraper
        logger.info("Populating IDPs table")
        errors = set()
        # self._results is a dictionary where the keys are the HDX dataset ID
        # and the values are a dictionary with keys containing HDX metadata
        # plus a "results" key containing the results, stored in a dictionary
        # with admin levels as keys. There is only one dataset for now in the
        # results dictionary, so take the first value (popitem returns a
        # (key, value) tuple — take the value). NOTE: this mutates
        # self._results, so populate() is effectively single-shot.
        dataset = self._results.popitem()[1]
        dataset_name = dataset["hdx_stub"]
        for admin_level, admin_results in dataset["results"].items():
            # admin_results contains the keys "headers", "values", and
            # "hapi_resource_metadata". admin_results["values"] is a list of
            # dictionaries of the format:
            # [{AFG: [1, 2], BFA: [3, 4]}, {AFG: [A, B], BFA: [C, D]}, ...]
            # so lookups take the form values[i_hdx_key][pcode][i] where i
            # iterates the rows for that particular p-code.
            resource_id = admin_results["hapi_resource_metadata"]["hdx_id"]
            hxl_tags = admin_results["headers"][1]
            values = admin_results["values"]
            # Hoist the tag-index lookups out of the row loop: they are
            # invariant for a given admin level, and hxl_tags.index is O(n).
            idx_idps = hxl_tags.index("#affected+idps")
            idx_date = hxl_tags.index("#date+reported")
            idx_round = hxl_tags.index("#round+code")
            idx_assessment = hxl_tags.index("#assessment+type")
            idx_operation = hxl_tags.index("#operation+name")
            admin_codes = values[0].keys()
            for admin_code in admin_codes:
                admin2_code = admins.get_admin2_code_based_on_level(
                    admin_code=admin_code, admin_level=admin_level
                )
                duplicate_rows = set()
                # zip the per-tag column lists so each iteration yields one
                # full row (tuple of values aligned with hxl_tags).
                for row in zip(
                    *[
                        values[hxl_tags.index(tag)][admin_code]
                        for tag in hxl_tags
                    ]
                ):
                    admin2_ref = self._admins.admin2_data[admin2_code]
                    assessment_type = row[idx_assessment]
                    date_reported = row[idx_date]
                    reporting_round = row[idx_round]
                    operation = row[idx_operation]
                    # The dedupe key mirrors the fields written to DBIDPs
                    # (other than the population count itself).
                    duplicate_row_check = (
                        admin2_ref,
                        assessment_type,
                        date_reported,
                        reporting_round,
                        operation,
                    )
                    if duplicate_row_check in duplicate_rows:
                        # Fixed message: previously repeated "reporting round"
                        # twice and omitted the reported date, which is part
                        # of the dedupe key.
                        text = (
                            f"Duplicate row for admin code {admin2_code}, "
                            f"assessment type {assessment_type}, date reported "
                            f"{date_reported}, reporting round "
                            f"{reporting_round}, operation {operation}"
                        )
                        add_message(errors, dataset_name, text)
                        continue
                    idps_row = DBIDPs(
                        resource_hdx_id=resource_id,
                        admin2_ref=admin2_ref,
                        assessment_type=assessment_type,
                        reporting_round=reporting_round,
                        operation=operation,
                        population=row[idx_idps],
                        # DTM reports a point-in-time figure, so start and end
                        # of the reference period are both the reported date.
                        reference_period_start=date_reported,
                        reference_period_end=date_reported,
                    )
                    self._session.add(idps_row)
                    duplicate_rows.add(duplicate_row_check)
        self._session.commit()
        for error in sorted(errors):
            logger.error(error)
Oops, something went wrong.