Skip to content

Commit

Permalink
Merge pull request #69 from OCHA-DAP/HAPI-136
Browse files Browse the repository at this point in the history
Add national risk data and tests
  • Loading branch information
b-j-mills authored Jan 18, 2024
2 parents 5a372a1 + d8f752d commit f3b629d
Show file tree
Hide file tree
Showing 9 changed files with 187 additions and 14 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.6.7] - 2024-01-17

### Added

- Add national risk AFG, BFA, MLI, NGA, TCD, YEM

## [0.6.6] - 2024-01-08

### Added
Expand Down
24 changes: 12 additions & 12 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ filelock==3.13.1
# via virtualenv
frictionless==5.16.0
# via hdx-python-utilities
google-auth==2.26.0
google-auth==2.26.2
# via
# google-auth-oauthlib
# gspread
Expand All @@ -63,11 +63,11 @@ greenlet==3.0.3
# via sqlalchemy
gspread==5.12.4
# via hdx-python-scraper
hapi-schema==0.5.0
hapi-schema==0.6.0
# via hapi-pipelines (pyproject.toml)
hdx-python-api==6.2.0
hdx-python-api==6.2.1
# via hdx-python-scraper
hdx-python-country==3.6.3
hdx-python-country==3.6.4
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-api
Expand All @@ -76,7 +76,7 @@ hdx-python-database[postgresql]==1.2.9
# via hapi-pipelines (pyproject.toml)
hdx-python-scraper==2.3.2
# via hapi-pipelines (pyproject.toml)
hdx-python-utilities==3.6.3
hdx-python-utilities==3.6.4
# via
# hdx-python-api
# hdx-python-country
Expand All @@ -96,17 +96,17 @@ iniconfig==2.0.0
# via pytest
isodate==0.6.1
# via frictionless
jinja2==3.1.2
jinja2==3.1.3
# via frictionless
jsonlines==4.0.0
# via hdx-python-utilities
jsonpath-ng==1.6.0
jsonpath-ng==1.6.1
# via libhxl
jsonschema==4.17.3
# via
# frictionless
# tableschema-to-template
libhxl==5.1
libhxl==5.2
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-api
Expand Down Expand Up @@ -149,9 +149,9 @@ pockets==0.9.1
# via sphinxcontrib-napoleon
pre-commit==3.6.0
# via hapi-pipelines (pyproject.toml)
psycopg[binary]==3.1.16
psycopg[binary]==3.1.17
# via hdx-python-database
psycopg-binary==3.1.16
psycopg-binary==3.1.17
# via psycopg
pyasn1==0.5.1
# via
Expand Down Expand Up @@ -250,7 +250,7 @@ sqlalchemy==2.0.25
# hdx-python-database
stringcase==1.2.0
# via frictionless
structlog==23.3.0
structlog==24.1.0
# via libhxl
tableschema-to-template==0.0.13
# via hdx-python-utilities
Expand All @@ -269,7 +269,7 @@ typing-extensions==4.9.0
# pydantic-core
# sqlalchemy
# typer
unidecode==1.3.7
unidecode==1.3.8
# via
# libhxl
# pyphonetics
Expand Down
1 change: 1 addition & 0 deletions src/hapi/pipelines/app/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def main(
"core.yaml",
"food_security.yaml",
"humanitarian_needs.yaml",
"national_risk.yaml",
"operational_presence.yaml",
"population.yaml",
]
Expand Down
15 changes: 15 additions & 0 deletions src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from hapi.pipelines.database.ipc_type import IpcType
from hapi.pipelines.database.locations import Locations
from hapi.pipelines.database.metadata import Metadata
from hapi.pipelines.database.national_risk import NationalRisk
from hapi.pipelines.database.operational_presence import OperationalPresence
from hapi.pipelines.database.org import Org
from hapi.pipelines.database.org_type import OrgType
Expand Down Expand Up @@ -155,6 +156,7 @@ def _create_configurable_scrapers(
_create_configurable_scrapers(
"humanitarian_needs", "admintwo", adminlevel=self.admintwo
)
_create_configurable_scrapers("national_risk", "national")

def run(self):
self.runner.run()
Expand Down Expand Up @@ -240,3 +242,16 @@ def output(self):
results=results,
)
humanitarian_needs.populate()

if not self.themes_to_run or "national_risk" in self.themes_to_run:
results = self.runner.get_hapi_results(
self.configurable_scrapers["national_risk"]
)

national_risk = NationalRisk(
session=self.session,
metadata=self.metadata,
locations=self.locations,
results=results,
)
national_risk.populate()
45 changes: 45 additions & 0 deletions src/hapi/pipelines/configs/national_risk.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# National risk config file
#
# Configurable-scraper definition that reads the INFORM Risk Index
# dataset from HDX at national (ISO3) level.

national_risk_national:
  national_risk:
    dataset: "inform-risk-index-2021"
    # NOTE: the resource name on HDX really does contain a space before
    # the ".xlsx" extension — do not "fix" it here.
    resource: "INFORM_Risk_2024_v067 .xlsx"
    format: "xlsx"
    sheet: "INFORM Risk 2024 (a-z)"
    # Column headers are on row 2 of the sheet
    headers: 2
    # Reference period covered by the 2024 index (DD/MM/YYYY)
    source_date:
      start: "01/01/2024"
      end: "31/12/2024"
    filter_cols:
      - "ISO3"
    # Restrict to the countries currently covered by HAPI
    prefilter: "ISO3 in ['AFG', 'BFA', 'MLI', 'NGA', 'TCD', 'YEM']"
    admin:
      - "ISO3"
    admin_exact: True
    # Spreadsheet columns to read, in order
    input:
      - "RISK CLASS"
      - "Rank"
      - "INFORM RISK"
      - "HAZARD & EXPOSURE"
      - "VULNERABILITY"
      - "LACK OF COPING CAPACITY"
      - "% of Missing Indicators"
      - "Recentness data (average years)"
    # Output names corresponding one-to-one with the input columns;
    # these match the DBNationalRisk column names.
    output:
      - "risk_class"
      - "global_rank"
      - "overall_risk"
      - "hazard_exposure_risk"
      - "vulnerability_risk"
      - "coping_capacity_risk"
      - "meta_missing_indicators_pct"
      - "meta_avg_recentness_years"
    # HXL tags for each output column; national_risk.py looks values up
    # by these tags.
    output_hxl:
      - "#risk+class"
      - "#risk+rank"
      - "#risk+total"
      - "#risk+hazard"
      - "#risk+vulnerability"
      - "#risk+coping+capacity"
      - "#meta+missing+indicators+pct"
      - "#meta+recentness+avg"
99 changes: 99 additions & 0 deletions src/hapi/pipelines/database/national_risk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Functions specific to the national risk theme."""

from logging import getLogger
from typing import Dict, Optional

from hapi_schema.db_national_risk import DBNationalRisk
from sqlalchemy.orm import Session

from . import locations
from .base_uploader import BaseUploader
from .metadata import Metadata

logger = getLogger(__name__)


class NationalRisk(BaseUploader):
    """Uploader that populates the national risk database table.

    Consumes the results of the "national_risk_national" configurable
    scraper and writes one DBNationalRisk row per location.
    """

    def __init__(
        self,
        session: Session,
        metadata: Metadata,
        locations: locations.Locations,
        results: Dict,
    ):
        """
        Args:
            session: Database session used to add and commit rows.
            metadata: Metadata holding resource data and the scraper runner.
            locations: Locations lookup mapping location codes to refs.
            results: Scraper results keyed by dataset.
        """
        super().__init__(session)
        self._metadata = metadata
        self._locations = locations
        self._results = results

    def populate(self):
        """Write national risk rows for every scraped location.

        Reads the HXL-tagged value columns from each admin level's
        results, converts the textual risk class to its numeric code,
        and commits all rows in a single transaction.
        """
        logger.info("Populating national risk table")
        # The source date comes from the scraper configuration and is
        # keyed by the fixed scraper name, so it is loop-invariant:
        # look it up once instead of once per dataset.
        datasetinfo = self._metadata.runner.scrapers[
            "national_risk_national"
        ].datasetinfo
        default_date = datasetinfo["source_date"]["default_date"]
        reference_period_start = default_date["start"]
        reference_period_end = default_date["end"]
        for dataset in self._results.values():
            # The admin level key itself is not needed, only the results.
            for admin_results in dataset["results"].values():
                resource_id = admin_results["hapi_resource_metadata"]["hdx_id"]
                hxl_tags = admin_results["headers"][1]
                # Each value column is a dict keyed by location code; take
                # the codes from the first column. Named location_codes
                # (not locations) to avoid shadowing the imported module.
                location_codes = list(admin_results["values"][0].keys())
                # Map each HXL tag to its column of values.
                values = dict(zip(hxl_tags, admin_results["values"]))

                for location in location_codes:
                    risk_class = values["#risk+class"].get(location)
                    if risk_class:
                        risk_class = _get_risk_class_code_from_data(risk_class)

                    national_risk_row = DBNationalRisk(
                        resource_ref=self._metadata.resource_data[resource_id],
                        location_ref=self._locations.data[location],
                        risk_class=risk_class,
                        global_rank=values["#risk+rank"][location],
                        overall_risk=values["#risk+total"][location],
                        hazard_exposure_risk=values["#risk+hazard"][location],
                        vulnerability_risk=values["#risk+vulnerability"][
                            location
                        ],
                        coping_capacity_risk=values["#risk+coping+capacity"][
                            location
                        ],
                        # The meta columns may be missing for a location,
                        # hence .get() rather than indexing.
                        meta_missing_indicators_pct=values[
                            "#meta+missing+indicators+pct"
                        ].get(location),
                        meta_avg_recentness_years=values[
                            "#meta+recentness+avg"
                        ].get(location),
                        reference_period_start=reference_period_start,
                        reference_period_end=reference_period_end,
                        # TODO: For v2+, add to scraper (HAPI-199)
                        source_data="not yet implemented",
                    )
                    self._session.add(national_risk_row)
        # Commit once for all rows instead of once per row, so the whole
        # populate is a single transaction.
        self._session.commit()


# Mapping from the textual risk class in the source data to the numeric
# code stored in the database (5 = highest risk).
_RISK_CLASS_CODES = {
    "very high": 5,
    "high": 4,
    "medium": 3,
    "low": 2,
    "very low": 1,
}


def _get_risk_class_code_from_data(risk_class: str) -> Optional[int]:
    """Convert a textual risk class to its numeric code.

    The comparison is case-insensitive.

    Args:
        risk_class: Risk class label from the source data,
            e.g. "Very High".

    Returns:
        The numeric code 1-5, or None if the label is not recognized.
    """
    return _RISK_CLASS_CODES.get(risk_class.lower())
1 change: 1 addition & 0 deletions tests/fixtures/input/inform-risk-index-2021.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"archived": false, "creator_user_id": "4e1abd75-38a0-46f5-9b8f-6f765231eed9", "data_update_frequency": "180", "dataset_date": "[2014-01-01T00:00:00 TO 2023-11-14T23:59:59]", "dataset_preview": "first_resource", "dataset_source": "INFORM", "due_date": "2024-05-12T14:03:40", "has_geodata": false, "has_quickcharts": false, "has_showcases": true, "id": "f5ec2ee7-8a1b-49b4-864b-70bdb582a022", "is_requestdata_type": false, "isopen": true, "last_modified": "2023-11-14T14:03:40.799412", "license_id": "cc-by", "license_title": "Creative Commons Attribution International", "license_url": "http://www.opendefinition.org/licenses/cc-by", "maintainer": "4e1abd75-38a0-46f5-9b8f-6f765231eed9", "maintainer_email": "[email protected]", "metadata_created": "2021-06-28T13:59:44.873384", "metadata_modified": "2023-11-15T06:29:58.323053", "methodology": "Other", "methodology_other": "Composite Indicator", "name": "inform-risk-index-2021", "notes": "The INFORM Risk Index is a global, open-source risk assessment for humanitarian crises and disasters. It can support decisions about prevention, preparedness and response.", "num_resources": 3, "num_tags": 0, "organization": {"id": "e116c55a-d536-4b47-9308-94b1c7457afe", "name": "inform", "title": "INFORM", "type": "organization", "description": "INFORM is a multi-stakeholder forum for developing shared, quantitative analysis relevant to humanitarian crises and disasters. INFORM includes organisations from across the multilateral system, including the humanitarian and development sector, donors, and technical partners. The Joint Research Center of European Commission is the scientific and technical lead for INFORM.\r\n\r\nINFORM is developing a suite of quantitative, analytical products to support decision-making on humanitarian crises and disasters. These help make decisions at different stages of the disaster management cycle, specifically prevention, preparedness and response. 
INFORM develops methodologies and tools for use at the global level and also supports their application at subnational level.", "image_url": "", "created": "2014-09-13T16:09:14.878652", "is_organization": true, "approval_status": "approved", "state": "active"}, "overdue_date": "2024-06-11T14:03:40", "owner_org": "e116c55a-d536-4b47-9308-94b1c7457afe", "package_creator": "andrewthow", "pageviews_last_14_days": 18, "private": false, "qa_completed": false, "review_date": "2023-11-14T14:02:47.798897", "solr_additions": "{\"countries\": [\"World\"]}", "state": "active", "subnational": "0", "title": "INFORM Risk Index", "total_res_downloads": 127, "type": "dataset", "url": null, "version": null, "groups": [{"description": "", "display_name": "World", "id": "world", "image_display_url": "", "name": "world", "title": "World"}], "tags": [], "relationships_as_subject": [], "relationships_as_object": [], "is_fresh": true, "update_status": "fresh", "x_resource_grouping": [], "resources": [{"alt_url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/603e40eb-a620-47e2-b8ac-e51961c7d661/download/", "cache_last_updated": null, "cache_url": null, "created": "2021-06-28T14:00:24.185308", "datastore_active": false, "description": "", "download_url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/603e40eb-a620-47e2-b8ac-e51961c7d661/download/inform_risk_2024_v067-.xlsx", "format": "XLSX", "fs_check_info": "{\"state\": \"processing\", \"message\": \"The processing of the file structure check has started\", \"timestamp\": \"2023-11-14T14:02:54.625213\"}", "hash": "", "hdx_rel_url": "/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/603e40eb-a620-47e2-b8ac-e51961c7d661/download/inform_risk_2024_v067-.xlsx", "id": "603e40eb-a620-47e2-b8ac-e51961c7d661", "last_modified": "2023-11-14T14:02:54.750151", "metadata_modified": "2023-11-14T14:03:40.957373", "microdata": false, "mimetype": 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "mimetype_inner": null, "name": "INFORM_Risk_2024_v067 .xlsx", "originalHash": "-2021265870", "package_id": "f5ec2ee7-8a1b-49b4-864b-70bdb582a022", "pii": "false", "position": 0, "resource_type": "file.upload", "size": 2235008, "state": "active", "url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/603e40eb-a620-47e2-b8ac-e51961c7d661/download/inform_risk_2024_v067-.xlsx", "url_type": "upload"}, {"alt_url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/a04977ed-ff5d-4b78-a0de-1fdac8765418/download/", "cache_last_updated": null, "cache_url": null, "created": "2023-05-24T12:36:48.835487", "datastore_active": false, "description": "", "download_url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/a04977ed-ff5d-4b78-a0de-1fdac8765418/download/inform2024_trend_2014_2023_v67_all-.xlsx", "format": "XLSX", "fs_check_info": "{\"state\": \"processing\", \"message\": \"The processing of the file structure check has started\", \"timestamp\": \"2023-11-14T14:03:40.698771\"}", "hash": "", "hdx_rel_url": "/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/a04977ed-ff5d-4b78-a0de-1fdac8765418/download/inform2024_trend_2014_2023_v67_all-.xlsx", "id": "a04977ed-ff5d-4b78-a0de-1fdac8765418", "last_modified": "2023-11-14T14:03:40.799412", "metadata_modified": "2023-11-14T14:03:43.287422", "microdata": false, "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "mimetype_inner": null, "name": "INFORM2024_TREND_2014_2023_v67_ALL .xlsx", "originalHash": "-971618184", "package_id": "f5ec2ee7-8a1b-49b4-864b-70bdb582a022", "pii": "false", "position": 1, "resource_type": "file.upload", "size": 16651877, "state": "active", "url": 
"https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/a04977ed-ff5d-4b78-a0de-1fdac8765418/download/inform2024_trend_2014_2023_v67_all-.xlsx", "url_type": "upload"}, {"alt_url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/93dd74fd-7b9a-4e61-b452-62b3d5bed4ff/download/", "cache_last_updated": null, "cache_url": null, "created": "2023-05-24T12:42:29.815651", "datastore_active": false, "description": "", "download_url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/93dd74fd-7b9a-4e61-b452-62b3d5bed4ff/download/inform-concept-and-methodology-version-2017-pdf-final-4.pdf", "format": "PDF", "hash": "", "hdx_rel_url": "/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/93dd74fd-7b9a-4e61-b452-62b3d5bed4ff/download/inform-concept-and-methodology-version-2017-pdf-final-4.pdf", "id": "93dd74fd-7b9a-4e61-b452-62b3d5bed4ff", "last_modified": "2023-05-24T12:42:29.602115", "metadata_modified": "2023-05-24T12:42:31.295566", "microdata": false, "mimetype": "application/pdf", "mimetype_inner": null, "name": "INFORM Concept and Methodology Version 2017 Pdf FINAL.pdf", "originalHash": "-988266717", "package_id": "f5ec2ee7-8a1b-49b4-864b-70bdb582a022", "pii": "false", "position": 2, "resource_type": "file.upload", "size": 4065660, "state": "active", "url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/93dd74fd-7b9a-4e61-b452-62b3d5bed4ff/download/inform-concept-and-methodology-version-2017-pdf-final-4.pdf", "url_type": "upload"}]}
Binary file not shown.
10 changes: 8 additions & 2 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from hapi_schema.db_ipc_phase import DBIpcPhase
from hapi_schema.db_ipc_type import DBIpcType
from hapi_schema.db_location import DBLocation
from hapi_schema.db_national_risk import DBNationalRisk
from hapi_schema.db_operational_presence import DBOperationalPresence
from hapi_schema.db_org import DBOrg
from hapi_schema.db_org_type import DBOrgType
Expand Down Expand Up @@ -45,6 +46,7 @@ def configuration(self):
"core.yaml",
"food_security.yaml",
"humanitarian_needs.yaml",
"national_risk.yaml",
"operational_presence.yaml",
"population.yaml",
]
Expand Down Expand Up @@ -98,9 +100,9 @@ def test_pipelines(self, configuration, folder):
pipelines.output()

count = session.scalar(select(func.count(DBResource.id)))
assert count == 16
assert count == 17
count = session.scalar(select(func.count(DBDataset.id)))
assert count == 10
assert count == 11
count = session.scalar(select(func.count(DBLocation.id)))
assert count == 6
count = session.scalar(select(func.count(DBAdmin1.id)))
Expand Down Expand Up @@ -143,6 +145,10 @@ def test_pipelines(self, configuration, folder):
select(func.count(DBHumanitarianNeeds.id))
)
assert count == 47126
count = session.scalar(
select(func.count(DBNationalRisk.id))
)
assert count == 6

org_mapping = pipelines.org._org_lookup
assert org_mapping["Action against Hunger"] == {
Expand Down

0 comments on commit f3b629d

Please sign in to comment.