Merge pull request #81 from OCHA-DAP/HDXDSYS-596
HDXDSYS 596 - add hapi_updated_date
b-j-mills authored May 4, 2024
2 parents: f26ff7f + c611857 (commit af08758)
Showing 17 changed files with 54 additions and 20 deletions.
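Every code change below follows the same pattern: the pipeline captures a single run timestamp (`today`) in its constructor and passes it to each database uploader, which stamps it onto every row it writes as `hapi_updated_date`. The following is a minimal, self-contained sketch of that pattern; `LocationRow` and `LocationUploader` are simplified stand-ins invented for illustration, not the actual hapi_schema or pipeline classes.

```python
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import List, Tuple


@dataclass
class LocationRow:
    # Stand-in for a hapi_schema table row; the real DBLocation has more columns.
    code: str
    name: str
    hapi_updated_date: datetime


class LocationUploader:
    def __init__(self, today: datetime):
        # One timestamp is captured when the pipeline starts and shared by all
        # uploaders, so every row written in a run gets the same hapi_updated_date.
        self.today = today
        self.rows: List[LocationRow] = []

    def populate(self, countries: List[Tuple[str, str]]) -> None:
        for code, name in countries:
            self.rows.append(
                LocationRow(code=code, name=name, hapi_updated_date=self.today)
            )


today = datetime.now(timezone.utc)
uploader = LocationUploader(today=today)
uploader.populate([("AFG", "Afghanistan"), ("COL", "Colombia")])
assert uploader.rows[0].hapi_updated_date == uploader.rows[1].hapi_updated_date
```

Because the timestamp is created once in the pipeline constructor rather than inside each uploader, all tables populated in a single run report one consistent `hapi_updated_date`.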
11 changes: 11 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.7.9] - 2024-05-04

### Added

- Added hapi_updated_date fields to relevant tables

### Changed

- Updated test data for humanitarian needs theme
- Updated operational presence data for Colombia

## [0.7.8] - 2024-05-01

### Added
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -34,7 +34,7 @@ classifiers = [
requires-python = ">=3.8"

dependencies = [
"hapi-schema== 0.6.2",
"hapi-schema== 0.7.3",
"hdx-python-country>= 3.7.0",
"hdx-python-database[postgresql]>= 1.3.0",
"hdx-python-scraper>= 2.3.5",
3 changes: 2 additions & 1 deletion requirements.txt
@@ -37,6 +37,7 @@ docopt==0.6.2
# via
# ckanapi
# num2words

docutils==0.21.2
# via defopt
email-validator==2.1.1
@@ -57,7 +58,7 @@ greenlet==3.0.3
# via sqlalchemy
gspread==6.1.0
# via hdx-python-scraper
hapi-schema==0.6.2
hapi-schema==0.7.3
hdx-python-api==6.2.8
# via hdx-python-scraper
hdx-python-country==3.7.0
12 changes: 9 additions & 3 deletions src/hapi/pipelines/app/pipelines.py
@@ -42,12 +42,15 @@ def __init__(
self.session = session
self.themes_to_run = themes_to_run
self.locations = Locations(
configuration=configuration, session=session, use_live=use_live
configuration=configuration,
session=session,
today=today,
use_live=use_live,
)
countries = configuration["HAPI_countries"]
libhxl_dataset = AdminLevel.get_libhxl_dataset().cache()
self.admins = Admins(
configuration, session, self.locations, libhxl_dataset
configuration, session, self.locations, libhxl_dataset, today
)
self.adminone = AdminLevel(admin_level=1)
self.admintwo = AdminLevel(admin_level=2)
@@ -72,6 +75,7 @@ def __init__(
self.org = Org(
session=session,
datasetinfo=configuration["org"],
today=today,
)
self.org_type = OrgType(
session=session,
@@ -109,7 +113,9 @@
)
self.configurable_scrapers = dict()
self.create_configurable_scrapers()
self.metadata = Metadata(runner=self.runner, session=session)
self.metadata = Metadata(
runner=self.runner, session=session, today=today
)

def create_configurable_scrapers(self):
def _create_configurable_scrapers(
6 changes: 3 additions & 3 deletions src/hapi/pipelines/configs/humanitarian_needs.yaml
@@ -611,7 +611,7 @@ humanitarian_needs_admintwo:
- "#targeted+total"
humanitarian_needs_yem_inneed:
dataset: "yemen-humanitarian-needs-overview"
resource: "YEM_PIN_2023.xlsx"
resource: "yem_pin_2024.xlsx"
format: "xlsx"
headers: 2
use_hxl: True
@@ -789,7 +789,7 @@ humanitarian_needs_admintwo:
- "#inneed+pro_cpn+f+age0_17"
humanitarian_needs_yem_targeted:
dataset: "yemen-humanitarian-needs-overview"
resource: "YEM_TRG_2023.xlsx"
resource: "people-targeted-2024-dataset.xlsx"
format: "xlsx"
headers:
- 1
@@ -799,7 +799,7 @@ humanitarian_needs_admintwo:
- "Demographic Information Dis_PCODE"
admin_exact: True
input:
- "Total Targeted People max"
- "Total Targeted People"
- "Target People Disaggregated by Gender Men"
- "Target People Disaggregated by Gender Women"
- "Target People Disaggregated by Gender Boys"
2 changes: 1 addition & 1 deletion src/hapi/pipelines/configs/operational_presence.yaml
@@ -139,7 +139,7 @@ operational_presence_admintwo:

operational_presence_col:
dataset: "colombia-4ws"
resource: "3W.xlsx"
resource: "COL_345W_Jan-Dec2023.xlsx"
format: "xlsx"
sheet: "Hoja-1"
headers: 1
7 changes: 7 additions & 0 deletions src/hapi/pipelines/database/admins.py
@@ -1,5 +1,6 @@
import logging
from abc import ABC
from datetime import datetime
from typing import Dict, List, Literal

import hxl
@@ -27,12 +28,14 @@ def __init__(
session: Session,
locations: Locations,
libhxl_dataset: hxl.Dataset,
today: datetime,
):
super().__init__(session)
self._limit = configuration["commit_limit"]
self._orphan_admin2s = configuration["orphan_admin2s"]
self._locations = locations
self._libhxl_dataset = libhxl_dataset
self.today = today
self.admin1_data = {}
self.admin2_data = {}

@@ -93,13 +96,15 @@ def _update_admin_table(
code=code,
name=name,
reference_period_start=time_period_start,
hapi_updated_date=self.today,
)
elif desired_admin_level == "2":
admin_row = DBAdmin2(
admin1_ref=parent_ref,
code=code,
name=name,
reference_period_start=time_period_start,
hapi_updated_date=self.today,
)
self._session.add(admin_row)
if i % self._limit == 0:
@@ -122,6 +127,7 @@ def _add_admin1_connector_rows(self):
name="UNSPECIFIED",
is_unspecified=True,
reference_period_start=time_period_start,
hapi_updated_date=self.today,
)
self._session.add(admin_row)
self._session.commit()
@@ -142,6 +148,7 @@ def _add_admin2_connector_rows(self):
name="UNSPECIFIED",
is_unspecified=True,
reference_period_start=time_period_start,
hapi_updated_date=self.today,
)
self._session.add(admin_row)
self._session.commit()
9 changes: 8 additions & 1 deletion src/hapi/pipelines/database/locations.py
@@ -1,3 +1,4 @@
from datetime import datetime
from typing import Dict

from hapi_schema.db_location import DBLocation
@@ -10,7 +11,11 @@

class Locations(BaseUploader):
def __init__(
self, configuration: Dict, session: Session, use_live: bool = True
self,
configuration: Dict,
session: Session,
today: datetime,
use_live: bool = True,
):
super().__init__(session)
Country.countriesdata(
@@ -19,6 +24,7 @@ def __init__(
country_name_mappings=configuration["country_name_mappings"],
)
self._hapi_countries = configuration["HAPI_countries"]
self.today = today
self.data = {}

def populate(self):
Expand All @@ -30,6 +36,7 @@ def populate(self):
code=code,
name=country["#country+name+preferred"],
reference_period_start=parse_date(country["#date+start"]),
hapi_updated_date=self.today,
)
self._session.add(location_row)
self._session.commit()
5 changes: 4 additions & 1 deletion src/hapi/pipelines/database/metadata.py
@@ -1,4 +1,5 @@
import logging
from datetime import datetime

from hapi_schema.db_dataset import DBDataset
from hapi_schema.db_resource import DBResource
@@ -11,9 +12,10 @@


class Metadata(BaseUploader):
def __init__(self, runner: Runner, session: Session):
def __init__(self, runner: Runner, session: Session, today: datetime):
super().__init__(session)
self.runner = runner
self.today = today
self.dataset_data = {}
self.resource_data = {}

@@ -50,6 +52,7 @@ def populate(self):
update_date=resource["update_date"],
is_hxl=resource["is_hxl"],
download_url=resource["download_url"],
hapi_updated_date=self.today,
)
self._session.add(resource_row)
self._session.commit()
4 changes: 4 additions & 0 deletions src/hapi/pipelines/database/org.py
@@ -1,4 +1,5 @@
import logging
from datetime import datetime
from typing import Dict

from hapi_schema.db_org import DBOrg
@@ -18,9 +19,11 @@ def __init__(
self,
session: Session,
datasetinfo: Dict[str, str],
today: datetime,
):
super().__init__(session)
self._datasetinfo = datasetinfo
self.today = today
self.data = {}
self._org_map = {}
self._org_lookup = {}
@@ -57,6 +60,7 @@ def populate_single(
org_type_code=org_type,
reference_period_start=time_period_start,
reference_period_end=time_period_end,
hapi_updated_date=self.today,
)
self._session.add(org_row)
self._session.commit()
9 changes: 2 additions & 7 deletions src/hapi/pipelines/database/sector.py
@@ -3,7 +3,6 @@

from hapi_schema.db_sector import DBSector
from hdx.scraper.utilities.reader import Read
from hdx.utilities.dateparse import parse_date
from hxl import TagPattern
from sqlalchemy.orm import Session

@@ -29,13 +28,12 @@
def populate(self):
logger.info("Populating sector table")

def parse_sector_values(code: str, name: str, date: str):
def parse_sector_values(code: str, name: str):
self.data[name] = code
self.data[code] = code
sector_row = DBSector(
code=code,
name=name,
reference_period_start=parse_date(date),
)
self._session.add(sector_row)
pattern = code.lower().replace("-", "_")
@@ -50,7 +48,6 @@ def parse_sector_values(code: str, name: str, date: str):
parse_sector_values(
code=row["#sector +code +acronym"],
name=row["#sector +name +preferred +i_en"],
date=row["#date +created"],
)

extra_entries = {
@@ -59,9 +56,7 @@
"Multi": "Multi-sector (unspecified)",
}
for code in extra_entries:
parse_sector_values(
code=code, name=extra_entries[code], date="2023-11-21"
)
parse_sector_values(code=code, name=extra_entries[code])

self._session.commit()

4 binary files not shown.

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/test_main.py
@@ -152,7 +152,7 @@ def test_pipelines(self, configuration, folder):
count = session.scalar(
select(func.count(DBHumanitarianNeeds.id))
)
assert count == 47126
assert count == 47582
count = session.scalar(
select(func.count(DBNationalRisk.id))
)
