Skip to content

Commit

Permalink
HDXDSYS-898 Refactor org code (#134)
Browse files Browse the repository at this point in the history
* Add uncleaned names as keys to lookups
Remove intersectoral check
dict() to {}
Move branches that contain `continue` higher up in the loops in operational presence

* Some rearrangement of operational presence and comments to help me understand the org processing

* Small reorg of org type if code

* Store normalised keys as well as non-normalised ones

* Use already normalised key

* Simplify org lookup code and operational presence

* Add to lookup to reduce need to keep normalising

* Pass around normalised variables

* Remove org lookup only used in test

* Remove unnecessary variable

* Update CHANGELOG

* Can just return value here

* Combine ifs

* Use named tuples for clarity

* Rename value to org_info

* Use org_data in populate_multiple

* Use OrgData in tests

* Make OrgInfo into a data class
Add used and complete bools to OrgInfo
Correct OrgInfo objects in org_map using corrections found by looking them up in the data member variable

* Make separate function

* Add debug option to command line
Add org_map debug
  • Loading branch information
mcarans authored Jul 23, 2024
1 parent 95ec410 commit cfb7b80
Show file tree
Hide file tree
Showing 12 changed files with 476 additions and 188 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.9.36] - 2024-07-19

### Changed

- Refactor org code
- Also add uncleaned names as keys to lookups

## [0.9.35] - 2024-07-18

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ requires-python = ">=3.8"
dependencies = [
"hapi-schema>=0.8.14",
"hdx-python-api>= 6.3.1",
"hdx-python-country>= 3.7.6",
"hdx-python-country>= 3.7.7",
"hdx-python-database[postgresql]>= 1.3.1",
"hdx-python-scraper>= 2.4.0",
"hdx-python-utilities>= 3.7.2",
Expand Down
10 changes: 5 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ click==8.1.7
# via typer
coverage==7.6.0
# via pytest-cov
cryptography==42.0.8
cryptography==43.0.0
# via pyopenssl
defopt==6.4.0
# via hdx-python-api
Expand Down Expand Up @@ -64,7 +64,7 @@ hdx-python-api==6.3.1
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-scraper
hdx-python-country==3.7.6
hdx-python-country==3.7.7
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-api
Expand Down Expand Up @@ -172,13 +172,13 @@ pydantic-core==2.20.1
# via pydantic
pygments==2.18.0
# via rich
pyopenssl==24.1.0
pyopenssl==24.2.1
# via
# hdx-python-api
# ndg-httpsclient
pyphonetics==0.5.3
# via hdx-python-country
pytest==8.2.2
pytest==8.3.1
# via
# hapi-pipelines (pyproject.toml)
# pytest-check
Expand Down Expand Up @@ -242,7 +242,7 @@ ruamel-yaml==0.18.6
# via hdx-python-utilities
ruamel-yaml-clib==0.2.8
# via ruamel-yaml
setuptools==70.3.0
setuptools==71.1.0
# via ckanapi
shellingham==1.5.4
# via typer
Expand Down
12 changes: 12 additions & 0 deletions src/hapi/pipelines/app/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ def parse_args():
action="store_true",
help="Use saved data",
)
parser.add_argument(
"-dbg",
"--debug",
default=False,
action="store_true",
help="Debug",
)
return parser.parse_args()


Expand All @@ -95,6 +102,7 @@ def main(
basic_auths: Optional[Dict[str, str]] = None,
save: bool = False,
use_saved: bool = False,
debug: bool = False,
**ignore,
) -> None:
"""Run HAPI. Either a database connection string (db_uri) or database
Expand All @@ -110,6 +118,7 @@ def main(
basic_auths (Optional[Dict[str, str]]): Basic authorisations
save (bool): Whether to save state for testing. Defaults to False.
use_saved (bool): Whether to use saved state for testing. Defaults to False.
debug (bool): Whether to output debug info. Defaults to False.
Returns:
None
Expand Down Expand Up @@ -156,6 +165,8 @@ def main(
)
pipelines.run()
pipelines.output()
if debug:
pipelines.debug("debug")
logger.info("HAPI pipelines completed!")


Expand Down Expand Up @@ -233,4 +244,5 @@ def main(
basic_auths=basic_auths,
save=args.save,
use_saved=args.use_saved,
debug=args.debug,
)
5 changes: 4 additions & 1 deletion src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def __init__(
errors_on_exit=errors_on_exit,
scrapers_to_run=scrapers_to_run,
)
self.configurable_scrapers = dict()
self.configurable_scrapers = {}
self.create_configurable_scrapers()
self.metadata = Metadata(
runner=self.runner, session=session, today=today
Expand Down Expand Up @@ -333,3 +333,6 @@ def output(self):
self.wfp_commodity.populate()
self.wfp_market.populate()
self.food_price.populate()

def debug(self, folder: str) -> None:
self.org.output_org_map(folder)
2 changes: 1 addition & 1 deletion src/hapi/pipelines/database/conflict_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def populate(self):
batch_populate(conflict_event_rows, self._session, DBConflictEvent)

for dataset, msg in self._config.get(
"conflict_event_error_messages", dict()
"conflict_event_error_messages", {}
).items():
add_message(errors, dataset, msg)
for error in sorted(errors):
Expand Down
Loading

0 comments on commit cfb7b80

Please sign in to comment.