Skip to content

Commit

Permalink
Another round of changes
Browse files Browse the repository at this point in the history
  • Loading branch information
rzats committed Jun 30, 2023
1 parent bfd00c3 commit d3d77a3
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,10 @@ def test_acquire_specific_issue(self):
pre_max_issue = db.get_max_issue()
self.assertEqual(pre_max_issue, pd.Timestamp('1900-01-01 00:00:00'))
with self.subTest(name='first acquisition'), \
patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()) as mock_fetch_meta, \
patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()), \
patch.object(Network, 'fetch_dataset', side_effect=[self.test_utils.load_sample_dataset("dataset0.csv")]
) as mock_fetch:
acquired = Utils.update_dataset(Database,
Network,
date(2021, 3, 12),
date(2021, 3, 14))
with Database.connect() as db:
Expand Down
18 changes: 7 additions & 11 deletions src/acquisition/covid_hosp/common/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,32 +18,28 @@ class Database:
DATASET_NAME = None

def __init__(self,
connection,
chs = CovidHospSomething()):
connection):
"""Create a new Database object.
Parameters
----------
connection
An open connection to a database.
table_name : str
The name of the table which holds the dataset.
hhs_dataset_id : str
The 9-character healthdata.gov identifier for this dataset.
columns_and_types : tuple[str, str, Callable]
List of 3-tuples of (CSV header name, SQL column name, data type) for
all the columns in the CSV file.
"""

self.connection = connection

if self.DATASET_NAME is None:
raise NameError('no dataset given!') # Must be defined by subclasses

chs = CovidHospSomething()
# The name of the table which holds the dataset.
self.table_name = chs.get_ds_table_name(self.DATASET_NAME)
# The 9-character healthdata.gov identifier for this dataset.
self.hhs_dataset_id = chs.get_ds_dataset_id(self.DATASET_NAME)
self.metadata_id = chs.get_ds_metadata_id(self.DATASET_NAME)
self.publication_col_name = chs.get_ds_issue_column(self.DATASET_NAME)
self.issue_column = chs.get_ds_issue_column(self.DATASET_NAME)
# List of 3-tuples of (CSV header name, SQL column name, data type) for all the columns in the CSV file.
self.columns_and_types = {c.csv_name: c for c in chs.get_ds_ordered_csv_cols(self.DATASET_NAME)}
self.key_columns = chs.get_ds_key_cols(self.DATASET_NAME)
self.aggregate_key_columns = chs.get_ds_aggregate_key_cols(self.DATASET_NAME)
Expand Down Expand Up @@ -181,7 +177,7 @@ def nan_safe_dtype(dtype, value):
num_columns = 2 + len(dataframe_columns_and_types)
value_placeholders = ', '.join(['%s'] * num_columns)
columns = ', '.join(f'`{i.sql_name}`' for i in dataframe_columns_and_types)
sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) ' \
sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.issue_column}`, {columns}) ' \
f'VALUES ({value_placeholders})'
id_and_publication_date = (0, publication_date)
if logger:
Expand Down
10 changes: 4 additions & 6 deletions src/acquisition/covid_hosp/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,15 +163,13 @@ def merge_by_key_cols(dfs, key_cols, logger=False):
return result.reset_index(level=key_cols)

@staticmethod
def update_dataset(database, network=Network, newer_than=None, older_than=None):
def update_dataset(database, newer_than=None, older_than=None):
"""Acquire the most recent dataset, unless it was previously acquired.
Parameters
----------
database : delphi.epidata.acquisition.covid_hosp.common.database.Database
A `Database` subclass for a particular dataset.
network : delphi.epidata.acquisition.covid_hosp.common.network.Network
A `Network` subclass for a particular dataset.
newer_than : date
Lower bound (exclusive) of days to get issues for.
older_than : date
Expand All @@ -184,9 +182,8 @@ def update_dataset(database, network=Network, newer_than=None, older_than=None):
"""
logger = database.logger()

datasets = []
with database.connect() as db:
metadata = network.fetch_metadata(db.metadata_id, logger=logger)
metadata = Network.fetch_metadata(db.metadata_id, logger=logger)
max_issue = db.get_max_issue(logger=logger)

older_than = datetime.datetime.today().date() if newer_than is None else older_than
Expand All @@ -195,10 +192,11 @@ def update_dataset(database, network=Network, newer_than=None, older_than=None):
if not daily_issues:
logger.info("no new issues; nothing to do")
return False
datasets = []
for issue, revisions in daily_issues.items():
issue_int = int(issue.strftime("%Y%m%d"))
# download the dataset and add it to the database
dataset = Utils.merge_by_key_cols([network.fetch_dataset(url, logger=logger) for url, _ in revisions],
dataset = Utils.merge_by_key_cols([Network.fetch_dataset(url, logger=logger) for url, _ in revisions],
db.key_columns,
logger=logger)
# add metadata to the database
Expand Down
3 changes: 0 additions & 3 deletions src/acquisition/covid_hosp/facility/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,9 @@
Facility" dataset provided by the US Department of Health & Human Services via
healthdata.gov.
"""

# first party
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
from delphi.epidata.acquisition.covid_hosp.facility.database import Database


# main entry point: acquire the most recent issues of the "Facility" dataset
# (unless previously acquired — see Utils.update_dataset) and store them via
# the facility-specific Database subclass. Takes no command-line arguments.
if __name__ == '__main__':
  Utils.update_dataset(Database)
2 changes: 0 additions & 2 deletions src/acquisition/covid_hosp/state_daily/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@
dataset provided by the US Department of Health & Human Services
via healthdata.gov.
"""
# first party
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
from delphi.epidata.acquisition.covid_hosp.state_daily.database import Database


# main entry point: acquire the most recent issues of the state daily dataset
# (unless previously acquired — see Utils.update_dataset) and store them via
# the state_daily-specific Database subclass. Takes no command-line arguments.
if __name__ == '__main__':
  Utils.update_dataset(Database)
3 changes: 0 additions & 3 deletions src/acquisition/covid_hosp/state_timeseries/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,9 @@
Timeseries" dataset provided by the US Department of Health & Human Services
via healthdata.gov.
"""

# first party
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
from delphi.epidata.acquisition.covid_hosp.state_timeseries.database import Database


# main entry point: acquire the most recent issues of the "Timeseries" dataset
# (unless previously acquired — see Utils.update_dataset) and store them via
# the state_timeseries-specific Database subclass. Takes no command-line arguments.
if __name__ == '__main__':
  Utils.update_dataset(Database)
17 changes: 8 additions & 9 deletions tests/acquisition/covid_hosp/common/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,14 @@ def create_mock_database(self, mock_connection,
csv_cols=[],
key_cols=[],
aggregate_cols=[]):
chs = CovidHospSomething()
chs.get_ds_table_name = MagicMock(return_value = table_name)
chs.get_ds_dataset_id = MagicMock(return_value = dataset_id)
chs.get_ds_metadata_id = MagicMock(return_value = metadata_id)
chs.get_ds_issue_column = MagicMock(return_value = issue_col)
chs.get_ds_ordered_csv_cols = MagicMock(return_value = csv_cols)
chs.get_ds_key_cols = MagicMock(return_value = key_cols)
chs.get_ds_aggregate_key_cols = MagicMock(return_value = aggregate_cols)
return TestDatabase(mock_connection, chs=chs)
with patch.object(CovidHospSomething, 'get_ds_table_name', return_value=table_name), \
patch.object(CovidHospSomething, 'get_ds_dataset_id', return_value=dataset_id), \
patch.object(CovidHospSomething, 'get_ds_metadata_id', return_value=metadata_id), \
patch.object(CovidHospSomething, 'get_ds_issue_column', return_value=issue_col), \
patch.object(CovidHospSomething, 'get_ds_ordered_csv_cols', return_value=csv_cols), \
patch.object(CovidHospSomething, 'get_ds_key_cols', return_value=key_cols), \
patch.object(CovidHospSomething, 'get_ds_aggregate_key_cols', return_value=aggregate_cols):
return TestDatabase(mock_connection)

def test_commit_and_close_on_success(self):
"""Commit and close the connection after success."""
Expand Down
14 changes: 8 additions & 6 deletions tests/acquisition/covid_hosp/common/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from unittest.mock import MagicMock, PropertyMock, patch

# first party
from delphi.epidata.acquisition.covid_hosp.common.network import Network
from delphi.epidata.acquisition.covid_hosp.common.test_utils import UnitTestUtils
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils, CovidHospException

Expand Down Expand Up @@ -76,18 +77,17 @@ def test_issues_to_fetch(self):
def test_run_skip_old_dataset(self):
  """Don't re-acquire an old dataset.

  The mocked database reports a max issue far in the future (year 2200),
  so every issue in the sample metadata is older than what is already
  stored; `Utils.update_dataset` must return False without fetching or
  inserting anything.
  """

  # NOTE(review): this span was a diff hunk with old (mock_network-based) and
  # new (patch.object-based) lines interleaved; this is the coherent
  # post-commit version, with the dead mock_network setup removed.
  mock_database = MagicMock()
  # grab the connection object the context manager will hand out, so its
  # call expectations can be checked after update_dataset runs
  with mock_database.connect() as mock_connection:
    pass
  mock_connection.get_max_issue.return_value = pd.Timestamp("2200/1/1")

  with patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()), \
       patch.object(Network, 'fetch_dataset', return_value=None) as fetch_dataset:
    result = Utils.update_dataset(database=mock_database)

  self.assertFalse(result)
  # nothing newer than the max issue: no downloads, no writes
  fetch_dataset.assert_not_called()
  mock_connection.insert_metadata.assert_not_called()
  mock_connection.insert_dataset.assert_not_called()

Expand All @@ -107,7 +107,9 @@ def test_run_acquire_new_dataset(self):
with patch.object(Utils, 'issues_to_fetch') as mock_issues:
mock_issues.return_value = {pd.Timestamp("2021/3/15"): [("url1", pd.Timestamp("2021-03-15 00:00:00")),
("url2", pd.Timestamp("2021-03-15 00:00:00"))]}
result = Utils.update_dataset(database=mock_database, network=mock_network)
with patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()), \
patch.object(Network, 'fetch_dataset', return_value=fake_dataset):
result = Utils.update_dataset(database=mock_database)

self.assertTrue(result)

Expand Down

0 comments on commit d3d77a3

Please sign in to comment.