diff --git a/quidel_covidtest/delphi_quidel_covidtest/run.py b/quidel_covidtest/delphi_quidel_covidtest/run.py
index 052a5fa84a..fcdb38a774 100644
--- a/quidel_covidtest/delphi_quidel_covidtest/run.py
+++ b/quidel_covidtest/delphi_quidel_covidtest/run.py
@@ -11,7 +11,8 @@
 from delphi_utils import (
     add_prefix,
     create_export_csv,
-    get_structured_logger
+    get_structured_logger,
+    Nans
 )
 
 from .constants import (END_FROM_TODAY_MINUS, EXPORT_DAY_RANGE,
@@ -32,6 +33,22 @@ def log_exit(start_time, logger):
     logger.info("Completed indicator run",
         elapsed_time_in_seconds=elapsed_time_in_seconds)
 
+def add_nancodes(df):
+    """Add nancodes to the dataframe."""
+    # Default missingness codes
+    df["missing_val"] = Nans.NOT_MISSING
+    df["missing_se"] = Nans.NOT_MISSING
+    df["missing_sample_size"] = Nans.NOT_MISSING
+
+    # Mark any remaining nans with unknown
+    remaining_nans_mask = df["val"].isnull()
+    df.loc[remaining_nans_mask, "missing_val"] = Nans.UNKNOWN
+    remaining_nans_mask = df["se"].isnull()
+    df.loc[remaining_nans_mask, "missing_se"] = Nans.UNKNOWN
+    remaining_nans_mask = df["sample_size"].isnull()
+    df.loc[remaining_nans_mask, "missing_sample_size"] = Nans.UNKNOWN
+    return df
+
 def run_module(params: Dict[str, Any]):
     """Run the quidel_covidtest indicator.
 
@@ -102,6 +119,7 @@ def run_module(params: Dict[str, Any]):
                 state_groups, smooth=smoothers[sensor][1],
                 device=smoothers[sensor][0], first_date=first_date,
                 last_date=last_date)
+            state_df = add_nancodes(state_df)
             create_export_csv(state_df, geo_res="state", sensor=sensor, export_dir=export_dir,
                               start_date=export_start_date, end_date=export_end_date)
 
@@ -114,6 +132,7 @@ def run_module(params: Dict[str, Any]):
                     state_groups, geo_data, res_key, smooth=smoothers[sensor][1],
                     device=smoothers[sensor][0], first_date=first_date,
                     last_date=last_date)
+                res_df = add_nancodes(res_df)
                 create_export_csv(res_df, geo_res=geo_res, sensor=sensor, export_dir=export_dir,
                                   start_date=export_start_date, end_date=export_end_date,
                                   remove_null_samples=True)
diff --git a/quidel_covidtest/tests/test_run.py b/quidel_covidtest/tests/test_run.py
index 5e395f33a0..6d000dba09 100644
--- a/quidel_covidtest/tests/test_run.py
+++ b/quidel_covidtest/tests/test_run.py
@@ -63,7 +63,11 @@ def test_output_files(self, clean_receiving_dir):
         df = pd.read_csv(
             join("./receiving", "20200718_state_covid_ag_smoothed_pct_positive.csv")
         )
-        assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all()
+        expected_columns = [
+            "geo_id", "val", "se", "sample_size",
+            "missing_val", "missing_se", "missing_sample_size"
+        ]
+        assert list(df.columns) == expected_columns
 
         # test_intermediate_file
         flag = None