Skip to content

Commit

Permalink
NANs quidel covidtest:
Browse files Browse the repository at this point in the history
* add missing columns
  • Loading branch information
dshemetov committed Apr 27, 2021
1 parent 431dd39 commit 885473d
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 2 deletions.
1 change: 1 addition & 0 deletions changehc/delphi_changehc/update_sensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ def update_sensor(self,
# load data
data.reset_index(inplace=True)
data_frame = self.geo_reindex(data)
breakpoint()
# handle if we need to adjust by weekday
wd_params = Weekday.get_params(data_frame) if self.weekday else None
# run sensor fitting code (maybe in parallel)
Expand Down
21 changes: 20 additions & 1 deletion quidel_covidtest/delphi_quidel_covidtest/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from delphi_utils import (
add_prefix,
create_export_csv,
get_structured_logger
get_structured_logger,
Nans
)

from .constants import (END_FROM_TODAY_MINUS,
Expand All @@ -32,6 +33,22 @@ def log_exit(start_time, logger):
logger.info("Completed indicator run",
elapsed_time_in_seconds=elapsed_time_in_seconds)

def add_nancodes(df):
    """Attach missingness-code columns to the dataframe.

    For each of the "val", "se" and "sample_size" columns, a companion
    "missing_<column>" column is written: `Nans.NOT_MISSING` by default and
    `Nans.UNKNOWN` on rows where the source column is null.

    The dataframe is modified in place and also returned.
    """
    value_columns = ("val", "se", "sample_size")

    # First pass: assume nothing is missing.
    for column in value_columns:
        df["missing_" + column] = Nans.NOT_MISSING

    # Second pass: flag any null entries as unknown.
    for column in value_columns:
        null_rows = df[column].isnull()
        df.loc[null_rows, "missing_" + column] = Nans.UNKNOWN

    return df

def run_module(params: Dict[str, Any]):
"""Run the quidel_covidtest indicator.
Expand Down Expand Up @@ -103,6 +120,7 @@ def run_module(params: Dict[str, Any]):
state_groups, smooth=smoothers[sensor][1],
device=smoothers[sensor][0], first_date=first_date,
last_date=last_date)
state_df = add_nancodes(state_df)
create_export_csv(state_df, geo_res="state", sensor=sensor, export_dir=export_dir,
start_date=export_start_date, end_date=export_end_date)

Expand All @@ -115,6 +133,7 @@ def run_module(params: Dict[str, Any]):
state_groups, geo_data, res_key, smooth=smoothers[sensor][1],
device=smoothers[sensor][0], first_date=first_date,
last_date=last_date)
res_df = add_nancodes(res_df)
create_export_csv(res_df, geo_res=geo_res, sensor=sensor, export_dir=export_dir,
start_date=export_start_date, end_date=export_end_date,
remove_null_samples=True)
Expand Down
6 changes: 5 additions & 1 deletion quidel_covidtest/tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ def test_output_files(self, clean_receiving_dir):
df = pd.read_csv(
join("./receiving", "20200718_state_covid_ag_smoothed_pct_positive.csv")
)
assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all()
expected_columns = [
"geo_id", "val", "se", "sample_size",
"missing_val", "missing_se", "missing_sample_size"
]
assert (df.columns.values == expected_columns).all()

# test_intermediate_file
flag = None
Expand Down

0 comments on commit 885473d

Please sign in to comment.