diff --git a/covid_xprize/scoring/prescriptor_scoring.py b/covid_xprize/scoring/prescriptor_scoring.py index e7dcb165..c424ade5 100644 --- a/covid_xprize/scoring/prescriptor_scoring.py +++ b/covid_xprize/scoring/prescriptor_scoring.py @@ -1,8 +1,6 @@ -import os - import pandas as pd -from covid_xprize.standard_predictor.predict import predict +from covid_xprize.standard_predictor.xprize_predictor import XPrizePredictor from covid_xprize.standard_predictor.xprize_predictor import NPI_COLUMNS @@ -17,28 +15,18 @@ def weight_prescriptions_by_cost(pres_df, cost_df): def generate_cases_and_stringency_for_prescriptions(start_date, end_date, prescription_file, costs_file): - # Load prescriptions - pres_df = pd.read_csv(prescription_file) + # Load the prescriptions, handling Date and regions + pres_df = XPrizePredictor.load_original_data(prescription_file) # Generate predictions for all prescriptions + predictor = XPrizePredictor() pred_dfs = [] for idx in pres_df['PrescriptionIndex'].unique(): idx_df = pres_df[pres_df['PrescriptionIndex'] == idx] idx_df = idx_df.drop(columns='PrescriptionIndex') # Predictor doesn't need this - ip_file_path = 'prescriptions/prescription_{}.csv'.format(idx) - os.makedirs(os.path.dirname(ip_file_path), exist_ok=True) - idx_df.to_csv(ip_file_path) - preds_file_path = 'predictions/predictions_{}.csv'.format(idx) - os.makedirs(os.path.dirname(preds_file_path), exist_ok=True) - - # Run predictor - predict(start_date, end_date, ip_file_path, preds_file_path) - - # Collect predictions - pred_df = pd.read_csv(preds_file_path, - parse_dates=['Date'], - encoding="ISO-8859-1", - error_bad_lines=True) + # Generate the predictions + pred_df = predictor.predict_from_df(start_date, end_date, idx_df) + print(f"Generated predictions for PrescriptionIndex {idx}") pred_df['PrescriptionIndex'] = idx pred_dfs.append(pred_df) pred_df = pd.concat(pred_dfs) diff --git a/covid_xprize/standard_predictor/predict.py b/covid_xprize/standard_predictor/predict.py index 7adac2e6..99bbc602 100644 --- a/covid_xprize/standard_predictor/predict.py +++ b/covid_xprize/standard_predictor/predict.py @@ -7,11 +7,6 @@ ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) -# Fixed weights for the standard predictor. -MODEL_WEIGHTS_FILE = os.path.join(ROOT_DIR, "models", "trained_model_weights.h5") - -DATA_FILE = os.path.join(ROOT_DIR, 'data', "OxCGRT_latest.csv") - def predict(start_date: str, end_date: str, @@ -29,7 +24,7 @@ def predict(start_date: str, with columns "CountryName,RegionName,Date,PredictedDailyNewCases" """ # !!! YOUR CODE HERE !!! - predictor = XPrizePredictor(MODEL_WEIGHTS_FILE, DATA_FILE) + predictor = XPrizePredictor() # Generate the predictions preds_df = predictor.predict(start_date, end_date, path_to_ips_file) # Create the output path diff --git a/covid_xprize/standard_predictor/xprize_predictor.py b/covid_xprize/standard_predictor/xprize_predictor.py index 3e62ce4d..63378240 100644 --- a/covid_xprize/standard_predictor/xprize_predictor.py +++ b/covid_xprize/standard_predictor/xprize_predictor.py @@ -25,6 +25,8 @@ ADDITIONAL_US_STATES_CONTEXT = os.path.join(DATA_PATH, "US_states_populations.csv") ADDITIONAL_UK_CONTEXT = os.path.join(DATA_PATH, "uk_populations.csv") ADDITIONAL_BRAZIL_CONTEXT = os.path.join(DATA_PATH, "brazil_populations.csv") +# Fixed weights for the standard predictor. +MODEL_WEIGHTS_FILE = os.path.join(ROOT_DIR, "models", "trained_model_weights.h5") NPI_COLUMNS = ['C1_School closing', 'C2_Workplace closing', @@ -72,7 +74,7 @@ class XPrizePredictor(object): A class that computes a fitness for Prescriptor candidates. """ - def __init__(self, path_to_model_weights, data_url): + def __init__(self, path_to_model_weights=MODEL_WEIGHTS_FILE, data_url=DATA_FILE_PATH): if path_to_model_weights: # Load model weights @@ -94,13 +96,18 @@ def predict(self, start_date_str: str, end_date_str: str, path_to_ips_file: str) -> pd.DataFrame: + # Load the npis into a DataFrame, handling regions + npis_df = self.load_original_data(path_to_ips_file) + return self.predict_from_df(start_date_str, end_date_str, npis_df) + + def predict_from_df(self, + start_date_str: str, + end_date_str: str, + npis_df: pd.DataFrame) -> pd.DataFrame: start_date = pd.to_datetime(start_date_str, format='%Y-%m-%d') end_date = pd.to_datetime(end_date_str, format='%Y-%m-%d') nb_days = (end_date - start_date).days + 1 - # Load the npis into a DataFrame, handling regions - npis_df = self._load_original_data(path_to_ips_file) - # Prepare the output forecast = {"CountryName": [], "RegionName": [], @@ -177,7 +184,7 @@ def _prepare_dataframe(self, data_url: str) -> pd.DataFrame: :return: a Pandas DataFrame with the historical data """ # Original df from Oxford - df1 = self._load_original_data(data_url) + df1 = self.load_original_data(data_url) # Additional context df (e.g Population for each country) df2 = self._load_additional_context_df() @@ -224,7 +231,7 @@ def _prepare_dataframe(self, data_url: str) -> pd.DataFrame: return df @staticmethod - def _load_original_data(data_url): + def load_original_data(data_url): latest_df = pd.read_csv(data_url, parse_dates=['Date'], encoding="ISO-8859-1",