From 51d41c95faee5f1caaeb0be2d7ce3d5a44ac77ca Mon Sep 17 00:00:00 2001 From: Olivier Francon Date: Sat, 23 Jan 2021 02:48:42 +0100 Subject: [PATCH] #189 Make generate_cases_and_stringency_for_prescriptions return the generated DataFrames --- covid_xprize/scoring/prescriptor_scoring.py | 15 +++++++++++---- prescriptor_robojudge.ipynb | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/covid_xprize/scoring/prescriptor_scoring.py b/covid_xprize/scoring/prescriptor_scoring.py index c424ade5..9c6365a1 100644 --- a/covid_xprize/scoring/prescriptor_scoring.py +++ b/covid_xprize/scoring/prescriptor_scoring.py @@ -1,3 +1,5 @@ +import time + import pandas as pd from covid_xprize.standard_predictor.xprize_predictor import XPrizePredictor @@ -15,12 +17,13 @@ def weight_prescriptions_by_cost(pres_df, cost_df): def generate_cases_and_stringency_for_prescriptions(start_date, end_date, prescription_file, costs_file): + start_time = time.time() # Load the prescriptions, handling Date and regions pres_df = XPrizePredictor.load_original_data(prescription_file) # Generate predictions for all prescriptions predictor = XPrizePredictor() - pred_dfs = [] + pred_dfs = {} for idx in pres_df['PrescriptionIndex'].unique(): idx_df = pres_df[pres_df['PrescriptionIndex'] == idx] idx_df = idx_df.drop(columns='PrescriptionIndex') # Predictor doesn't need this @@ -28,8 +31,8 @@ def generate_cases_and_stringency_for_prescriptions(start_date, end_date, prescr pred_df = predictor.predict_from_df(start_date, end_date, idx_df) print(f"Generated predictions for PrescriptionIndex {idx}") pred_df['PrescriptionIndex'] = idx - pred_dfs.append(pred_df) - pred_df = pd.concat(pred_dfs) + pred_dfs[idx] = pred_df + pred_df = pd.concat(list(pred_dfs.values())) # Aggregate cases by prescription index and geo agg_pred_df = pred_df.groupby(['CountryName', @@ -65,8 +68,12 @@ def generate_cases_and_stringency_for_prescriptions(start_date, end_date, prescr 'PrescriptionIndex', 'PredictedDailyNewCases', 'Stringency']] + end_time = time.time() + elapsed_time = end_time - start_time + elapsed_time_tring = time.strftime("%H:%M:%S", time.gmtime(elapsed_time)) + print(f"Evaluated {len(pred_dfs)} PrescriptionIndex in {elapsed_time_tring} seconds") - return df + return df, pred_dfs # Compute domination relationship for each pair of prescriptors for each geo diff --git a/prescriptor_robojudge.ipynb b/prescriptor_robojudge.ipynb index 4f5e01a7..f04d3215 100644 --- a/prescriptor_robojudge.ipynb +++ b/prescriptor_robojudge.ipynb @@ -191,7 +191,7 @@ "dfs = []\n", "for prescriptor_name, prescription_file in sorted(prescription_files.items()):\n", " print(\"Generating predictions for\", prescriptor_name)\n", - " df = generate_cases_and_stringency_for_prescriptions(START_DATE, END_DATE, prescription_file, TEST_COST)\n", + " df, _ = generate_cases_and_stringency_for_prescriptions(START_DATE, END_DATE, prescription_file, TEST_COST)\n", " df['PrescriptorName'] = prescriptor_name\n", " dfs.append(df)\n", "df = pd.concat(dfs)"