-
Notifications
You must be signed in to change notification settings - Fork 30
/
power_rankings.py
50 lines (40 loc) · 2.01 KB
/
power_rankings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from collections import defaultdict
import glob
import numpy as np
import pandas as pd
"""
Compute "Power Rankings" based on the mean 'N weeks ahead' percentiles,
where N is 1-6 weeks ahead. This includes both US and state-by-state rankings.
There are a total of 12 summaries: US + state-by-state for each of 1-6 weeks ahead.
For each model, we take its mean percentile rank across all summaries.
If a model is the best in every summary, it will have a mean percentile of 0.
If a model is the median rank in every summary, it will have a mean percentile of 0.5.
The lower the mean percentile, the better the model.
If a model does not appear in a summary (e.g. it does not make 6 week ahead forecasts),
it does not get included in the mean percentile. But a model must be included in at least
6 summaries to appear in the Power Rankings.
"""
SUMMARY_GLOB = 'summary/summary_[1-6]_weeks_ahead*.csv'
OUT_FNAME = 'summary/power_rankings.csv'
RANK_COLUMNS = ['mean_percentile', 'num_summaries']


def summary_percentiles(df, min_entries=3):
    """Return ``[(model_name, percentile), ...]`` for one summary table.

    Columns whose name contains 'mean_sq_abs_error' are ignored, and only rows
    (models) with at least ``min_entries`` non-null values in the remaining
    columns are kept. A model's percentile is its row position among the kept
    rows divided by (number of kept rows - 1): first row -> 0.0, last -> 1.0.

    NOTE(review): this relies on the rows already being ordered best-first in
    the summary CSV -- confirm against whatever writes those files.
    """
    df_filt = df[[c for c in df.columns if 'mean_sq_abs_error' not in c]]
    df_filt = df_filt[df_filt.notnull().sum(axis=1) >= min_entries]
    # With a single surviving model, len - 1 == 0 and rank / 0 would raise
    # ZeroDivisionError; clamp the denominator so the sole model gets 0.0.
    denom = max(len(df_filt) - 1, 1)
    return [(model_name, rank / denom)
            for rank, model_name in enumerate(df_filt.index)]


def compute_power_rankings(fnames, min_entries=3, min_summaries=6):
    """Build the power-rankings table from a list of summary CSV paths.

    Args:
        fnames: iterable of CSV paths; each is read with the first column as
            the index (model names).
        min_entries: minimum non-null entries a model needs within a summary
            to be ranked in that summary.
        min_summaries: minimum number of summaries a model must be ranked in
            to appear in the result.

    Returns:
        DataFrame indexed by model name with columns 'mean_percentile' (float)
        and 'num_summaries' (int), sorted by ascending 'mean_percentile'.
        The frame is empty, but still has both columns, when no model
        qualifies.
    """
    model_to_percentiles = defaultdict(list)
    for fname in fnames:
        df = pd.read_csv(fname, index_col=0)
        for model_name, percentile in summary_percentiles(df, min_entries):
            model_to_percentiles[model_name].append(percentile)

    model_to_mean_percentile = {
        model: (np.mean(percentiles), len(percentiles))
        for model, percentiles in model_to_percentiles.items()
        if len(percentiles) >= min_summaries
    }
    # from_dict(..., columns=...) keeps the column labels even when the dict is
    # empty, so sort_values below cannot fail with a KeyError.
    df_ranks = pd.DataFrame.from_dict(
        model_to_mean_percentile, orient='index', columns=RANK_COLUMNS)
    df_ranks = df_ranks.sort_values('mean_percentile')
    df_ranks['num_summaries'] = df_ranks['num_summaries'].astype(int)
    return df_ranks


def main():
    """Print the power rankings and save them to OUT_FNAME."""
    print('========================================')
    print('Power Rankings')
    print('========================================')
    # glob order is OS-dependent; sort so repeated runs see the files (and
    # therefore the models) in the same order.
    df_ranks = compute_power_rankings(sorted(glob.glob(SUMMARY_GLOB)))
    if df_ranks.empty:
        print('No models appear in enough summaries; rankings are empty.')
    print(df_ranks)
    df_ranks.to_csv(OUT_FNAME)
    print('Saved rankings to:', OUT_FNAME)


if __name__ == '__main__':
    main()