"""Plotting helpers for the subspace_model PSuU analysis (plots.py, forked from BlockScience/subspace)."""
from typing import Callable

import hvplot.pandas  # type: ignore # noqa: F401  # registers the DataFrame .hvplot accessor used below
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier  # type: ignore
from sklearn.tree import DecisionTreeClassifier  # type: ignore
from sklearn.tree import plot_tree  # type: ignore

from subspace_model.psuu import GOVERNANCE_SURFACE_PARAMS

def create_decision_tree_importances_plot(data: pd.DataFrame,
                                          col_name: str,
                                          params_to_use: list[str] | None = None,
                                          plot_width: float = 36,
                                          plot_height: float = 12,
                                          success_criteria: Callable | str = ''):
    """Fit a decision tree and a random forest on the governance surface
    parameters against a binary success criterion on `col_name`, then plot
    the tree alongside the forest's feature importances."""
    # Feature matrix: caller-supplied parameters, or the full governance surface.
    features = params_to_use if params_to_use else list(GOVERNANCE_SURFACE_PARAMS)
    X = data.loc[:, features]

    # Binary target: a median split on the KPI column, or a caller-supplied
    # callable applied to each value together with the full KPI series.
    if isinstance(success_criteria, str):
        if success_criteria == 'smaller_than_median':
            y = data.loc[:, col_name] < data.loc[:, col_name].median()
        elif success_criteria == 'larger_than_median':
            y = data.loc[:, col_name] > data.loc[:, col_name].median()
        else:
            raise ValueError('success_criteria not specified')
    else:
        y = data[col_name].map(lambda x: success_criteria(x, data[col_name]))

    # Shallow tree for visualisation, random forest for feature importances.
    model = DecisionTreeClassifier(max_depth=3)
    rf = RandomForestClassifier()
    model.fit(X, y)
    rf.fit(X, y)

    X_cols = list(X.columns)
    rf_df = (pd.DataFrame(list(zip(X_cols, rf.feature_importances_)),
                          columns=['features', 'importance'])
             .sort_values(by='importance', ascending=False))

    fig, axes = plt.subplots(nrows=2,
                             figsize=(plot_width, plot_height),
                             dpi=100,
                             gridspec_kw={'height_ratios': [3, 1]})
    (ax_dt, ax_rf) = axes[0], axes[1]

    plot_tree(model,
              rounded=True,
              proportion=True,
              fontsize=8,
              feature_names=X_cols,
              class_names=['threshold not met', 'threshold met'],
              filled=True,
              ax=ax_dt)
    ax_dt.set_title(
        f'Decision Tree for {col_name}, score: {model.score(X, y):.0%}. N: {len(X):.2e}')

    sns.barplot(data=rf_df,
                x=rf_df.features,
                y=rf_df.importance,
                ax=ax_rf)
    plt.setp(ax_rf.xaxis.get_majorticklabels(), rotation=45)
    ax_rf.set_title('Feature importance')

    plt.show()
    return fig, axes

def create_impact_dist_plots_by_kpi(df_to_use: pd.DataFrame,
                                    kpi_cols: list[str],
                                    phase_cols: list[str],
                                    plot_height: float = 3.5,
                                    plot_width: float = 3.5):
    """Grid of KDE plots showing the impact of each phase column (rows)
    on the distribution of each KPI column (columns)."""
    # Define the custom color palette (one color per phase value).
    custom_palette = ["#000000", "#FF0000"]
    sns.set_palette(custom_palette)

    fig_width = plot_width * len(kpi_cols)
    fig_height = plot_height * len(phase_cols)

    # Create a plot object with subplots; squeeze=False keeps a 2-D axes
    # array even when there is a single row or column.
    fig, axs = plt.subplots(len(phase_cols), len(kpi_cols),
                            figsize=(fig_width, fig_height),
                            sharex='row', sharey='row',
                            squeeze=False,
                            gridspec_kw={'hspace': 0.65, 'wspace': 0.65})
    fig.subplots_adjust(top=0.89)
    fig.suptitle("Phase Impact Plot", y=1.0)

    for row_num, param in enumerate(phase_cols):
        for col_num, kpi in enumerate(kpi_cols):
            sns.kdeplot(
                data=df_to_use,
                x=kpi,
                hue=param,
                ax=axs[row_num, col_num],
                palette=custom_palette,
                # common_norm=True
            )
            axs[row_num, col_num].set_title(f"Impact of \n {param} \n on {kpi}",
                                            fontsize=10)

    plt.show()
    return fig, axs

def create_utility_outcomes_per_parameters_heatmap(utility_df: pd.DataFrame):
    """Heatmap of mean KPI utility per parameter subset, with rows sorted by
    total utility across KPIs."""
    kpi_by_subset = utility_df.groupby('subset').mean()
    row_sums = kpi_by_subset.sum(axis=1)
    sorted_df = kpi_by_subset.loc[row_sums.sort_values(ascending=False).index]
    chart = sorted_df.reset_index(drop=True).hvplot.heatmap(
        rot=35, height=800, width=800, fontscale=1, cmap='YlGn',
        title='Sorted Utility Outcomes by Parameter Subset',
        ylabel='Parameter Subset', xlabel='KPI Utility', rasterize=True)
    return chart
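

# ---------------------------------------------------------------------------
# Usage sketch (illustrative; not part of the original module): exercises the
# two matplotlib helpers above on a small synthetic results DataFrame. The
# column names ('reward_share', 'fee_weight', 'phase', 'kpi_a', 'kpi_b') are
# made-up stand-ins for real governance parameters and KPIs, and running it
# assumes the subspace_model package is importable so the imports succeed.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import numpy as np

    rng = np.random.default_rng(42)
    n = 500
    demo_df = pd.DataFrame({
        'reward_share': rng.uniform(0.0, 0.5, n),    # hypothetical parameter
        'fee_weight': rng.uniform(1.0, 1000.0, n),   # hypothetical parameter
        'phase': rng.integers(0, 2, n),               # binary phase indicator
    })
    # Synthetic KPIs that depend on the parameters plus noise.
    demo_df['kpi_a'] = 10 * demo_df['reward_share'] + rng.normal(0, 1, n)
    demo_df['kpi_b'] = np.log(demo_df['fee_weight']) + rng.normal(0, 0.5, n)

    # Decision tree / random forest importances for a median split on kpi_a.
    # A callable such as `lambda x, s: x > s.quantile(0.75)` can be passed as
    # success_criteria instead of the string shortcut.
    create_decision_tree_importances_plot(
        demo_df,
        col_name='kpi_a',
        params_to_use=['reward_share', 'fee_weight'],
        plot_width=12,
        plot_height=8,
        success_criteria='larger_than_median')

    # KDE impact plots of the phase indicator on each KPI.
    create_impact_dist_plots_by_kpi(
        demo_df,
        kpi_cols=['kpi_a', 'kpi_b'],
        phase_cols=['phase'])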