-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict.py
186 lines (147 loc) · 6.8 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import pickle
from joblib import load
import pandas as pd
from data_preprocessing.eda import update_column_names
from main import *
import streamlit as st
final = pd.DataFrame({"nhs_number": [], "latest_hba1c_value": [], "predicted_hba1c": [], "subtraction_result":[]})
def update_statin_strength(df):
# Define the mapping dictionary
statin_map = {
'Pravastatin 10mg tablets': 1,
'Pravastatin 20mg tablets': 2,
'Pravastatin 40mg tablets': 3,
'Simvastatin 20mg tablets': 4,
'Simvastatin 40mg tablets': 5,
'Simvastatin 80mg tablets': 6,
'Atorvastatin 10mg tablets': 7,
'Atorvastatin 20mg tablets': 8,
'Atorvastatin 40mg tablets': 9,
'Atorvastatin 80mg tablets': 10,
'Rosuvastatin 5mg tablets': 11,
'Rosuvastatin 10mg tablets': 12,
'Rosuvastatin 20mg tablets': 13,
'Rosuvastatin 40mg tablets': 14
}
# Update the 'statin' column using the map
df['statin_strenght'] = df['statin'].map(statin_map)
return df
def update_bame_column(df):
# Define the mapping dictionary
bame_map = {
'No': 0,
'Yes': 1,
'NK': 0
}
# Update the 'bame' column using the map
df['bame'] = df['bame'].map(bame_map)
return df
def diabetes_diagnosis_map(df):
dm_map = {
"Type 1": 1,
"Type 2": 2,
"Both Types - Latest Type 1": 1,
"Both Types - Latest Type 2": 2,
"No Type Recorded": 0,
"Both Types - Check": 2
}
df['diabetes_diagnosis'] = df['diabetes_diagnosis'].map(dm_map)
return df
def predict(df, nhs_df):
data = update_column_names(df)
# data = load_df(data, col_list, columns_to_drop)
print("🦖 Prep Dataframe")
data = update_bame_column(data)
print("😀 Bame Mapped")
data = data.drop(columns=["dob", "ethnicity", "first_dm_diagnosis", "9_kcp_complete", "3_levels_to_target", "mh_screen_-_dds_or_phq", "patient_goals", "care_plan"])
print("💧 Drop Columns")
data = add_length_columns(data, cols_toget_length, calculate_length_of_diagnosis)
print("🧮 Length of diagnosis columns")
data = data.drop(columns=['annual_review_done',
'hba1c', 'bp', 'cholesterol', 'bmi', 'egfr', 'urine_acr', 'smoking',
'foot_risk', 'retinal_screening', 'education',
'care_planning_consultation', 'struc_educ_in_l5y',
'struc_educ_in_12m_diag', 'statin_date'])
print("💧 Drop Columns")
data = update_statin_strength(data)
print("🗺️ Mapped Statiin Strength")
data = diabetes_diagnosis_map(data)
print("🗺️ Mapped Diabetes Diagnosis")
data['latest_qrisk2'] = data['latest_qrisk2'].str.replace("%", "").astype(float)
data = impute_values(data, missing_values=0, copy=False, strategy='mean', columns=impute_cols)
data = data.fillna(0) # STRATEGY FILL ALL NAA WITH ZERO
data.drop(columns=["statin"], inplace=True)
nhs_number = data[["nhs_number", "hba1c_value"]]
data.drop(columns=["nhs_number", "column9"], inplace=True)
new_col_list = ['imd_decile', 'bame', 'diabetes_diagnosis', 'column1',
'sbp', 'dbp', 'total_chol', 'non-hdl_chol', 'latest_hdl', 'latest_ldl',
'latest_egfr', 'latest_bmi', 'latest_qrisk2', 'column2', 'column3',
'column4', 'column5', 'column6', 'column7', 'column8', 'column9',
'metformin', 'sulphonylurea', 'dpp4', 'sglt2',
'pioglitazone', 'glp-1', 'basal_/_mix_insulin', 'rapid_acting_insulin',
'acei/arb', 'calcium_channel_blocker', 'diuretic', 'beta_blocker',
'spironolactone', 'doxazosin', 'age', 'lenght_of_diagnosis_months',
'lenght_of_diagnosis_years', 'annual_review_done_length',
'hba1c_length', 'cholesterol_length', 'bmi_length', 'egfr_length',
'urine_acr_length', 'smoking_length', 'foot_risk_length',
'retinal_screening_length', 'education_length', 'statin_date_length',
'statin_strenght']
arranged_col_list = ['imd_decile', 'bame', 'diabetes_diagnosis', 'sbp', 'dbp', 'total_chol',
'non-hdl_chol', 'latest_hdl', 'latest_ldl', 'latest_egfr', 'latest_bmi',
'latest_qrisk2', 'column1', 'column2', 'column3', 'column4', 'column5',
'column6', 'column7', 'column8', 'column9', 'metformin',
'sulphonylurea', 'dpp4', 'sglt2', 'pioglitazone', 'glp-1',
'basal_/_mix_insulin', 'rapid_acting_insulin', 'acei/arb',
'calcium_channel_blocker', 'diuretic', 'beta_blocker', 'spironolactone',
'doxazosin', 'age', 'lenght_of_diagnosis_months',
'lenght_of_diagnosis_years', 'annual_review_done_length',
'hba1c_length', 'cholesterol_length', 'bmi_length', 'egfr_length',
'urine_acr_length', 'smoking_length', 'foot_risk_length',
'retinal_screening_length', 'education_length', 'statin_date_length',
'statin_strenght']
data.columns = new_col_list
data = data[arranged_col_list]
drop_these = ['diabetes_diagnosis',
'sulphonylurea', 'dpp4', 'sglt2', 'pioglitazone', 'glp-1',
'basal_/_mix_insulin', 'rapid_acting_insulin', 'acei/arb',
'calcium_channel_blocker', 'diuretic', 'beta_blocker', 'spironolactone',
'doxazosin', 'lenght_of_diagnosis_months',
'annual_review_done_length',
'hba1c_length', 'cholesterol_length', 'bmi_length', 'egfr_length',
'urine_acr_length', 'smoking_length', 'foot_risk_length',
'retinal_screening_length', 'education_length', ]
data.drop(columns=drop_these, inplace=True)
with open('scaler_StandardScaler_2024-11-13_17-59-35.pkl', 'rb') as f:
scaler = pickle.load(f)
scaled_new_data = scaler.transform(data)
# Load the model from the file
gbr_loaded = load('gradient_boosting_model_13nov24.joblib')
# Now you can use it to make predictions
predictions = gbr_loaded.predict(scaled_new_data)
nhs_list = nhs['nhs_number'].to_list()
nhb_list = nhs['hba1c_value'].to_list()
data = {
"nhs_number": nhs_list,
"latest_hba1c_value": hb_list,
"predicted_hba1c": predictions,
}
final = pd.DataFrame(data)
final['subtraction_result'] = final['latest_hba1c_value'] - final['predicted_hba1c']
st.dataframe(final)
return final
def highlight_subtraction_result(df):
# Define the style function with multiple conditions
def highlight(cell):
# Apply different colors based on the cell's value
if cell < -10:
color = '#fb923c' # Highlight in red if the value is greater than 10
elif cell < -5:
color = '#fcd34d' # Highlight in yellow if the value is greater than 5
else:
color = ''
return f'background-color: {color}'
# Apply the style function to the 'subtraction_result' column
styled_df = df.style.applymap(highlight, subset=['subtraction_result'])
return styled_df
styled = highlight_subtraction_result(final)
st.dataframe(styled)