From 75228f97795f0edd40482a97fdcba453bdc4801d Mon Sep 17 00:00:00 2001 From: Guillaume VIGNAL Date: Mon, 1 Jul 2024 15:31:32 +0200 Subject: [PATCH 1/3] fix: webapp where a float column has infinite value in it --- shapash/webapp/smart_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shapash/webapp/smart_app.py b/shapash/webapp/smart_app.py index cfa524dd..036768ad 100644 --- a/shapash/webapp/smart_app.py +++ b/shapash/webapp/smart_app.py @@ -4,7 +4,7 @@ import copy import random import re -from math import log10 +from math import isfinite, log10 import dash import dash_bootstrap_components as dbc @@ -193,7 +193,7 @@ def init_data(self, rows=None): typ = self.dataframe[col].dtype if typ == float: std = self.dataframe[col].std() - if std != 0: + if isfinite(std) and std != 0: digit = max(round(log10(1 / std) + 1) + 2, 0) self.round_dataframe[col] = self.dataframe[col].map(f"{{:.{digit}f}}".format).astype(float) From 7e36db5cdaf9c200b8dad7c0667ad39996bcd08d Mon Sep 17 00:00:00 2001 From: Guillaume VIGNAL Date: Tue, 2 Jul 2024 11:29:25 +0200 Subject: [PATCH 2/3] improve quantile selection in prediction_regression_plot --- shapash/explainer/smart_plotter.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/shapash/explainer/smart_plotter.py b/shapash/explainer/smart_plotter.py index 3905bfe2..59fdf767 100644 --- a/shapash/explainer/smart_plotter.py +++ b/shapash/explainer/smart_plotter.py @@ -3717,9 +3717,20 @@ def _prediction_regression_plot( if len(y_target) > 500: lower_quantile = y_target.iloc[:, 0].quantile(0.005) upper_quantile = y_target.iloc[:, 0].quantile(0.995) - y_target = y_target.iloc[:, 0][ + y_target_tmp = y_target.iloc[:, 0][ (y_target.iloc[:, 0] > lower_quantile) & (y_target.iloc[:, 0] < upper_quantile) ] + if len(y_target_tmp) > 0.95 * len(y_target): + y_target = y_target_tmp + else: + y_target_tmp = y_target.iloc[:, 0][(y_target.iloc[:, 0] < upper_quantile)] + if len(y_target_tmp) > 0.95 * 
len(y_target): + y_target = y_target_tmp + else: + y_target_tmp = y_target.iloc[:, 0][(y_target.iloc[:, 0] > lower_quantile)] + if len(y_target_tmp) > 0.95 * len(y_target): + y_target = y_target_tmp + y_target_values = y_target.values.flatten() y_pred = self.explainer.y_pred.loc[y_target.index] From 786b5ff35b1c34dad5d35a0e92508bfee9a758d9 Mon Sep 17 00:00:00 2001 From: Guillaume VIGNAL Date: Thu, 4 Jul 2024 10:49:00 +0200 Subject: [PATCH 3/3] Delete nan values in place of replacing them by the median --- shapash/explainer/smart_plotter.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/shapash/explainer/smart_plotter.py b/shapash/explainer/smart_plotter.py index 59fdf767..d933586c 100644 --- a/shapash/explainer/smart_plotter.py +++ b/shapash/explainer/smart_plotter.py @@ -374,9 +374,8 @@ def plot_scatter( val_inter = feature_values_max - feature_values_min from sklearn.neighbors import KernelDensity - feature_np = np.array(feature_values_array)[:, None] - median_value = np.nanmedian(feature_np) - feature_np = np.where(np.isnan(feature_np), median_value, feature_np) + feature_np = np.array(feature_values_array) + feature_np = feature_np[~np.isnan(feature_np)][:, None] kde = KernelDensity(bandwidth=val_inter / 100, kernel="epanechnikov").fit(feature_np) xs = np.linspace(feature_values_min, feature_values_max, 1000) log_dens = kde.score_samples(xs[:, None]) @@ -3747,9 +3746,8 @@ def _prediction_regression_plot( val_inter = feature_values_max - feature_values_min from sklearn.neighbors import KernelDensity - feature_np = np.array(feature_values_array)[:, None] - median_value = np.nanmedian(feature_np) - feature_np = np.where(np.isnan(feature_np), median_value, feature_np) + feature_np = np.array(feature_values_array) + feature_np = feature_np[~np.isnan(feature_np)][:, None] kde = KernelDensity(bandwidth=val_inter / 300, kernel="epanechnikov").fit(feature_np) xs = np.linspace(feature_values_min, feature_values_max, 1000) log_dens =
kde.score_samples(xs[:, None])