From 93881061b1c55f51917a2ea17ff04ecad697c571 Mon Sep 17 00:00:00 2001
From: gvling
Date: Tue, 29 Oct 2019 22:10:45 +0900
Subject: [PATCH 1/5] Add fixed feature selection (#2)

* fixed

* rm build folder
---
 lime/lime_base.py    | 15 +++++++++++++--
 lime/lime_tabular.py |  8 ++++++--
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/lime/lime_base.py b/lime/lime_base.py
index 63e62a0c..c4a15731 100644
--- a/lime/lime_base.py
+++ b/lime/lime_base.py
@@ -68,11 +68,16 @@ def forward_selection(self, data, labels, weights, num_features):
             used_features.append(best)
         return np.array(used_features)
 
-    def feature_selection(self, data, labels, weights, num_features, method):
+    def feature_selection(self, data, labels, weights, num_features, method, feature_names=None, use_feature_names=None):
         """Selects features for the model. see explain_instance_with_data
            to understand the parameters."""
         if method == 'none':
             return np.array(range(data.shape[1]))
+        if method == 'fixed':
+            used_features = []
+            for f in use_feature_names:
+                used_features.append(feature_names.index(f))
+            return used_features
         elif method == 'forward_selection':
             return self.forward_selection(data, labels, weights, num_features)
         elif method == 'highest_weights':
@@ -142,6 +147,8 @@ def explain_instance_with_data(self,
                                    label,
                                    num_features,
                                    feature_selection='auto',
+                                   feature_names=None,
+                                   use_feature_names=None,
                                    model_regressor=None):
         """Takes perturbed data, labels and distances, returns explanation.
 
@@ -164,10 +171,12 @@ def explain_instance_with_data(self,
             'none': uses all features, ignores num_features
             'auto': uses forward_selection if num_features <= 6, and
                 'highest_weights' otherwise.
+                'fixed': uses fixed features which passed by 'use_features'.
             model_regressor: sklearn regressor to use in explanation. Defaults
                 to Ridge regression if None. Must have model_regressor.coef_
                 and 'sample_weight' as a parameter to model_regressor.fit()
+            exclude_features: exclude features when select features.
 
         Returns:
             (intercept, exp, score, local_pred):
@@ -185,7 +194,9 @@ def explain_instance_with_data(self,
                                                labels_column,
                                                weights,
                                                num_features,
-                                               feature_selection)
+                                               feature_selection,
+                                               feature_names,
+                                               use_feature_names)
         if model_regressor is None:
             model_regressor = Ridge(alpha=1, fit_intercept=True,
                                     random_state=self.random_state)
diff --git a/lime/lime_tabular.py b/lime/lime_tabular.py
index b1f6f94b..75be9b68 100644
--- a/lime/lime_tabular.py
+++ b/lime/lime_tabular.py
@@ -297,7 +297,8 @@ def explain_instance(self,
                          num_features=10,
                          num_samples=5000,
                          distance_metric='euclidean',
-                         model_regressor=None):
+                         model_regressor=None,
+                         use_feature_names=None):
         """Generates explanations for a prediction.
 
         First, we generate neighborhood data by randomly perturbing features
@@ -451,7 +452,10 @@ def explain_instance(self,
                     label,
                     num_features,
                     model_regressor=model_regressor,
-                    feature_selection=self.feature_selection)
+                    feature_selection=self.feature_selection,
+                    feature_names=self.feature_names,
+                    use_feature_names=use_feature_names
+                )
 
         if self.mode == "regression":
             ret_exp.intercept[1] = ret_exp.intercept[0]

From 269b06f4d2adf48e32165108c1a298c0641705e3 Mon Sep 17 00:00:00 2001
From: gvling
Date: Wed, 30 Oct 2019 10:35:27 +0900
Subject: [PATCH 2/5] fixed (#4)

---
 lime/lime_base.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lime/lime_base.py b/lime/lime_base.py
index c4a15731..0321e3dc 100644
--- a/lime/lime_base.py
+++ b/lime/lime_base.py
@@ -71,13 +71,14 @@ def forward_selection(self, data, labels, weights, num_features):
     def feature_selection(self, data, labels, weights, num_features, method, feature_names=None, use_feature_names=None):
         """Selects features for the model. see explain_instance_with_data
            to understand the parameters."""
+        if use_feature_names is not None:
+            use_feature_index = []
+            for f in use_feature_names:
+                use_feature_index.append(feature_names.index(f))
+            data = data[:, use_feature_index]
+
         if method == 'none':
             return np.array(range(data.shape[1]))
-        if method == 'fixed':
-            used_features = []
-            for f in use_feature_names:
-                used_features.append(feature_names.index(f))
-            return used_features
         elif method == 'forward_selection':
             return self.forward_selection(data, labels, weights, num_features)
         elif method == 'highest_weights':
@@ -171,12 +172,11 @@ def explain_instance_with_data(self,
             'none': uses all features, ignores num_features
             'auto': uses forward_selection if num_features <= 6, and
                 'highest_weights' otherwise.
-                'fixed': uses fixed features which passed by 'use_features'.
             model_regressor: sklearn regressor to use in explanation. Defaults
                 to Ridge regression if None. Must have model_regressor.coef_
                 and 'sample_weight' as a parameter to model_regressor.fit()
-            exclude_features: exclude features when select features.
+            use_feature_names: use features when select features.
 
         Returns:
             (intercept, exp, score, local_pred):

From e0b4213ca5cd88ef83c38fcd75cc056a96d0e128 Mon Sep 17 00:00:00 2001
From: gvling
Date: Wed, 30 Oct 2019 14:50:15 +0900
Subject: [PATCH 3/5] fixed (#5)

---
 lime/lime_base.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/lime/lime_base.py b/lime/lime_base.py
index 0321e3dc..cdc39268 100644
--- a/lime/lime_base.py
+++ b/lime/lime_base.py
@@ -68,19 +68,21 @@ def forward_selection(self, data, labels, weights, num_features):
         used_features.append(best)
         return np.array(used_features)
 
-    def feature_selection(self, data, labels, weights, num_features, method, feature_names=None, use_feature_names=None):
+    def feature_selection(self, datas, labels, weights, num_features, method, feature_names=None, use_feature_names=None):
         """Selects features for the model. see explain_instance_with_data
            to understand the parameters."""
+        feature_index = np.array(range(datas.shape[1]))
         if use_feature_names is not None:
             use_feature_index = []
             for f in use_feature_names:
                 use_feature_index.append(feature_names.index(f))
-            data = data[:, use_feature_index]
+            data = datas[:, use_feature_index]
+            feature_index = feature_index[use_feature_index]
 
         if method == 'none':
-            return np.array(range(data.shape[1]))
+            return feature_index[list(range(data.shape[1]))]
         elif method == 'forward_selection':
-            return self.forward_selection(data, labels, weights, num_features)
+            return feature_index[self.forward_selection(data, labels, weights, num_features)]
         elif method == 'highest_weights':
             clf = Ridge(alpha=0, fit_intercept=True,
                         random_state=self.random_state)
@@ -111,14 +113,14 @@ def feature_selection(self, data, labels, weights, num_features, method, feature
                 else:
                     nnz_indexes = argsort_data[sdata - num_features:sdata][::-1]
                     indices = weighted_data.indices[nnz_indexes]
-                return indices
+                return feature_index[list(indices)]
             else:
                 weighted_data = coef * data[0]
                 feature_weights = sorted(
                     zip(range(data.shape[1]), weighted_data),
                     key=lambda x: np.abs(x[1]),
                     reverse=True)
-                return np.array([x[0] for x in feature_weights[:num_features]])
+                return feature_index[list([x[0] for x in feature_weights[:num_features]])]
         elif method == 'lasso_path':
             weighted_data = ((data - np.average(data, axis=0, weights=weights))
                              * np.sqrt(weights[:, np.newaxis]))
@@ -132,14 +134,15 @@ def feature_selection(self, data, labels, weights, num_features, method, feature
                 if len(nonzero) <= num_features:
                     break
             used_features = nonzero
-            return used_features
+            return feature_index[list(used_features)]
         elif method == 'auto':
             if num_features <= 6:
                 n_method = 'forward_selection'
             else:
                 n_method = 'highest_weights'
-            return self.feature_selection(data, labels, weights,
-                                          num_features, n_method)
+            return self.feature_selection(datas, labels, weights,
+                                          num_features, n_method,
+                                          feature_names, use_feature_names)
 
     def explain_instance_with_data(self,
                                    neighborhood_data,

From 7ab500c63905b8d32b298344945abe1b67bc81bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=20=E9=8A=98?=
Date: Wed, 30 Oct 2019 20:53:21 +0900
Subject: [PATCH 4/5] fix bug

---
 lime/lime_base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lime/lime_base.py b/lime/lime_base.py
index cdc39268..d1556ebb 100644
--- a/lime/lime_base.py
+++ b/lime/lime_base.py
@@ -78,6 +78,8 @@ def feature_selection(self, datas, labels, weights, num_features, method, featur
                 use_feature_index.append(feature_names.index(f))
             data = datas[:, use_feature_index]
             feature_index = feature_index[use_feature_index]
+        else:
+            data = datas
 
         if method == 'none':
             return feature_index[list(range(data.shape[1]))]

From ea920f7aa8bd549264a02c3a64495365c48ae19d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=20=E9=8A=98?=
Date: Wed, 30 Oct 2019 20:57:47 +0900
Subject: [PATCH 5/5] fix: line too long

---
 lime/lime_base.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lime/lime_base.py b/lime/lime_base.py
index d1556ebb..6b373c45 100644
--- a/lime/lime_base.py
+++ b/lime/lime_base.py
@@ -68,7 +68,14 @@ def forward_selection(self, data, labels, weights, num_features):
         used_features.append(best)
         return np.array(used_features)
 
-    def feature_selection(self, datas, labels, weights, num_features, method, feature_names=None, use_feature_names=None):
+    def feature_selection(self,
+                          datas,
+                          labels,
+                          weights,
+                          num_features,
+                          method,
+                          feature_names=None,
+                          use_feature_names=None):
         """Selects features for the model. see explain_instance_with_data
            to understand the parameters."""
         feature_index = np.array(range(datas.shape[1]))
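
Note: below is a minimal usage sketch of the behaviour this series adds, assuming the patched fork is installed. The `use_feature_names` argument does not exist in upstream lime, and the dataset, classifier and column names are stand-ins chosen only for illustration. The names passed in `use_feature_names` must match the `feature_names` given to the explainer, since `feature_selection()` resolves them with `feature_names.index(f)` and maps the selected positions back to original column indices through `feature_index` (PATCH 3/5).

    # Usage sketch (assumption: this patched fork of lime is installed).
    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from lime.lime_tabular import LimeTabularExplainer

    iris = load_iris()
    clf = RandomForestClassifier(random_state=0).fit(iris.data, iris.target)

    explainer = LimeTabularExplainer(
        iris.data,
        feature_names=iris.feature_names,       # names that use_feature_names must match
        class_names=iris.target_names,
        discretize_continuous=False,
        feature_selection='forward_selection',  # selection now runs only on the fixed subset
    )

    exp = explainer.explain_instance(
        iris.data[0],
        clf.predict_proba,
        num_features=2,
        # restrict the explanation to these columns; the returned indices are
        # remapped to the full feature space via feature_index
        use_feature_names=['petal length (cm)', 'petal width (cm)'],
    )
    print(exp.as_list())

Compared with the 'fixed' method introduced in PATCH 1/5 and removed in PATCH 2/5, restricting the candidate columns before selection keeps every existing method (forward_selection, highest_weights, lasso_path, none, auto) usable on the fixed subset instead of bypassing them.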