From 93881061b1c55f51917a2ea17ff04ecad697c571 Mon Sep 17 00:00:00 2001
From: gvling
Date: Tue, 29 Oct 2019 22:10:45 +0900
Subject: [PATCH 1/5] Add fixed feature selection (#2)

* fixed

* rm build folder
---
 lime/lime_base.py    | 15 +++++++++++++--
 lime/lime_tabular.py |  8 ++++++--
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/lime/lime_base.py b/lime/lime_base.py
index 63e62a0c..c4a15731 100644
--- a/lime/lime_base.py
+++ b/lime/lime_base.py
@@ -68,11 +68,16 @@ def forward_selection(self, data, labels, weights, num_features):
             used_features.append(best)
         return np.array(used_features)
 
-    def feature_selection(self, data, labels, weights, num_features, method):
+    def feature_selection(self, data, labels, weights, num_features, method, feature_names=None, use_feature_names=None):
         """Selects features for the model. see explain_instance_with_data
            to understand the parameters."""
         if method == 'none':
             return np.array(range(data.shape[1]))
+        if method == 'fixed':
+            used_features = []
+            for f in use_feature_names:
+                used_features.append(feature_names.index(f))
+            return used_features
         elif method == 'forward_selection':
             return self.forward_selection(data, labels, weights, num_features)
         elif method == 'highest_weights':
@@ -142,6 +147,8 @@ def explain_instance_with_data(self,
                                    label,
                                    num_features,
                                    feature_selection='auto',
+                                   feature_names=None,
+                                   use_feature_names=None,
                                    model_regressor=None):
         """Takes perturbed data, labels and distances, returns explanation.
 
@@ -164,10 +171,12 @@ def explain_instance_with_data(self,
             'none': uses all features, ignores num_features
             'auto': uses forward_selection if num_features <= 6, and
                 'highest_weights' otherwise.
+                'fixed': uses fixed features which passed by 'use_features'.
             model_regressor: sklearn regressor to use in explanation. Defaults
                 to Ridge regression if None. Must have model_regressor.coef_
                 and 'sample_weight' as a parameter to model_regressor.fit()
+            exclude_features: exclude features when select features.
 
         Returns:
             (intercept, exp, score, local_pred):
@@ -185,7 +194,9 @@ def explain_instance_with_data(self,
                                                labels_column,
                                                weights,
                                                num_features,
-                                               feature_selection)
+                                               feature_selection,
+                                               feature_names,
+                                               use_feature_names)
         if model_regressor is None:
             model_regressor = Ridge(alpha=1, fit_intercept=True,
                                     random_state=self.random_state)
diff --git a/lime/lime_tabular.py b/lime/lime_tabular.py
index b1f6f94b..75be9b68 100644
--- a/lime/lime_tabular.py
+++ b/lime/lime_tabular.py
@@ -297,7 +297,8 @@ def explain_instance(self,
                          num_features=10,
                          num_samples=5000,
                          distance_metric='euclidean',
-                         model_regressor=None):
+                         model_regressor=None,
+                         use_feature_names=None):
         """Generates explanations for a prediction.
 
         First, we generate neighborhood data by randomly perturbing features
@@ -451,7 +452,10 @@ def explain_instance(self,
                     label,
                     num_features,
                     model_regressor=model_regressor,
-                    feature_selection=self.feature_selection)
+                    feature_selection=self.feature_selection,
+                    feature_names=self.feature_names,
+                    use_feature_names=use_feature_names
+                )
 
         if self.mode == "regression":
             ret_exp.intercept[1] = ret_exp.intercept[0]

From 269b06f4d2adf48e32165108c1a298c0641705e3 Mon Sep 17 00:00:00 2001
From: gvling
Date: Wed, 30 Oct 2019 10:35:27 +0900
Subject: [PATCH 2/5] fixed (#4)

---
 lime/lime_base.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lime/lime_base.py b/lime/lime_base.py
index c4a15731..0321e3dc 100644
--- a/lime/lime_base.py
+++ b/lime/lime_base.py
@@ -71,13 +71,14 @@ def forward_selection(self, data, labels, weights, num_features):
     def feature_selection(self, data, labels, weights, num_features, method, feature_names=None, use_feature_names=None):
         """Selects features for the model. see explain_instance_with_data
            to understand the parameters."""
+        if use_feature_names is not None:
+            use_feature_index = []
+            for f in use_feature_names:
+                use_feature_index.append(feature_names.index(f))
+            data = data[:, use_feature_index]
+
         if method == 'none':
             return np.array(range(data.shape[1]))
-        if method == 'fixed':
-            used_features = []
-            for f in use_feature_names:
-                used_features.append(feature_names.index(f))
-            return used_features
         elif method == 'forward_selection':
             return self.forward_selection(data, labels, weights, num_features)
         elif method == 'highest_weights':
@@ -171,12 +172,11 @@ def explain_instance_with_data(self,
             'none': uses all features, ignores num_features
             'auto': uses forward_selection if num_features <= 6, and
                 'highest_weights' otherwise.
-                'fixed': uses fixed features which passed by 'use_features'.
             model_regressor: sklearn regressor to use in explanation. Defaults
                 to Ridge regression if None. Must have model_regressor.coef_
                 and 'sample_weight' as a parameter to model_regressor.fit()
-            exclude_features: exclude features when select features.
+            use_feature_names: use features when select features.
 
         Returns:
             (intercept, exp, score, local_pred):

From e0b4213ca5cd88ef83c38fcd75cc056a96d0e128 Mon Sep 17 00:00:00 2001
From: gvling
Date: Wed, 30 Oct 2019 14:50:15 +0900
Subject: [PATCH 3/5] fixed (#5)

---
 lime/lime_base.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/lime/lime_base.py b/lime/lime_base.py
index 0321e3dc..cdc39268 100644
--- a/lime/lime_base.py
+++ b/lime/lime_base.py
@@ -68,19 +68,21 @@ def forward_selection(self, data, labels, weights, num_features):
         used_features.append(best)
         return np.array(used_features)
 
-    def feature_selection(self, data, labels, weights, num_features, method, feature_names=None, use_feature_names=None):
+    def feature_selection(self, datas, labels, weights, num_features, method, feature_names=None, use_feature_names=None):
         """Selects features for the model. see explain_instance_with_data
            to understand the parameters."""
+        feature_index = np.array(range(datas.shape[1]))
         if use_feature_names is not None:
             use_feature_index = []
             for f in use_feature_names:
                 use_feature_index.append(feature_names.index(f))
-            data = data[:, use_feature_index]
+            data = datas[:, use_feature_index]
+            feature_index = feature_index[use_feature_index]
 
         if method == 'none':
-            return np.array(range(data.shape[1]))
+            return feature_index[list(range(data.shape[1]))]
         elif method == 'forward_selection':
-            return self.forward_selection(data, labels, weights, num_features)
+            return feature_index[self.forward_selection(data, labels, weights, num_features)]
         elif method == 'highest_weights':
             clf = Ridge(alpha=0, fit_intercept=True,
                         random_state=self.random_state)
@@ -111,14 +113,14 @@ def feature_selection(self, data, labels, weights, num_features, method, feature
                 else:
                     nnz_indexes = argsort_data[sdata - num_features:sdata][::-1]
                     indices = weighted_data.indices[nnz_indexes]
-                return indices
+                return feature_index[list(indices)]
             else:
                 weighted_data = coef * data[0]
                 feature_weights = sorted(
                     zip(range(data.shape[1]), weighted_data),
                     key=lambda x: np.abs(x[1]),
                     reverse=True)
-                return np.array([x[0] for x in feature_weights[:num_features]])
+                return feature_index[list([x[0] for x in feature_weights[:num_features]])]
         elif method == 'lasso_path':
             weighted_data = ((data - np.average(data, axis=0, weights=weights))
                              * np.sqrt(weights[:, np.newaxis]))
@@ -132,14 +134,15 @@ def feature_selection(self, data, labels, weights, num_features, method, feature
                 if len(nonzero) <= num_features:
                     break
             used_features = nonzero
-            return used_features
+            return feature_index[list(used_features)]
         elif method == 'auto':
             if num_features <= 6:
                 n_method = 'forward_selection'
             else:
                 n_method = 'highest_weights'
-            return self.feature_selection(data, labels, weights,
-                                          num_features, n_method)
+            return self.feature_selection(datas, labels, weights,
+                                          num_features, n_method,
+                                          feature_names, use_feature_names)
 
     def explain_instance_with_data(self,
                                    neighborhood_data,

From 7ab500c63905b8d32b298344945abe1b67bc81bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=20=E9=8A=98?=
Date: Wed, 30 Oct 2019 20:53:21 +0900
Subject: [PATCH 4/5] fix bug

---
 lime/lime_base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lime/lime_base.py b/lime/lime_base.py
index cdc39268..d1556ebb 100644
--- a/lime/lime_base.py
+++ b/lime/lime_base.py
@@ -78,6 +78,8 @@ def feature_selection(self, datas, labels, weights, num_features, method, featur
                 use_feature_index.append(feature_names.index(f))
             data = datas[:, use_feature_index]
             feature_index = feature_index[use_feature_index]
+        else:
+            data = datas
 
         if method == 'none':
             return feature_index[list(range(data.shape[1]))]

From ea920f7aa8bd549264a02c3a64495365c48ae19d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=20=E9=8A=98?=
Date: Wed, 30 Oct 2019 20:57:47 +0900
Subject: [PATCH 5/5] fix: line too long

---
 lime/lime_base.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lime/lime_base.py b/lime/lime_base.py
index d1556ebb..6b373c45 100644
--- a/lime/lime_base.py
+++ b/lime/lime_base.py
@@ -68,7 +68,14 @@ def forward_selection(self, data, labels, weights, num_features):
         used_features.append(best)
         return np.array(used_features)
 
-    def feature_selection(self, datas, labels, weights, num_features, method, feature_names=None, use_feature_names=None):
+    def feature_selection(self,
+                          datas,
+                          labels,
+                          weights,
+                          num_features,
+                          method,
+                          feature_names=None,
+                          use_feature_names=None):
         """Selects features for the model. see explain_instance_with_data
            to understand the parameters."""
         feature_index = np.array(range(datas.shape[1]))
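
Note: below is a minimal usage sketch of the behaviour this series adds, assuming the patched fork is installed. The `use_feature_names` argument does not exist in upstream lime, and the dataset, classifier and column names are stand-ins chosen only for illustration. The names passed in `use_feature_names` must match the `feature_names` given to the explainer, since `feature_selection()` resolves them with `feature_names.index(f)` and maps the selected positions back to original column indices through `feature_index` (PATCH 3/5).

    # Usage sketch (assumption: this patched fork of lime is installed).
    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from lime.lime_tabular import LimeTabularExplainer

    iris = load_iris()
    clf = RandomForestClassifier(random_state=0).fit(iris.data, iris.target)

    explainer = LimeTabularExplainer(
        iris.data,
        feature_names=iris.feature_names,       # names that use_feature_names must match
        class_names=iris.target_names,
        discretize_continuous=False,
        feature_selection='forward_selection',  # selection now runs only on the fixed subset
    )

    exp = explainer.explain_instance(
        iris.data[0],
        clf.predict_proba,
        num_features=2,
        # restrict the explanation to these columns; the returned indices are
        # remapped to the full feature space via feature_index
        use_feature_names=['petal length (cm)', 'petal width (cm)'],
    )
    print(exp.as_list())

Compared with the 'fixed' method introduced in PATCH 1/5 and removed in PATCH 2/5, restricting the candidate columns before selection keeps every existing method (forward_selection, highest_weights, lasso_path, none, auto) usable on the fixed subset instead of bypassing them.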