From 00d6dfe725e6e2bb6aff237179afc01cef663b70 Mon Sep 17 00:00:00 2001 From: Piotrek Date: Wed, 9 Sep 2020 11:18:00 +0200 Subject: [PATCH] :pencil2: fix typos --- examples/scripts/binary_classifier_Titanic.py | 36 +++---------------- supervised/base_automl.py | 6 ++-- 2 files changed, 7 insertions(+), 35 deletions(-) diff --git a/examples/scripts/binary_classifier_Titanic.py b/examples/scripts/binary_classifier_Titanic.py index d408dd10..0e659774 100644 --- a/examples/scripts/binary_classifier_Titanic.py +++ b/examples/scripts/binary_classifier_Titanic.py @@ -5,44 +5,16 @@ from sklearn.metrics import accuracy_score -""" -obj_array = np.array([1, 2, "A"], dtype=object) -y = pd.DataFrame(obj_array) -X = y.copy() - -print(X) -print(np.unique(y[~pd.isnull(y)])) -for col in X.columns: - print(col, X[col].dtype) - -a = AutoML(total_time=30) - -a.fit(X, y) -""" - df = pd.read_csv("tests/data/Titanic/train.csv") X = df[df.columns[2:]] y = df["Survived"] -automl = AutoML(mode="Perform", explain_level=2, total_time_limit=1*6, feature_selection=True) +automl = AutoML(mode="Explain") automl.fit(X, y) pred = automl.predict(X) -print("Train accuracy", accuracy_score(y, pred)) #["label"])) - +print("Train accuracy", accuracy_score(y, pred)) test = pd.read_csv("tests/data/Titanic/test_with_Survived.csv") -test_cols = [ - "Parch", - "Ticket", - "Fare", - "Pclass", - "Name", - "Sex", - "Age", - "SibSp", - "Cabin", - "Embarked", -] -pred = automl.predict_all(test[test_cols]) -print("Test accuracy", accuracy_score(test["Survived"], pred["label"])) +pred = automl.predict(test) +print("Test accuracy", accuracy_score(test["Survived"], pred)) diff --git a/supervised/base_automl.py b/supervised/base_automl.py index 4a256ab4..cfed5a8b 100644 --- a/supervised/base_automl.py +++ b/supervised/base_automl.py @@ -468,7 +468,7 @@ def _validate_X_predict(self, X): n_features = X.shape[1] if self.n_features_in_ != n_features: raise ValueError( - f"Number of features of the model must match the input. Model n_features is {self.n_features_in_}%s and input n_features is {n_features} %s. Reshape your data." + f"Number of features of the model must match the input. Model n_features_in_ is {self.n_features_in_} and input n_features is {n_features}. Reshape your data." ) # This method builds pandas.Dataframe from input. The input can be numpy.ndarray, matrix, or pandas.Dataframe @@ -712,8 +712,7 @@ def _check_is_fitted(self): def _base_predict(self, X): self._check_is_fitted() - self._validate_X_predict(X) - + X = self._build_dataframe(X) if not isinstance(X.columns[0], str): X.columns = [str(c) for c in X.columns] @@ -726,6 +725,7 @@ def _base_predict(self, X): ) X = X[self._data_info["columns"]] + self._validate_X_predict(X) # is stacked model if self._best_model._is_stacked: