diff --git a/setup.py b/setup.py index 8a9a7bec..cbbb064a 100644 --- a/setup.py +++ b/setup.py @@ -10,8 +10,8 @@ setup( name='mljar-supervised', - version='0.5.5', - description='Automated Machine Learning for Supervised tasks', + version='0.6.0', + description='Automates Machine Learning Pipeline with Feature Engineering and Hyper-Parameters Tuning', long_description=long_description, long_description_content_type="text/markdown", url='https://github.com/mljar/mljar-supervised', diff --git a/supervised/__init__.py b/supervised/__init__.py index 7eb26d10..97fb8ad1 100644 --- a/supervised/__init__.py +++ b/supervised/__init__.py @@ -1,3 +1,3 @@ -__version__ = "0.5.5" +__version__ = "0.6.0" from supervised.automl import AutoML diff --git a/supervised/preprocessing/preprocessing.py b/supervised/preprocessing/preprocessing.py index 720b511c..94522c00 100644 --- a/supervised/preprocessing/preprocessing.py +++ b/supervised/preprocessing/preprocessing.py @@ -131,6 +131,11 @@ def fit_and_transform(self, X_train, y_train): X_train.drop(cols_to_remove, axis=1, inplace=True) self._remove_columns = cols_to_remove + numeric_cols = [] # get numeric cols before text transformations + # needed for golden features + if X_train is not None and "golden_features" in self._params: + numeric_cols = X_train.select_dtypes(include="number").columns.tolist() + # there can be missing values in the text data, # but we don't want to handle it by fill missing methods # zeros will be imputed by text_transform method @@ -164,9 +169,7 @@ def fit_and_transform(self, X_train, y_train): # golden features golden_columns = [] - if "golden_features" in self._params: - numeric_cols = X_train.select_dtypes(include="number").columns.tolist() results_path = self._params["golden_features"]["results_path"] ml_task = self._params["golden_features"]["ml_task"] # if ml_task in [BINARY_CLASSIFICATION]: