From e959f3c53b03008c4459b16336d5f8ff2b0c16c4 Mon Sep 17 00:00:00 2001 From: Piotrek Date: Tue, 9 Apr 2019 15:35:58 +0200 Subject: [PATCH] fixing tests --- README.md | 15 +++++++-------- supervised/automl.py | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 2758cd04..8a4bc26b 100644 --- a/README.md +++ b/README.md @@ -10,17 +10,17 @@ ## The new standard in Machine Learning! Thanks to Automated Machine Learning you don't need to worry about different machine learning interfaces. You don't need to know all algorithms and their hyper-parameters. With AutoML model tuning and training is painless. - -In the current version only binary classification is supported with optimization of LogLoss metric. -## Example +In the current version only binary classification is supported with optimization of LogLoss metric. -``` +## Example + +```python import pandas as pd from supervised.automl import AutoML df = pd.read_csv("https://raw.githubusercontent.com/pplonski/datasets-for-start/master/adult/data.csv", skipinitialspace=True) -print(df.head()) + X = df[df.columns[:-1]] y = df["income"] @@ -58,7 +58,7 @@ This is Automated Machine Learning package, so all hard tasks is done for you. T #### Train and predict -``` +```python automl = AutoML() automl.fit(X, y) predictions = automl.predict(X) @@ -76,7 +76,7 @@ By the default, the training should finish in less than 1 hour and as ML algorit The parameters that you can use to control the training process are: - **total_time_limit** - it is a total time limit that AutoML can spend for searching to the best ML model. It is in seconds. _Default is set to 3600 seconds._ -- **learner_time_limit** - the time limit for training single model, in case of `k`-fold cross validation, the time spend on training is `k*learner_time_limit`. This parameter is only considered when `total_time_limit` is set to None. _Default is set to 120 seconds_. 
+- **learner_time_limit** - the time limit for training a single model; in case of `k`-fold cross validation, the time spent on training is `k*learner_time_limit`. This parameter is only considered when `total_time_limit` is set to None. _Default is set to 120 seconds_. - **algorithms** - the list of algorithms that will be checked. _Default is set to ["CatBoost", "Xgboost", "RF", "LightGBM", "NN"]_. - **start_random_models** - the number of models to check with _not so random_ algorithm. _Default is set to 10_. - **hill_climbing_steps** - number of hill climbing steps used in models tuning. _Default is set to 3_. @@ -84,7 +84,6 @@ The parameters that you can use to control the training process are: - **train_ensemble** - decides if ensemble model is trained at the end of AutoML fit procedure. _Default is set to True_. - **verbose** - controls printouts, _Default is set to True_. - ## Development ### Installation diff --git a/supervised/automl.py b/supervised/automl.py index 266bea54..58b5ca3e 100644 --- a/supervised/automl.py +++ b/supervised/automl.py @@ -191,6 +191,7 @@ def ensemble_step(self, y): def fit(self, X, y): start_time = time.time() X.reset_index(drop=True, inplace=True) + y = np.array(y) if not isinstance(y, pd.DataFrame): y = pd.DataFrame(y) y.reset_index(drop=True, inplace=True)