From f021a76060ce94be85817e3580e7db7e396891c7 Mon Sep 17 00:00:00 2001 From: MStarmans91 Date: Fri, 1 Apr 2022 17:15:04 +0200 Subject: [PATCH] Bugfix for when none of the workflows to be included in the ensemble converges during retraining, add the next best performing workflows. --- CHANGELOG | 21 ++++++++++----------- WORC/classification/SearchCV.py | 28 +++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c23b93ae..f400a65b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,16 +6,8 @@ All notable changes to this project will be documented in this file. The format is based on `Keep a Changelog `_ and this project adheres to `Semantic Versioning `_ -3.6.0 - Unreleased ------------- - -Added -~~~~~~~ -- LightGBM classifier -- Fingerprinting - -3.6.0 - Unreleased ------------- +3.6.0 - 2022-03-31 +------------------ Added ~~~~~~~ @@ -24,9 +16,16 @@ Added Instructions to install this component are provided in the documentation. - Besides Top_N ensembling, various other methods are added: 'ForwardSelection', 'Caruana', and 'Bagging' +- LightGBM classifier +- Light fingerprinting approach to adjust config based on dataset. + +Fixed +~~~~~ +- When none of the workflows to be included in the ensemble converges during + retraining, add the next best performing workflows. 
3.5.0 - 2021-08-18 ------------- +------------------ Fixed ~~~~~ diff --git a/WORC/classification/SearchCV.py b/WORC/classification/SearchCV.py index 443ef28e..c623dfee 100644 --- a/WORC/classification/SearchCV.py +++ b/WORC/classification/SearchCV.py @@ -1436,7 +1436,33 @@ def compute_performance(scoring, Y_valid_truth, Y_valid_score): estimators.append(base_estimator) except (NotFittedError, ValueError): print(f'\t\t - Estimator {enum} could not be fitted (correctly), do not include in ensemble.') - pass + if enum + 1 == nest and not estimators: + print(f'\t\t - Reached end of ensemble ({enum + 1}), but ensemble is empty, thus go on untill we find an estimator that works') + while not estimators: + # We cannot have an empty ensemble, thus go on until we find an estimator that works + enum += 1 + p_all = self.cv_results_['params'][enum] + + # Refit a SearchCV object with the provided parameters + base_estimator = clone(base_estimator) + + # Check if we need to create a multiclass estimator + base_estimator.refit_and_score(X_train, Y_train, p_all, + train, train, + verbose=False) + + # Determine whether to overfit the feature scaling on the test set + base_estimator.overfit_scaler = overfit_scaler + + try: + # Try a prediction to see if estimator is truly fitted + base_estimator.predict(np.asarray([X_train[0][0], X_train[1][0]])) + estimators.append(base_estimator) + except (NotFittedError, ValueError): + pass + print(f'\t\t - Needed estimator {enum}.') + else: + pass self.ensemble = Ensemble(estimators) self.best_estimator_ = self.ensemble