Bugfix for when none of the workflows to be included in the ensemble converges during retraining: add the next best performing workflows.
MStarmans91 · Apr 1, 2022 · f021a76 · f021a76
1 parent c474a3e
commit f021a76
Show file tree

Hide file tree

Showing 2 changed files with 37 additions and 12 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -6,16 +6,8 @@ All notable changes to this project will be documented in this file.
 The format is based on `Keep a Changelog <http://keepachangelog.com/>`_
 and this project adheres to `Semantic Versioning <http://semver.org/>`_
 
-3.6.0 - Unreleased
-------------
-
-Added
-~~~~~~~
-- LightGBM classifier
-- Fingerprinting
-
-3.6.0 - Unreleased
-------------
+3.6.0 - 2022-03-31
+------------------
 
 Added
 ~~~~~~~
@@ -24,9 +16,16 @@ Added
   Instructions to install this component are provided in the documentation.
 - Besides Top_N ensembling, various other methods are added:
   'ForwardSelection', 'Caruana', and 'Bagging'
+- LightGBM classifier
+- Light fingerprinting approach to adjust config based on dataset.
+
+Fixed
+~~~~~
+- When none of the workflows to be included in the ensemble converges during
+  retraining, add the next best performing workflows.
 
 3.5.0 - 2021-08-18
-------------
+------------------
 
 Fixed
 ~~~~~

diff --git a/WORC/classification/SearchCV.py b/WORC/classification/SearchCV.py
@@ -1436,7 +1436,33 @@ def compute_performance(scoring, Y_valid_truth, Y_valid_score):
                     estimators.append(base_estimator)
                 except (NotFittedError, ValueError):
                     print(f'\t\t - Estimator {enum} could not be fitted (correctly), do not include in ensemble.')
-                    pass
+                    if enum + 1 == nest and not estimators:
+                        print(f'\t\t - Reached end of ensemble ({enum + 1}), but ensemble is empty, thus go on untill we find an estimator that works')
+                        while not estimators:
+                            # We cannot have an empty ensemble, thus go on until we find an estimator that works
+                            enum += 1
+                            p_all = self.cv_results_['params'][enum]
+
+                            # Refit a SearchCV object with the provided parameters
+                            base_estimator = clone(base_estimator)
+
+                            # Refit the cloned estimator on the training data with these parameters
+                            base_estimator.refit_and_score(X_train, Y_train, p_all,
+                                                           train, train,
+                                                           verbose=False)
+
+                            # Determine whether to overfit the feature scaling on the test set
+                            base_estimator.overfit_scaler = overfit_scaler
+
+                            try:
+                                # Try a prediction to see if estimator is truly fitted
+                                base_estimator.predict(np.asarray([X_train[0][0], X_train[1][0]]))
+                                estimators.append(base_estimator)
+                            except (NotFittedError, ValueError):
+                                pass
+                        print(f'\t\t - Needed estimator {enum}.')
+                    else:
+                        pass
 
         self.ensemble = Ensemble(estimators)
         self.best_estimator_ = self.ensemble