From da293b066ae7324ab10d6fdfa21c70635cd833af Mon Sep 17 00:00:00 2001
From: Piotrek
Date: Mon, 11 Jan 2021 00:29:32 +0100
Subject: [PATCH] fix tests (#284)

---
 supervised/tuner/mljar_tuner.py         | 61 +++++++++++++++++--------
 tests/tests_tuner/test_hill_climbing.py |  3 ++
 2 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/supervised/tuner/mljar_tuner.py b/supervised/tuner/mljar_tuner.py
index 7542b1af..9c0c27ba 100644
--- a/supervised/tuner/mljar_tuner.py
+++ b/supervised/tuner/mljar_tuner.py
@@ -138,12 +138,14 @@ def generate_params(
         elif step == "loo_encoding":
             return self.get_loo_categorical_strategy(models, total_time_limit)
         elif step == "golden_features":
-            return self.get_golden_features_params(models, results_path, total_time_limit)
+            return self.get_golden_features_params(
+                models, results_path, total_time_limit
+            )
         elif step == "insert_random_feature":
             return self.get_params_to_insert_random_feature(models, total_time_limit)
         elif step == "features_selection":
             return self.get_features_selection_params(
-                self.filter_random_feature_model(models), results_path
+                self.filter_random_feature_model(models), results_path, total_time_limit
             )
         elif "hill_climbing" in step:
             return self.get_hill_climbing_params(
@@ -484,7 +486,9 @@ def get_loo_categorical_strategy(self, current_models, total_time_limit):

     def get_categorical_strategy(self, current_models, strategy, total_time_limit):

-        df_models, algorithms = self.df_models_algorithms(current_models, time_limit=0.03*total_time_limit)
+        df_models, algorithms = self.df_models_algorithms(
+            current_models, time_limit=0.03 * total_time_limit
+        )
         generated_params = []
         for m_type in algorithms:
             # try to add categorical strategy only for below algorithms
@@ -548,9 +552,15 @@ def df_models_algorithms(self, current_models, time_limit=None):
         model_types = [m.get_type() for m in current_models]
         names = [m.get_name() for m in current_models]
         train_times = [m.get_train_time() for m in current_models]
-
+
         df_models = pd.DataFrame(
-            {"model": current_models, "score": scores, "model_type": model_types, "name": names, "train_time": train_times}
+            {
+                "model": current_models,
+                "score": scores,
+                "model_type": model_types,
+                "name": names,
+                "train_time": train_times,
+            }
         )
         if time_limit is not None:
             df_models = df_models[df_models.train_time < time_limit]
@@ -560,15 +570,18 @@ def df_models_algorithms(self, current_models, time_limit=None):
         model_types = list(df_models.model_type)
         u, idx = np.unique(model_types, return_index=True)
         algorithms = u[np.argsort(idx)]
-
-        #print(df_models)
-        #print(algorithms)
-        return df_models, algorithms
+        # print(df_models)
+        # print(algorithms)
+        return df_models, algorithms


-    def get_golden_features_params(self, current_models, results_path, total_time_limit):
+    def get_golden_features_params(
+        self, current_models, results_path, total_time_limit
+    ):

-        df_models, algorithms = self.df_models_algorithms(current_models, time_limit=0.03*total_time_limit)
+        df_models, algorithms = self.df_models_algorithms(
+            current_models, time_limit=0.03 * total_time_limit
+        )

         generated_params = []
         for m_type in algorithms:
@@ -579,7 +592,7 @@ def get_golden_features_params(self, current_models, results_path, total_time_li

             for i in range(min(1, len(models))):
                 m = models.iloc[i]
-
+
                 params = copy.deepcopy(m.params)
                 params["preprocessing"]["golden_features"] = {
                     "results_path": results_path,
@@ -598,9 +611,11 @@ def get_golden_features_params(self, current_models, results_path, total_time_li
                 generated_params += [params]
         return generated_params

-    def time_features_selection(self, current_models):
-
-        df_models, algorithms = self.df_models_algorithms(current_models)
+    def time_features_selection(self, current_models, total_time_limit):
+
+        df_models, algorithms = self.df_models_algorithms(
+            current_models, time_limit=0.05 * total_time_limit
+        )

         time_needed = 0
         for m_type in algorithms:
@@ -631,7 +646,7 @@ def time_features_selection(self, current_models):

     def get_params_to_insert_random_feature(self, current_models, total_time_limit):

-        time_needed = self.time_features_selection(current_models)
+        time_needed = self.time_features_selection(current_models, total_time_limit)

         if time_needed > 0.1 * total_time_limit:
             print("Not enough time to perform features selection. Skip")
@@ -640,7 +655,9 @@ def get_params_to_insert_random_feature(self, current_models, total_time_limit):
             )
             return None

-        df_models, algorithms = self.df_models_algorithms(current_models)
+        df_models, algorithms = self.df_models_algorithms(
+            current_models, time_limit=0.05 * total_time_limit
+        )

         m = df_models.iloc[0]["model"]

@@ -660,7 +677,9 @@ def get_params_to_insert_random_feature(self, current_models, total_time_limit):
             return [params]
         return None

-    def get_features_selection_params(self, current_models, results_path):
+    def get_features_selection_params(
+        self, current_models, results_path, total_time_limit
+    ):

         fname = os.path.join(results_path, "drop_features.json")
         if not os.path.exists(fname):
@@ -674,8 +693,10 @@ def get_params_to_insert_random_feature(self, current_models, total_time_limit):
         # skip this step
         if len(drop_features) <= 1:
             return None
-
-        df_models, algorithms = self.df_models_algorithms(current_models)
+
+        df_models, algorithms = self.df_models_algorithms(
+            current_models, time_limit=0.05 * total_time_limit
+        )

         generated_params = []
         for m_type in algorithms:
diff --git a/tests/tests_tuner/test_hill_climbing.py b/tests/tests_tuner/test_hill_climbing.py
index 2e98e3c8..910d05cd 100644
--- a/tests/tests_tuner/test_hill_climbing.py
+++ b/tests/tests_tuner/test_hill_climbing.py
@@ -19,6 +19,9 @@ def get_type(self):
     def get_final_loss(self):
         return self.final_loss

+    def get_train_time(self):
+        return 0.1
+

 class TunerHillClimbingTest(unittest.TestCase):
     def test_hill_climbing(self):