Fix sequential #248

Merged: 17 commits, Jun 4, 2024
62 changes: 55 additions & 7 deletions golem/core/tuning/hyperopt_tuner.py
@@ -1,14 +1,15 @@
from abc import ABC
from datetime import timedelta
from typing import Callable, Dict, Optional
from typing import Callable, Dict, Optional, Tuple, Any

import numpy as np
from hyperopt import hp, tpe
from hyperopt import hp, tpe, fmin, Trials
from hyperopt.early_stop import no_progress_loss
from hyperopt.pyll import Apply, scope
from hyperopt.pyll_utils import validate_label

from golem.core.adapter import BaseOptimizationAdapter
from golem.core.dag.linked_graph_node import LinkedGraphNode
from golem.core.log import default_log
from golem.core.optimisers.objective import ObjectiveFunction
from golem.core.tuning.search_space import SearchSpace, get_node_operation_parameter_label
@@ -64,6 +65,49 @@ def __init__(self, objective_evaluate: ObjectiveFunction,
self.algo = algo
self.log = default_log(self)

def _search_near_initial_parameters(self,
objective,
search_space: dict,
initial_parameters: dict,
trials: Trials,
remaining_time: float,
show_progress: bool = True) -> Tuple[Trials, int]:
""" Method to search using the search space where parameters initially set for the graph are fixed.
This allows not to lose results obtained while composition process
kasyanovse marked this conversation as resolved.
Show resolved Hide resolved

Args:
graph: graph to be tuned
search_space: dict with parameters to be optimized and their search spaces
initial_parameters: dict with initial parameters of the graph
trials: Trials object to store all the search iterations
show_progress: shows progress of tuning if True

Returns:
trials: Trials object storing all the search trials
init_trials_num: number of iterations made using the search space with fixed initial parameters
"""
try_initial_parameters = initial_parameters and self.iterations > 1
if not try_initial_parameters:
init_trials_num = 0
return trials, init_trials_num

is_init_params_full = len(initial_parameters) == len(search_space)
if self.iterations < 10 or is_init_params_full:
init_trials_num = 1
else:
init_trials_num = min(int(self.iterations * 0.1), 10)

# fmin updates trials with evaluation points tried out during the call
fmin(objective,
search_space,
trials=trials,
algo=self.algo,
max_evals=init_trials_num,
show_progressbar=show_progress,
early_stop_fn=self.early_stop_fn,
timeout=remaining_time)
return trials, init_trials_num


def get_parameter_hyperopt_space(search_space: SearchSpace,
operation_name: str,
@@ -96,31 +140,35 @@ def get_parameter_hyperopt_space(search_space: SearchSpace,
return None


def get_node_parameters_for_hyperopt(search_space: SearchSpace, node_id: int, operation_name: str) \
-> Dict[str, Apply]:
def get_node_parameters_for_hyperopt(search_space: SearchSpace, node_id: int, node: LinkedGraphNode) \
-> Tuple[Dict[str, Apply], Dict[str, Any]]:
"""
Function for forming dictionary with hyperparameters of the node operation for the ``HyperoptTuner``

Args:
search_space: SearchSpace with parameters per operation
node_id: number of node in graph.nodes list
operation_name: name of operation in the node
node: node from the graph

Returns:
parameters_dict: dictionary-like structure with labeled hyperparameters
and their range per operation
initial_parameters: dictionary with the initial parameter values taken from the node
"""

# Get available parameters for current operation
operation_name = node.name
parameters_list = search_space.get_parameters_for_operation(operation_name)

parameters_dict = {}
initial_parameters = {}
for parameter_name in parameters_list:
node_op_parameter_name = get_node_operation_parameter_label(node_id, operation_name, parameter_name)

# For operation get range where search can be done
space = get_parameter_hyperopt_space(search_space, operation_name, parameter_name, node_op_parameter_name)

parameters_dict.update({node_op_parameter_name: space})

return parameters_dict
if parameter_name in node.parameters:
initial_parameters.update({node_op_parameter_name: node.parameters[parameter_name]})

return parameters_dict, initial_parameters
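For context, here is a minimal, self-contained sketch of the two-phase idea behind `_search_near_initial_parameters`: a few `fmin` evaluations are run first with the composition-time parameters fixed, and the same `Trials` object is then reused for the search over the full space. The toy objective, search space, and `init_trials_num` value are illustrative assumptions, not part of this PR.

```python
from hyperopt import Trials, fmin, hp, tpe

# Toy search space and initial parameters (illustrative only).
search_space = {'alpha': hp.uniform('alpha', 0.0, 1.0),
                'beta': hp.uniform('beta', 0.0, 10.0)}
initial_parameters = {'alpha': 0.3}  # value already set during composition


def objective_with_fixed_init(params: dict) -> float:
    # The fixed initial parameters override whatever hyperopt sampled,
    # mirroring the `unchangeable_parameters` handling in `_objective`.
    params = {**params, **initial_parameters}
    return (params['alpha'] - 0.25) ** 2 + (params['beta'] - 5.0) ** 2


def objective(params: dict) -> float:
    return (params['alpha'] - 0.25) ** 2 + (params['beta'] - 5.0) ** 2


trials = Trials()

# Phase 1: a handful of evaluations with the initial parameters fixed,
# so the metric of the already-composed graph is not lost.
init_trials_num = 3
fmin(objective_with_fixed_init, search_space, algo=tpe.suggest,
     max_evals=init_trials_num, trials=trials, show_progressbar=False)

# Phase 2: continue in the same Trials object over the full space.
# max_evals is cumulative, so this call adds 20 - init_trials_num evaluations.
fmin(objective, search_space, algo=tpe.suggest,
     max_evals=20, trials=trials, show_progressbar=False)

print(len(trials), trials.argmin)  # number of points tried and the best assignment
```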
98 changes: 64 additions & 34 deletions golem/core/tuning/sequential.py
@@ -1,10 +1,12 @@
from copy import deepcopy
from datetime import timedelta
from functools import partial
from typing import Callable, Optional

from hyperopt import tpe, fmin, space_eval
from hyperopt import tpe, fmin, space_eval, Trials

from golem.core.adapter import BaseOptimizationAdapter
from golem.core.constants import MIN_TIME_FOR_TUNING_IN_SEC
from golem.core.optimisers.graph import OptGraph
from golem.core.optimisers.objective import ObjectiveFunction
from golem.core.tuning.hyperopt_tuner import HyperoptTuner, get_node_parameters_for_hyperopt
@@ -62,25 +64,28 @@ def _tune(self, graph: DomainGraphForTune, **kwargs) -> DomainGraphForTune:

# Tuning performed sequentially for every node - so get ids of nodes
nodes_ids = self.get_nodes_order(nodes_number=nodes_amount)
final_graph = deepcopy(self.init_graph)
best_metric = self.init_metric
for node_id in nodes_ids:
node = graph.nodes[node_id]
operation_name = node.name

# Get node's parameters to optimize
node_params = get_node_parameters_for_hyperopt(self.search_space, node_id, operation_name)

node_params, init_params = get_node_parameters_for_hyperopt(self.search_space, node_id, node)
if not node_params:
self.log.info(f'"{operation_name}" operation has no parameters to optimize')
self.log.info(f'"{node.name}" operation has no parameters to optimize')
else:
# Apply tuning for current node
self._optimize_node(node_id=node_id,
graph=graph,
node_params=node_params,
iterations_per_node=iterations_per_node,
seconds_per_node=seconds_per_node)

graph, metric = self._optimize_node(node_id=node_id,
graph=graph,
node_params=node_params,
init_params=init_params,
iterations_per_node=iterations_per_node,
seconds_per_node=seconds_per_node)
if metric <= best_metric:
final_graph = deepcopy(graph)
best_metric = metric
self.was_tuned = True
return graph
return final_graph

def get_nodes_order(self, nodes_number: int) -> range:
""" Method returns list with indices of nodes in the graph
@@ -114,22 +119,23 @@ def tune_node(self, graph: DomainGraphForTune, node_index: int) -> DomainGraphForTune:
self.init_check(graph)

node = graph.nodes[node_index]
operation_name = node.name

# Get node's parameters to optimize
node_params = get_node_parameters_for_hyperopt(self.search_space,
node_id=node_index,
operation_name=operation_name)
node_params, init_params = get_node_parameters_for_hyperopt(self.search_space,
node_id=node_index,
node=node)

remaining_time = self._get_remaining_time()
if self._check_if_tuning_possible(graph, len(node_params) > 1, remaining_time):
# Apply tuning for current node
self._optimize_node(graph=graph,
node_id=node_index,
node_params=node_params,
iterations_per_node=self.iterations,
seconds_per_node=remaining_time
)
graph, _ = self._optimize_node(graph=graph,
node_id=node_index,
node_params=node_params,
init_params=init_params,
iterations_per_node=self.iterations,
seconds_per_node=remaining_time
)

self.was_tuned = True

# Validate whether the optimization did well
@@ -143,6 +149,7 @@ def tune_node(self, graph: DomainGraphForTune, node_index: int) -> DomainGraphForTune:
def _optimize_node(self, graph: OptGraph,
node_id: int,
node_params: dict,
init_params: dict,
iterations_per_node: int,
seconds_per_node: float) -> OptGraph:
"""
@@ -158,20 +165,40 @@ def _optimize_node(self, graph: OptGraph,
Returns:
graph with tuned parameters in the particular node and the best metric value obtained
"""
best_parameters = fmin(partial(self._objective, graph=graph, node_id=node_id),
node_params,
algo=self.algo,
max_evals=iterations_per_node,
early_stop_fn=self.early_stop_fn,
timeout=seconds_per_node)

best_parameters = space_eval(space=node_params, hp_assignment=best_parameters)
remaining_time = self._get_remaining_time()
trials = Trials()
trials, init_trials_num = self._search_near_initial_parameters(partial(self._objective,
graph=graph,
node_id=node_id,
unchangeable_parameters=init_params),
node_params,
init_params,
trials,
remaining_time)

remaining_time = self._get_remaining_time()
if remaining_time > MIN_TIME_FOR_TUNING_IN_SEC:
fmin(partial(self._objective, graph=graph, node_id=node_id),
node_params,
trials=trials,
algo=self.algo,
max_evals=iterations_per_node,
early_stop_fn=self.early_stop_fn,
timeout=seconds_per_node)

best_params = space_eval(space=node_params, hp_assignment=trials.argmin)
is_best_trial_with_init_params = trials.best_trial.get('tid') in range(init_trials_num)
if is_best_trial_with_init_params:
best_params = {**best_params, **init_params}
# Set best params for this node in the graph
graph = self.set_arg_node(graph=graph, node_id=node_id, node_params=best_parameters)
return graph

def _objective(self, node_params: dict, graph: OptGraph, node_id: int) -> float:
graph = self.set_arg_node(graph=graph, node_id=node_id, node_params=best_params)
return graph, trials.best_trial['result']['loss']

def _objective(self,
node_params: dict,
graph: OptGraph,
node_id: int,
unchangeable_parameters: Optional[dict] = None) -> float:
""" Objective function for minimization problem

Args:
@@ -182,6 +209,9 @@ def _objective(self, node_params: dict, graph: OptGraph, node_id: int) -> float:
Returns:
value of objective function
"""
# override the sampled parameters with the fixed (unchangeable) ones
if unchangeable_parameters:
node_params = {**node_params, **unchangeable_parameters}

# Set hyperparameters for node
graph = self.set_arg_node(graph=graph, node_id=node_id, node_params=node_params)
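As a side note, the way `_optimize_node` reads the result back from the shared `Trials` object can be shown with a small standalone sketch; the search space, `init_params`, and `init_trials_num` below are made-up values, and only the `space_eval` / `trials.argmin` / `trials.best_trial` usage reflects the pattern in the diff above.

```python
from hyperopt import Trials, fmin, hp, space_eval, tpe

space = {'gamma': hp.uniform('gamma', 0.0, 1.0)}
init_params = {'gamma': 0.5}   # value the node carried before tuning
init_trials_num = 1            # evaluations spent on the fixed initial point


def objective(params: dict) -> float:
    return abs(params['gamma'] - 0.42)


trials = Trials()
fmin(objective, space, algo=tpe.suggest, max_evals=10,
     trials=trials, show_progressbar=False)

# Decode the best assignment back into the labels of the search space.
best_params = space_eval(space, trials.argmin)

# Trial ids are assigned sequentially, so ids below init_trials_num belong to
# the phase where the initial parameters were fixed; in that case the fixed
# values take precedence over the decoded ones.
if trials.best_trial['tid'] in range(init_trials_num):
    best_params = {**best_params, **init_params}

best_loss = trials.best_trial['result']['loss']
print(best_params, best_loss)
```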
77 changes: 15 additions & 62 deletions golem/core/tuning/simultaneous.py
@@ -6,7 +6,6 @@
from golem.core.constants import MIN_TIME_FOR_TUNING_IN_SEC
from golem.core.optimisers.graph import OptGraph
from golem.core.tuning.hyperopt_tuner import HyperoptTuner, get_node_parameters_for_hyperopt
from golem.core.tuning.search_space import get_node_operation_parameter_label
from golem.core.tuning.tuner_interface import DomainGraphForTune


@@ -28,18 +27,21 @@ def _tune(self, graph: DomainGraphForTune, show_progress: bool = True) -> DomainGraphForTune:
parameters_dict, init_parameters = self._get_parameters_for_tune(graph)
remaining_time = self._get_remaining_time()

if self._check_if_tuning_possible(graph, parameters_dict, remaining_time):
if self._check_if_tuning_possible(graph, len(parameters_dict) > 0, remaining_time):
trials = Trials()

try:
# try searching using initial parameters
# (uses original search space with fixed initial parameters)
trials, init_trials_num = self._search_near_initial_parameters(graph,
parameters_dict,
init_parameters,
trials,
remaining_time,
show_progress)
trials, init_trials_num = self._search_near_initial_parameters(
partial(self._objective,
graph=graph,
unchangeable_parameters=init_parameters),
parameters_dict,
init_parameters,
trials,
remaining_time,
show_progress)
remaining_time = self._get_remaining_time()
if remaining_time > MIN_TIME_FOR_TUNING_IN_SEC:
fmin(partial(self._objective, graph=graph),
@@ -70,48 +72,6 @@ def _tune(self, graph: DomainGraphForTune, show_progress: bool = True) -> DomainGraphForTune:
final_graph = graph
return final_graph

def _search_near_initial_parameters(self, graph: OptGraph,
search_space: dict,
initial_parameters: dict,
trials: Trials,
remaining_time: float,
show_progress: bool = True) -> Tuple[Trials, int]:
""" Method to search using the search space where parameters initially set for the graph are fixed.
This allows not to lose results obtained while composition process

Args:
graph: graph to be tuned
search_space: dict with parameters to be optimized and their search spaces
initial_parameters: dict with initial parameters of the graph
trials: Trials object to store all the search iterations
show_progress: shows progress of tuning if True

Returns:
trials: Trials object storing all the search trials
init_trials_num: number of iterations made using the search space with fixed initial parameters
"""
try_initial_parameters = initial_parameters and self.iterations > 1
if not try_initial_parameters:
init_trials_num = 0
return trials, init_trials_num

is_init_params_full = len(initial_parameters) == len(search_space)
if self.iterations < 10 or is_init_params_full:
init_trials_num = 1
else:
init_trials_num = min(int(self.iterations * 0.1), 10)

# fmin updates trials with evaluation points tried out during the call
fmin(partial(self._objective, graph=graph, unchangeable_parameters=initial_parameters),
search_space,
trials=trials,
algo=self.algo,
max_evals=init_trials_num,
show_progressbar=show_progress,
early_stop_fn=self.early_stop_fn,
timeout=remaining_time)
return trials, init_trials_num

def _get_parameters_for_tune(self, graph: OptGraph) -> Tuple[dict, dict]:
""" Method for defining the search space

@@ -126,20 +86,13 @@ def _get_parameters_for_tune(self, graph: OptGraph) -> Tuple[dict, dict]:
parameters_dict = {}
initial_parameters = {}
for node_id, node in enumerate(graph.nodes):
operation_name = node.name

# Assign unique prefix for each model hyperparameter
# label - number of node in the graph
node_params = get_node_parameters_for_hyperopt(self.search_space, node_id=node_id,
operation_name=operation_name)
parameters_dict.update(node_params)

tunable_node_params = self.search_space.get_parameters_for_operation(operation_name)
if tunable_node_params:
tunable_initial_params = {get_node_operation_parameter_label(node_id, operation_name, p):
node.parameters[p] for p in node.parameters if p in tunable_node_params}
if tunable_initial_params:
initial_parameters.update(tunable_initial_params)
tunable_node_params, initial_node_params = get_node_parameters_for_hyperopt(self.search_space,
node_id=node_id,
node=node)
parameters_dict.update(tunable_node_params)
initial_parameters.update(initial_node_params)

return parameters_dict, initial_parameters

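For readers unfamiliar with how the flat search space is assembled, below is a rough sketch of what `_get_parameters_for_tune` now delegates to `get_node_parameters_for_hyperopt`: every node parameter gets a unique label, its hyperopt space goes into one dictionary, and the value the node already carries goes into another. The `make_label` helper, the toy `nodes` list, and the `search_space` layout are assumptions for illustration; the real label format comes from `get_node_operation_parameter_label`.

```python
from hyperopt import hp


def make_label(node_id: int, operation: str, parameter: str) -> str:
    # Hypothetical stand-in for get_node_operation_parameter_label;
    # the real format may differ.
    return f'{node_id} || {operation} | {parameter}'


# Toy per-operation search space: parameter name -> factory taking the unique label.
search_space = {'rf': {'n_estimators': lambda label: hp.uniformint(label, 10, 500)}}

# Toy "graph": each node carries an operation name and its current parameters.
nodes = [{'name': 'scaling', 'parameters': {}},
         {'name': 'rf', 'parameters': {'n_estimators': 100}}]

parameters_dict, initial_parameters = {}, {}
for node_id, node in enumerate(nodes):
    for parameter_name, build_space in search_space.get(node['name'], {}).items():
        label = make_label(node_id, node['name'], parameter_name)
        parameters_dict[label] = build_space(label)
        # Remember the value the node already has so it can be evaluated first.
        if parameter_name in node['parameters']:
            initial_parameters[label] = node['parameters'][parameter_name]

print(sorted(parameters_dict), initial_parameters)
```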
5 changes: 3 additions & 2 deletions golem/core/tuning/tuner_interface.py
@@ -100,6 +100,7 @@ def init_check(self, graph: OptGraph) -> None:

Args:
graph: graph to calculate objective
multi_obj: whether the optimization was multi-objective.
"""
self.log.info('Hyperparameters optimization start: estimation of metric for initial graph')

@@ -179,8 +180,8 @@ def _multi_obj_final_check(self, tuned_graphs: Sequence[OptGraph]) -> Sequence[OptGraph]:
f'{metrics_formatted}')
else:
self.log.message('Initial metric dominates all found solutions. Return initial graph.')
final_graphs = self.init_graph
self.obtained_metric = self.init_metric
final_graphs = [self.init_graph]
self.obtained_metric = [self.init_metric]
return final_graphs

def get_metric_value(self, graph: OptGraph) -> Union[float, Sequence[float]]: