Fix sequential #248

Merged: 17 commits, Jun 4, 2024
62 changes: 55 additions & 7 deletions golem/core/tuning/hyperopt_tuner.py
@@ -1,14 +1,15 @@
from abc import ABC
from datetime import timedelta
from typing import Callable, Dict, Optional
from typing import Callable, Dict, Optional, Tuple, Any

import numpy as np
from hyperopt import hp, tpe
from hyperopt import hp, tpe, fmin, Trials
from hyperopt.early_stop import no_progress_loss
from hyperopt.pyll import Apply, scope
from hyperopt.pyll_utils import validate_label

from golem.core.adapter import BaseOptimizationAdapter
from golem.core.dag.linked_graph_node import LinkedGraphNode
from golem.core.log import default_log
from golem.core.optimisers.objective import ObjectiveFunction
from golem.core.tuning.search_space import SearchSpace, get_node_operation_parameter_label
@@ -64,6 +65,49 @@ def __init__(self, objective_evaluate: ObjectiveFunction,
self.algo = algo
self.log = default_log(self)

def _search_near_initial_parameters(self,
objective,
search_space: dict,
initial_parameters: dict,
trials: Trials,
remaining_time: float,
show_progress: bool = True) -> Tuple[Trials, int]:
""" Method to search using the search space where parameters initially set for the graph are fixed.
This allows not to lose results obtained while composition process
kasyanovse marked this conversation as resolved.
Show resolved Hide resolved

Args:
graph: graph to be tuned
search_space: dict with parameters to be optimized and their search spaces
initial_parameters: dict with initial parameters of the graph
trials: Trials object to store all the search iterations
show_progress: shows progress of tuning if True

Returns:
trials: Trials object storing all the search trials
init_trials_num: number of iterations made using the search space with fixed initial parameters
"""
try_initial_parameters = initial_parameters and self.iterations > 1
if not try_initial_parameters:
init_trials_num = 0
return trials, init_trials_num

is_init_params_full = len(initial_parameters) == len(search_space)
if self.iterations < 10 or is_init_params_full:
init_trials_num = 1
else:
init_trials_num = min(int(self.iterations * 0.1), 10)

# fmin updates trials with evaluation points tried out during the call
fmin(objective,
search_space,
trials=trials,
algo=self.algo,
max_evals=init_trials_num,
show_progressbar=show_progress,
early_stop_fn=self.early_stop_fn,
timeout=remaining_time)
return trials, init_trials_num


def get_parameter_hyperopt_space(search_space: SearchSpace,
operation_name: str,
@@ -96,31 +140,35 @@ def get_parameter_hyperopt_space(search_space: SearchSpace,
return None


def get_node_parameters_for_hyperopt(search_space: SearchSpace, node_id: int, operation_name: str) \
-> Dict[str, Apply]:
def get_node_parameters_for_hyperopt(search_space: SearchSpace, node_id: int, node: LinkedGraphNode) \
-> Tuple[Dict[str, Apply], Dict[str, Any]]:
"""
Function for forming dictionary with hyperparameters of the node operation for the ``HyperoptTuner``

Args:
search_space: SearchSpace with parameters per operation
node_id: number of node in graph.nodes list
operation_name: name of operation in the node
node: node from the graph

Returns:
parameters_dict: dictionary-like structure with labeled hyperparameters
and their range per operation
initial_parameters: dictionary with the initial parameter values taken from the node
"""

# Get available parameters for current operation
operation_name = node.name
parameters_list = search_space.get_parameters_for_operation(operation_name)

parameters_dict = {}
initial_parameters = {}
for parameter_name in parameters_list:
node_op_parameter_name = get_node_operation_parameter_label(node_id, operation_name, parameter_name)

# For operation get range where search can be done
space = get_parameter_hyperopt_space(search_space, operation_name, parameter_name, node_op_parameter_name)

parameters_dict.update({node_op_parameter_name: space})

return parameters_dict
if parameter_name in node.parameters:
initial_parameters.update({node_op_parameter_name: node.parameters[parameter_name]})

return parameters_dict, initial_parameters
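For context, here is a minimal, self-contained sketch of the two-phase idea behind `_search_near_initial_parameters`: a few `fmin` evaluations are run first with the composition-time parameters fixed, and the same `Trials` object is then reused for the search over the full space. The toy objective, search space, and `init_trials_num` value are illustrative assumptions, not part of this PR.

```python
from hyperopt import Trials, fmin, hp, tpe

# Toy search space and initial parameters (illustrative only).
search_space = {'alpha': hp.uniform('alpha', 0.0, 1.0),
                'beta': hp.uniform('beta', 0.0, 10.0)}
initial_parameters = {'alpha': 0.3}  # value already set during composition


def objective_with_fixed_init(params: dict) -> float:
    # The fixed initial parameters override whatever hyperopt sampled,
    # mirroring the `unchangeable_parameters` handling in `_objective`.
    params = {**params, **initial_parameters}
    return (params['alpha'] - 0.25) ** 2 + (params['beta'] - 5.0) ** 2


def objective(params: dict) -> float:
    return (params['alpha'] - 0.25) ** 2 + (params['beta'] - 5.0) ** 2


trials = Trials()

# Phase 1: a handful of evaluations with the initial parameters fixed,
# so the metric of the already-composed graph is not lost.
init_trials_num = 3
fmin(objective_with_fixed_init, search_space, algo=tpe.suggest,
     max_evals=init_trials_num, trials=trials, show_progressbar=False)

# Phase 2: continue in the same Trials object over the full space.
# max_evals is cumulative, so this call adds 20 - init_trials_num evaluations.
fmin(objective, search_space, algo=tpe.suggest,
     max_evals=20, trials=trials, show_progressbar=False)

print(len(trials), trials.argmin)  # number of points tried and the best assignment
```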
98 changes: 64 additions & 34 deletions golem/core/tuning/sequential.py
@@ -1,10 +1,12 @@
from copy import deepcopy
from datetime import timedelta
from functools import partial
from typing import Callable, Optional

from hyperopt import tpe, fmin, space_eval
from hyperopt import tpe, fmin, space_eval, Trials

from golem.core.adapter import BaseOptimizationAdapter
from golem.core.constants import MIN_TIME_FOR_TUNING_IN_SEC
from golem.core.optimisers.graph import OptGraph
from golem.core.optimisers.objective import ObjectiveFunction
from golem.core.tuning.hyperopt_tuner import HyperoptTuner, get_node_parameters_for_hyperopt
@@ -62,25 +64,28 @@ def _tune(self, graph: DomainGraphForTune, **kwargs) -> DomainGraphForTune:

# Tuning performed sequentially for every node - so get ids of nodes
nodes_ids = self.get_nodes_order(nodes_number=nodes_amount)
final_graph = deepcopy(self.init_graph)
best_metric = self.init_metric
for node_id in nodes_ids:
node = graph.nodes[node_id]
operation_name = node.name

# Get node's parameters to optimize
node_params = get_node_parameters_for_hyperopt(self.search_space, node_id, operation_name)

node_params, init_params = get_node_parameters_for_hyperopt(self.search_space, node_id, node)
if not node_params:
self.log.info(f'"{operation_name}" operation has no parameters to optimize')
self.log.info(f'"{node.name}" operation has no parameters to optimize')
else:
# Apply tuning for current node
self._optimize_node(node_id=node_id,
graph=graph,
node_params=node_params,
iterations_per_node=iterations_per_node,
seconds_per_node=seconds_per_node)

graph, metric = self._optimize_node(node_id=node_id,
graph=graph,
node_params=node_params,
init_params=init_params,
iterations_per_node=iterations_per_node,
seconds_per_node=seconds_per_node)
if metric <= best_metric:
final_graph = deepcopy(graph)
best_metric = metric
self.was_tuned = True
return graph
return final_graph

def get_nodes_order(self, nodes_number: int) -> range:
""" Method returns list with indices of nodes in the graph
@@ -114,22 +119,23 @@ def tune_node(self, graph: DomainGraphForTune, node_index: int) -> DomainGraphForTune:
self.init_check(graph)

node = graph.nodes[node_index]
operation_name = node.name

# Get node's parameters to optimize
node_params = get_node_parameters_for_hyperopt(self.search_space,
node_id=node_index,
operation_name=operation_name)
node_params, init_params = get_node_parameters_for_hyperopt(self.search_space,
node_id=node_index,
node=node)

remaining_time = self._get_remaining_time()
if self._check_if_tuning_possible(graph, len(node_params) > 1, remaining_time):
# Apply tuning for current node
self._optimize_node(graph=graph,
node_id=node_index,
node_params=node_params,
iterations_per_node=self.iterations,
seconds_per_node=remaining_time
)
graph, _ = self._optimize_node(graph=graph,
node_id=node_index,
node_params=node_params,
init_params=init_params,
iterations_per_node=self.iterations,
seconds_per_node=remaining_time
)

self.was_tuned = True

# Validate whether the optimization did well
@@ -143,6 +149,7 @@ def tune_node(self, graph: DomainGraphForTune, node_index: int) -> DomainGraphForTune:
def _optimize_node(self, graph: OptGraph,
node_id: int,
node_params: dict,
init_params: dict,
iterations_per_node: int,
seconds_per_node: float) -> OptGraph:
"""
@@ -158,20 +165,40 @@ def _optimize_node(self, graph: OptGraph,
Returns:
graph with tuned parameters in the particular node and the best metric value obtained
"""
best_parameters = fmin(partial(self._objective, graph=graph, node_id=node_id),
node_params,
algo=self.algo,
max_evals=iterations_per_node,
early_stop_fn=self.early_stop_fn,
timeout=seconds_per_node)

best_parameters = space_eval(space=node_params, hp_assignment=best_parameters)
remaining_time = self._get_remaining_time()
trials = Trials()
trials, init_trials_num = self._search_near_initial_parameters(partial(self._objective,
graph=graph,
node_id=node_id,
unchangeable_parameters=init_params),
node_params,
init_params,
trials,
remaining_time)

remaining_time = self._get_remaining_time()
if remaining_time > MIN_TIME_FOR_TUNING_IN_SEC:
fmin(partial(self._objective, graph=graph, node_id=node_id),
node_params,
trials=trials,
algo=self.algo,
max_evals=iterations_per_node,
early_stop_fn=self.early_stop_fn,
timeout=seconds_per_node)

best_params = space_eval(space=node_params, hp_assignment=trials.argmin)
is_best_trial_with_init_params = trials.best_trial.get('tid') in range(init_trials_num)
if is_best_trial_with_init_params:
best_params = {**best_params, **init_params}
# Set best params for this node in the graph
graph = self.set_arg_node(graph=graph, node_id=node_id, node_params=best_parameters)
return graph

def _objective(self, node_params: dict, graph: OptGraph, node_id: int) -> float:
graph = self.set_arg_node(graph=graph, node_id=node_id, node_params=best_params)
return graph, trials.best_trial['result']['loss']

def _objective(self,
node_params: dict,
graph: OptGraph,
node_id: int,
unchangeable_parameters: Optional[dict] = None) -> float:
""" Objective function for minimization problem

Args:
@@ -182,6 +209,9 @@ def _objective(self, node_params: dict, graph: OptGraph, node_id: int) -> float:
Returns:
value of objective function
"""
# override the sampled parameters with the fixed (unchangeable) ones
if unchangeable_parameters:
node_params = {**node_params, **unchangeable_parameters}

# Set hyperparameters for node
graph = self.set_arg_node(graph=graph, node_id=node_id, node_params=node_params)
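As a side note, the way `_optimize_node` reads the result back from the shared `Trials` object can be shown with a small standalone sketch; the search space, `init_params`, and `init_trials_num` below are made-up values, and only the `space_eval` / `trials.argmin` / `trials.best_trial` usage reflects the pattern in the diff above.

```python
from hyperopt import Trials, fmin, hp, space_eval, tpe

space = {'gamma': hp.uniform('gamma', 0.0, 1.0)}
init_params = {'gamma': 0.5}   # value the node carried before tuning
init_trials_num = 1            # evaluations spent on the fixed initial point


def objective(params: dict) -> float:
    return abs(params['gamma'] - 0.42)


trials = Trials()
fmin(objective, space, algo=tpe.suggest, max_evals=10,
     trials=trials, show_progressbar=False)

# Decode the best assignment back into the labels of the search space.
best_params = space_eval(space, trials.argmin)

# Trial ids are assigned sequentially, so ids below init_trials_num belong to
# the phase where the initial parameters were fixed; in that case the fixed
# values take precedence over the decoded ones.
if trials.best_trial['tid'] in range(init_trials_num):
    best_params = {**best_params, **init_params}

best_loss = trials.best_trial['result']['loss']
print(best_params, best_loss)
```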
77 changes: 15 additions & 62 deletions golem/core/tuning/simultaneous.py
@@ -6,7 +6,6 @@
from golem.core.constants import MIN_TIME_FOR_TUNING_IN_SEC
from golem.core.optimisers.graph import OptGraph
from golem.core.tuning.hyperopt_tuner import HyperoptTuner, get_node_parameters_for_hyperopt
from golem.core.tuning.search_space import get_node_operation_parameter_label
from golem.core.tuning.tuner_interface import DomainGraphForTune


@@ -28,18 +27,21 @@ def _tune(self, graph: DomainGraphForTune, show_progress: bool = True) -> DomainGraphForTune:
parameters_dict, init_parameters = self._get_parameters_for_tune(graph)
remaining_time = self._get_remaining_time()

if self._check_if_tuning_possible(graph, parameters_dict, remaining_time):
if self._check_if_tuning_possible(graph, len(parameters_dict) > 0, remaining_time):
trials = Trials()

try:
# try searching using initial parameters
# (uses original search space with fixed initial parameters)
trials, init_trials_num = self._search_near_initial_parameters(graph,
parameters_dict,
init_parameters,
trials,
remaining_time,
show_progress)
trials, init_trials_num = self._search_near_initial_parameters(
partial(self._objective,
graph=graph,
unchangeable_parameters=init_parameters),
parameters_dict,
init_parameters,
trials,
remaining_time,
show_progress)
remaining_time = self._get_remaining_time()
if remaining_time > MIN_TIME_FOR_TUNING_IN_SEC:
fmin(partial(self._objective, graph=graph),
@@ -70,48 +72,6 @@ def _tune(self, graph: DomainGraphForTune, show_progress: bool = True) -> DomainGraphForTune:
final_graph = graph
return final_graph

def _search_near_initial_parameters(self, graph: OptGraph,
search_space: dict,
initial_parameters: dict,
trials: Trials,
remaining_time: float,
show_progress: bool = True) -> Tuple[Trials, int]:
""" Method to search using the search space where parameters initially set for the graph are fixed.
This allows not to lose results obtained while composition process

Args:
graph: graph to be tuned
search_space: dict with parameters to be optimized and their search spaces
initial_parameters: dict with initial parameters of the graph
trials: Trials object to store all the search iterations
show_progress: shows progress of tuning if True

Returns:
trials: Trials object storing all the search trials
init_trials_num: number of iterations made using the search space with fixed initial parameters
"""
try_initial_parameters = initial_parameters and self.iterations > 1
if not try_initial_parameters:
init_trials_num = 0
return trials, init_trials_num

is_init_params_full = len(initial_parameters) == len(search_space)
if self.iterations < 10 or is_init_params_full:
init_trials_num = 1
else:
init_trials_num = min(int(self.iterations * 0.1), 10)

# fmin updates trials with evaluation points tried out during the call
fmin(partial(self._objective, graph=graph, unchangeable_parameters=initial_parameters),
search_space,
trials=trials,
algo=self.algo,
max_evals=init_trials_num,
show_progressbar=show_progress,
early_stop_fn=self.early_stop_fn,
timeout=remaining_time)
return trials, init_trials_num

def _get_parameters_for_tune(self, graph: OptGraph) -> Tuple[dict, dict]:
""" Method for defining the search space

@@ -126,20 +86,13 @@ def _get_parameters_for_tune(self, graph: OptGraph) -> Tuple[dict, dict]:
parameters_dict = {}
initial_parameters = {}
for node_id, node in enumerate(graph.nodes):
operation_name = node.name

# Assign unique prefix for each model hyperparameter
# label - number of node in the graph
node_params = get_node_parameters_for_hyperopt(self.search_space, node_id=node_id,
operation_name=operation_name)
parameters_dict.update(node_params)

tunable_node_params = self.search_space.get_parameters_for_operation(operation_name)
if tunable_node_params:
tunable_initial_params = {get_node_operation_parameter_label(node_id, operation_name, p):
node.parameters[p] for p in node.parameters if p in tunable_node_params}
if tunable_initial_params:
initial_parameters.update(tunable_initial_params)
tunable_node_params, initial_node_params = get_node_parameters_for_hyperopt(self.search_space,
node_id=node_id,
node=node)
parameters_dict.update(tunable_node_params)
initial_parameters.update(initial_node_params)

return parameters_dict, initial_parameters

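For readers unfamiliar with how the flat search space is assembled, below is a rough sketch of what `_get_parameters_for_tune` now delegates to `get_node_parameters_for_hyperopt`: every node parameter gets a unique label, its hyperopt space goes into one dictionary, and the value the node already carries goes into another. The `make_label` helper, the toy `nodes` list, and the `search_space` layout are assumptions for illustration; the real label format comes from `get_node_operation_parameter_label`.

```python
from hyperopt import hp


def make_label(node_id: int, operation: str, parameter: str) -> str:
    # Hypothetical stand-in for get_node_operation_parameter_label;
    # the real format may differ.
    return f'{node_id} || {operation} | {parameter}'


# Toy per-operation search space: parameter name -> factory taking the unique label.
search_space = {'rf': {'n_estimators': lambda label: hp.uniformint(label, 10, 500)}}

# Toy "graph": each node carries an operation name and its current parameters.
nodes = [{'name': 'scaling', 'parameters': {}},
         {'name': 'rf', 'parameters': {'n_estimators': 100}}]

parameters_dict, initial_parameters = {}, {}
for node_id, node in enumerate(nodes):
    for parameter_name, build_space in search_space.get(node['name'], {}).items():
        label = make_label(node_id, node['name'], parameter_name)
        parameters_dict[label] = build_space(label)
        # Remember the value the node already has so it can be evaluated first.
        if parameter_name in node['parameters']:
            initial_parameters[label] = node['parameters'][parameter_name]

print(sorted(parameters_dict), initial_parameters)
```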
5 changes: 3 additions & 2 deletions golem/core/tuning/tuner_interface.py
@@ -100,6 +100,7 @@ def init_check(self, graph: OptGraph) -> None:

Args:
graph: graph to calculate objective
multi_obj: whether the optimization was multi-objective.
"""
self.log.info('Hyperparameters optimization start: estimation of metric for initial graph')

@@ -179,8 +180,8 @@ def _multi_obj_final_check(self, tuned_graphs: Sequence[OptGraph]) -> Sequence[OptGraph]:
f'{metrics_formatted}')
else:
self.log.message('Initial metric dominates all found solutions. Return initial graph.')
final_graphs = self.init_graph
self.obtained_metric = self.init_metric
final_graphs = [self.init_graph]
self.obtained_metric = [self.init_metric]
return final_graphs

def get_metric_value(self, graph: OptGraph) -> Union[float, Sequence[float]]: