diff --git a/golem/core/optimisers/adaptive/mab_agents/contextual_mab_agent.py b/golem/core/optimisers/adaptive/mab_agents/contextual_mab_agent.py index ccd66480..b476b1fb 100644 --- a/golem/core/optimisers/adaptive/mab_agents/contextual_mab_agent.py +++ b/golem/core/optimisers/adaptive/mab_agents/contextual_mab_agent.py @@ -85,7 +85,7 @@ def partial_fit(self, experience: ExperienceBuffer): def _get_experience(self, experience: ExperienceBuffer): """ Get experience from ExperienceBuffer, process rewards and log. """ obs, actions, rewards = experience.retrieve_experience() - arms = [self._arm_by_action[action] for action in actions] + arms = [self._arm_by_action[self._get_callable_name(action)] for action in actions] # there is no need to process rewards as in MAB, since this processing unifies rewards for all contexts self._dbg_log(obs, actions, rewards) return obs, arms, rewards diff --git a/golem/core/optimisers/adaptive/mab_agents/mab_agent.py b/golem/core/optimisers/adaptive/mab_agents/mab_agent.py index 538c2ed2..59e3279b 100644 --- a/golem/core/optimisers/adaptive/mab_agents/mab_agent.py +++ b/golem/core/optimisers/adaptive/mab_agents/mab_agent.py @@ -2,7 +2,8 @@ import _pickle as pickle import random import re -from typing import Union, Sequence, Optional +from functools import partial +from typing import Union, Sequence, Optional, Callable from mabwiser.mab import MAB, LearningPolicy from scipy.special import softmax @@ -25,7 +26,8 @@ def __init__(self, super().__init__(actions=actions, enable_logging=enable_logging) self.actions = list(actions) self._indices = list(range(len(actions))) - self._arm_by_action = dict(zip(actions, self._indices)) + # str because parent operator for mutation is stored as string for custom mutations serialisation + self._arm_by_action = dict(map(lambda x, y: (self._get_callable_name(x), y), actions, self._indices)) self._agent = MAB(arms=self._indices, learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.4), n_jobs=n_jobs) @@ -34,6 +36,16 @@
def __init__(self, self._initial_fit() self._path_to_save = path_to_save + @staticmethod + def _get_callable_name(action: Callable): + if isinstance(action, partial): + return action.func.__name__ + else: + try: + return action.__name__ + except AttributeError: + return str(action) + def _initial_fit(self): n = len(self.actions) uniform_rewards = [1. / n] * n diff --git a/golem/core/optimisers/genetic/operators/base_mutations.py b/golem/core/optimisers/genetic/operators/base_mutations.py index 1677359f..ea192766 100644 --- a/golem/core/optimisers/genetic/operators/base_mutations.py +++ b/golem/core/optimisers/genetic/operators/base_mutations.py @@ -39,6 +39,10 @@ class MutationTypesEnum(Enum): none = 'none' + @property + def __name__(self): + return self.name + def get_mutation_prob(mut_id: MutationStrengthEnum, node: Optional[GraphNode], default_mutation_prob: float = 0.7) -> float: diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index 8c52e751..ebe4842e 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -103,8 +103,9 @@ def _mutation(self, individual: Individual) -> Tuple[Individual, bool]: new_graph = self._apply_mutations(new_graph, mutation_type) is_correct_graph = self.graph_generation_params.verifier(new_graph) if is_correct_graph: + # str for custom mutations serialisation parent_operator = ParentOperator(type_='mutation', - operators=mutation_type, + operators=mutation_type.__name__, parent_individuals=individual) individual = Individual(new_graph, parent_operator, metadata=self.requirements.static_individual_metadata)