From ca8f51ae01242005bfd95b3ae895193fa6a7f1f7 Mon Sep 17 00:00:00 2001
From: riccardo
Date: Tue, 25 Jul 2023 16:59:13 +0200
Subject: [PATCH 1/3] a2c attributes

---
 rlberry/agents/torch/a2c/a2c.py | 46 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/rlberry/agents/torch/a2c/a2c.py b/rlberry/agents/torch/a2c/a2c.py
index be876fa40..2c6dc0d98 100644
--- a/rlberry/agents/torch/a2c/a2c.py
+++ b/rlberry/agents/torch/a2c/a2c.py
@@ -57,6 +57,50 @@ class A2CAgent(AgentTorch, AgentWithSimplePolicy):
         Interval (in number of transitions) between agent evaluations in fit().
         If None, never evaluate.

+    Attributes
+    ----------
+    optimizer_type : str
+        Type of optimizer used during neural network training. 'ADAM' by default.
+    value_net_fn : function(env, **kwargs)
+        Function that returns an instance of a value network (PyTorch).
+        If None, a default net is used.
+    batch_size : int
+        Size of mini-batches during each A2C update epoch.
+    gamma : float
+        Discount factor used to discount future rewards.
+    episode_timesteps : int
+        Number of steps in the current episode.
+    total_timesteps : int
+        Total number of timesteps collected by the agent.
+    _max_episode_steps : int
+        Maximum number of steps per episode.
+    total_episodes : int
+        Total number of episodes collected by the agent. 
+    entr_coef : float
+        Entropy coefficient. Controls the contribution of entropy regularization to the policy's objective.
+    _policy_optimizer : torch.optim.Optimizer
+        Optimizer used to update the policy network.
+    value_optimizer : torch.optim.Optimizer
+        Optimizer used to update the value network.
+    learning_rate : float
+        Learning rate used by the optimizer during neural network training.
+    eval_interval : int, default = None
+        Interval (in number of transitions) between agent evaluations in fit().
+        If None, never evaluate.
+    policy_net_fn : function(env, **kwargs)
+        Function that returns an instance of a policy network (PyTorch).
+    policy_net_kwargs : dict
+        Keyword arguments for `policy_net_fn`.
+    value_net_kwargs : dict
+        Keyword arguments for `value_net_fn`.
+    value_net : torch.nn.Module
+        The value network used by the agent.
+    device : str
+        Torch device on which the agent's neural networks are placed.
+    optimizer_kwargs : dict
+        Keyword arguments for the optimizer used during neural network training.
+
+
     References
     ----------
     Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T., Harley, T.,
@@ -122,7 +166,7 @@ def __init__(
             max_episode_steps = np.inf
         self._max_episode_steps = max_episode_steps

-        self._policy = None  # categorical policy function
+        self._policy = None

         # initialize
         self.reset()

From 2bbefac7dd2e42047da05509cf0e76f684bec4af Mon Sep 17 00:00:00 2001
From: riccardo
Date: Tue, 25 Jul 2023 16:59:13 +0200
Subject: [PATCH 2/3] Fixes #188: a2c attributes

---
 rlberry/agents/torch/a2c/a2c.py | 46 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/rlberry/agents/torch/a2c/a2c.py b/rlberry/agents/torch/a2c/a2c.py
index be876fa40..2c6dc0d98 100644
--- a/rlberry/agents/torch/a2c/a2c.py
+++ b/rlberry/agents/torch/a2c/a2c.py
@@ -57,6 +57,50 @@ class A2CAgent(AgentTorch, AgentWithSimplePolicy):
         Interval (in number of transitions) between agent evaluations in fit().
         If None, never evaluate.

+    Attributes
+    ----------
+    optimizer_type : str
+        Type of optimizer used during neural network training. 'ADAM' by default.
+    value_net_fn : function(env, **kwargs)
+        Function that returns an instance of a value network (PyTorch).
+        If None, a default net is used.
+    batch_size : int
+        Size of mini-batches during each A2C update epoch.
+    gamma : float
+        Discount factor used to discount future rewards.
+    episode_timesteps : int
+        Number of steps in the current episode.
+    total_timesteps : int
+        Total number of timesteps collected by the agent.
+    _max_episode_steps : int
+        Maximum number of steps per episode.
+    total_episodes : int
+        Total number of episodes collected by the agent. 
+    entr_coef : float
+        Entropy coefficient. Controls the contribution of entropy regularization to the policy's objective.
+    _policy_optimizer : torch.optim.Optimizer
+        Optimizer used to update the policy network.
+    value_optimizer : torch.optim.Optimizer
+        Optimizer used to update the value network.
+    learning_rate : float
+        Learning rate used by the optimizer during neural network training.
+    eval_interval : int, default = None
+        Interval (in number of transitions) between agent evaluations in fit().
+        If None, never evaluate.
+    policy_net_fn : function(env, **kwargs)
+        Function that returns an instance of a policy network (PyTorch).
+    policy_net_kwargs : dict
+        Keyword arguments for `policy_net_fn`.
+    value_net_kwargs : dict
+        Keyword arguments for `value_net_fn`.
+    value_net : torch.nn.Module
+        The value network used by the agent.
+    device : str
+        Torch device on which the agent's neural networks are placed.
+    optimizer_kwargs : dict
+        Keyword arguments for the optimizer used during neural network training.
+
+
     References
     ----------
     Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T., Harley, T.,
@@ -122,7 +166,7 @@ def __init__(
             max_episode_steps = np.inf
         self._max_episode_steps = max_episode_steps

-        self._policy = None  # categorical policy function
+        self._policy = None

         # initialize
         self.reset()

From 704632a46d26286e437c222f7503c30a4a9061fa Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 25 Jul 2023 15:29:54 +0000
Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 rlberry/agents/torch/a2c/a2c.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rlberry/agents/torch/a2c/a2c.py b/rlberry/agents/torch/a2c/a2c.py
index 2d6056bf0..c11fac801 100644
--- a/rlberry/agents/torch/a2c/a2c.py
+++ b/rlberry/agents/torch/a2c/a2c.py
@@ -75,7 +75,7 @@ class A2CAgent(AgentTorch, AgentWithSimplePolicy):
     _max_episode_steps : int
         Maximum number of steps per episode.
     total_episodes : int
-        Total number of episodes collected by the agent. 
+        Total number of episodes collected by the agent.
     entr_coef : float
         Entropy coefficient. Controls the contribution of entropy regularization to the policy's objective.
     _policy_optimizer : torch.optim.Optimizer
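
Illustrative note (not part of the patches above): a minimal sketch of how the documented attributes surface on an A2CAgent instance. It assumes an rlberry version where A2CAgent is importable from rlberry.agents.torch and gym_make from rlberry.envs, and that constructor keyword arguments of the same names (batch_size, gamma, entr_coef, learning_rate, optimizer_type) set the corresponding attributes; the environment id and budget are arbitrary choices for the example.

    from rlberry.envs import gym_make
    from rlberry.agents.torch import A2CAgent

    # Build a small control environment and an A2C agent on it.
    env = gym_make("CartPole-v1")
    agent = A2CAgent(
        env,
        batch_size=64,          # size of mini-batches per A2C update epoch
        gamma=0.99,             # discount factor for future rewards
        entr_coef=0.01,         # weight of the entropy regularization term
        learning_rate=1e-3,     # optimizer learning rate
        optimizer_type="ADAM",  # documented default optimizer
    )

    # Train for a fixed number of timesteps; the counters documented above are updated.
    agent.fit(budget=10_000)

    # The documented attributes are plain instance attributes after construction/fit.
    print(agent.gamma, agent.batch_size)
    print(agent.total_timesteps, agent.total_episodes)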