From ca8f51ae01242005bfd95b3ae895193fa6a7f1f7 Mon Sep 17 00:00:00 2001
From: riccardo
Date: Tue, 25 Jul 2023 16:59:13 +0200
Subject: [PATCH 1/3] a2c attributes

---
 rlberry/agents/torch/a2c/a2c.py | 46 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/rlberry/agents/torch/a2c/a2c.py b/rlberry/agents/torch/a2c/a2c.py
index be876fa40..2c6dc0d98 100644
--- a/rlberry/agents/torch/a2c/a2c.py
+++ b/rlberry/agents/torch/a2c/a2c.py
@@ -57,6 +57,50 @@ class A2CAgent(AgentTorch, AgentWithSimplePolicy):
         Interval (in number of transitions) between agent evaluations in fit().
         If None, never evaluate.

+    Attributes
+    ----------
+    optimizer_type : str
+        Type of optimizer used during neural network training. 'ADAM' by default.
+    value_net_fn : function(env, **kwargs)
+        Function that returns an instance of a value network (PyTorch).
+        If None, a default net is used.
+    batch_size : int
+        Size of mini-batches during each A2C update epoch.
+    gamma : float
+        Discount factor used to discount future rewards.
+    episode_timesteps : int
+        Number of steps in the current episode.
+    total_timesteps : int
+        Total number of timesteps collected by the agent.
+    _max_episode_steps : int
+        Maximum number of steps per episode.
+    total_episodes : int
+        Total number of episodes collected by the agent. 
+    entr_coef : float
+        Entropy coefficient. Controls the contribution of entropy regularization to the policy's objective.
+    _policy_optimizer : torch.optim.Optimizer
+        Optimizer used to update the policy network.
+    value_optimizer : torch.optim.Optimizer
+        Optimizer used to update the value network.
+    learning_rate : float
+        Learning rate used by the optimizer during neural network training.
+    eval_interval : int, default = None
+        Interval (in number of transitions) between agent evaluations in fit().
+        If None, never evaluate.
+    policy_net_fn : function(env, **kwargs)
+        Function that returns an instance of a policy network (PyTorch).
+    policy_net_kwargs : dict
+        Keyword arguments for `policy_net_fn`.
+    value_net_kwargs : dict
+        Keyword arguments for `value_net_fn`.
+    value_net : torch.nn.Module
+        The value network used by the agent.
+    device : str
+        Torch device on which the agent's neural networks are placed.
+    optimizer_kwargs : dict
+        Keyword arguments for the optimizer used during neural network training.
+
+
     References
     ----------
     Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T., Harley, T.,
@@ -122,7 +166,7 @@ def __init__(
             max_episode_steps = np.inf
         self._max_episode_steps = max_episode_steps

-        self._policy = None  # categorical policy function
+        self._policy = None

         # initialize
         self.reset()

From 2bbefac7dd2e42047da05509cf0e76f684bec4af Mon Sep 17 00:00:00 2001
From: riccardo
Date: Tue, 25 Jul 2023 16:59:13 +0200
Subject: [PATCH 2/3] Fixes #188: a2c attributes

---
 rlberry/agents/torch/a2c/a2c.py | 46 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/rlberry/agents/torch/a2c/a2c.py b/rlberry/agents/torch/a2c/a2c.py
index be876fa40..2c6dc0d98 100644
--- a/rlberry/agents/torch/a2c/a2c.py
+++ b/rlberry/agents/torch/a2c/a2c.py
@@ -57,6 +57,50 @@ class A2CAgent(AgentTorch, AgentWithSimplePolicy):
         Interval (in number of transitions) between agent evaluations in fit().
         If None, never evaluate.

+    Attributes
+    ----------
+    optimizer_type : str
+        Type of optimizer used during neural network training. 'ADAM' by default.
+    value_net_fn : function(env, **kwargs)
+        Function that returns an instance of a value network (PyTorch).
+        If None, a default net is used.
+    batch_size : int
+        Size of mini-batches during each A2C update epoch.
+    gamma : float
+        Discount factor used to discount future rewards.
+    episode_timesteps : int
+        Number of steps in the current episode.
+    total_timesteps : int
+        Total number of timesteps collected by the agent.
+    _max_episode_steps : int
+        Maximum number of steps per episode.
+    total_episodes : int
+        Total number of episodes collected by the agent. 
+    entr_coef : float
+        Entropy coefficient. Controls the contribution of entropy regularization to the policy's objective.
+    _policy_optimizer : torch.optim.Optimizer
+        Optimizer used to update the policy network.
+    value_optimizer : torch.optim.Optimizer
+        Optimizer used to update the value network.
+    learning_rate : float
+        Learning rate used by the optimizer during neural network training.
+    eval_interval : int, default = None
+        Interval (in number of transitions) between agent evaluations in fit().
+        If None, never evaluate.
+    policy_net_fn : function(env, **kwargs)
+        Function that returns an instance of a policy network (PyTorch).
+    policy_net_kwargs : dict
+        Keyword arguments for `policy_net_fn`.
+    value_net_kwargs : dict
+        Keyword arguments for `value_net_fn`.
+    value_net : torch.nn.Module
+        The value network used by the agent.
+    device : str
+        Torch device on which the agent's neural networks are placed.
+    optimizer_kwargs : dict
+        Keyword arguments for the optimizer used during neural network training.
+
+
     References
     ----------
     Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T., Harley, T.,
@@ -122,7 +166,7 @@ def __init__(
             max_episode_steps = np.inf
         self._max_episode_steps = max_episode_steps

-        self._policy = None  # categorical policy function
+        self._policy = None

         # initialize
         self.reset()

From 704632a46d26286e437c222f7503c30a4a9061fa Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 25 Jul 2023 15:29:54 +0000
Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 rlberry/agents/torch/a2c/a2c.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rlberry/agents/torch/a2c/a2c.py b/rlberry/agents/torch/a2c/a2c.py
index 2d6056bf0..c11fac801 100644
--- a/rlberry/agents/torch/a2c/a2c.py
+++ b/rlberry/agents/torch/a2c/a2c.py
@@ -75,7 +75,7 @@ class A2CAgent(AgentTorch, AgentWithSimplePolicy):
     _max_episode_steps : int
         Maximum number of steps per episode.
     total_episodes : int
-        Total number of episodes collected by the agent. 
+        Total number of episodes collected by the agent.
     entr_coef : float
         Entropy coefficient. Controls the contribution of entropy regularization to the policy's objective.
     _policy_optimizer : torch.optim.Optimizer
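
Illustrative note (not part of the patches above): a minimal sketch of how the documented attributes surface on an A2CAgent instance. It assumes an rlberry version where A2CAgent is importable from rlberry.agents.torch and gym_make from rlberry.envs, and that constructor keyword arguments of the same names (batch_size, gamma, entr_coef, learning_rate, optimizer_type) set the corresponding attributes; the environment id and budget are arbitrary choices for the example.

    from rlberry.envs import gym_make
    from rlberry.agents.torch import A2CAgent

    # Build a small control environment and an A2C agent on it.
    env = gym_make("CartPole-v1")
    agent = A2CAgent(
        env,
        batch_size=64,          # size of mini-batches per A2C update epoch
        gamma=0.99,             # discount factor for future rewards
        entr_coef=0.01,         # weight of the entropy regularization term
        learning_rate=1e-3,     # optimizer learning rate
        optimizer_type="ADAM",  # documented default optimizer
    )

    # Train for a fixed number of timesteps; the counters documented above are updated.
    agent.fit(budget=10_000)

    # The documented attributes are plain instance attributes after construction/fit.
    print(agent.gamma, agent.batch_size)
    print(agent.total_timesteps, agent.total_episodes)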