dqn.rst APIs(#1)

agi-brain · Nov 30, 2023 · 8954c67 · 8954c67
1 parent 2953516
commit 8954c67
Showing 1 changed file with 93 additions and 7 deletions.
diff --git a/docs/source/documents/api/learners/drl/dqn.rst b/docs/source/documents/api/learners/drl/dqn.rst
@@ -7,6 +7,39 @@ DQN_Learner
 
 **PyTorch:**
 
+.. py:class::
+  xuance.torch.learners.qlearning_family.dqn_learner.DQN_Learner(policy, optimizer, scheduler, device, model_dir, gamma, sync_frequency)
+
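+  :param policy: The Q-network policy to optimize; it is called on observation batches and also provides ``target`` and ``copy_target()`` for the target network.
+  :type policy: nn.Module
+  :param optimizer: The optimizer that is stepped on the Q-loss in every update.
+  :type optimizer: torch.optim.Optimizer
+  :param scheduler: Optional learning-rate scheduler, stepped once per update when given (default: None).
+  :type scheduler: torch.optim.lr_scheduler._LRScheduler
+  :param device: The device on which the batch tensors are placed (default: None).
+  :type device: int, str or torch.device
+  :param model_dir: Model directory path, forwarded to the base ``Learner`` (default: "./").
+  :type model_dir: str
+  :param gamma: Discount factor applied to the bootstrapped target Q-value (default: 0.99).
+  :type gamma: float
+  :param sync_frequency: Number of update iterations between hard updates of the target network (default: 100).
+  :type sync_frequency: int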
+
+.. py:function::
+  xuance.torch.learners.qlearning_family.dqn_learner.DQN_Learner.update(obs_batch, act_batch, rew_batch, next_batch, terminal_batch)
+
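+  :param obs_batch: Batch of observations, passed to the policy network to compute the evaluated Q-values.
+  :type obs_batch: array-like
+  :param act_batch: Batch of discrete action indices that were taken; used to select the corresponding Q-values.
+  :type act_batch: array-like
+  :param rew_batch: Batch of rewards.
+  :type rew_batch: array-like
+  :param next_batch: Batch of next observations, passed to the target network.
+  :type next_batch: array-like
+  :param terminal_batch: Batch of termination flags; a value of 1 removes the bootstrapped term from the target.
+  :type terminal_batch: array-like
+  :return: Training information with the keys ``Qloss``, ``learning_rate`` and ``predictQ``.
+  :rtype: dict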
 
 .. raw:: html
 
@@ -28,18 +61,71 @@ Source Code
 -----------------
 
 .. tabs::
-
-    .. group-tab:: PyTorch
 
-        .. code-block:: python3
+  .. group-tab:: PyTorch
 
+    .. code-block:: python
 
+        from xuance.torch.learners import *
 
 
-    .. group-tab:: TensorFlow
+        class DQN_Learner(Learner):  # DQN learner: one TD update per update() call, periodic hard target sync
+            def __init__(self,
+                         policy: nn.Module,
+                         optimizer: torch.optim.Optimizer,
+                         scheduler: Optional[torch.optim.lr_scheduler._LRScheduler] = None,
+                         device: Optional[Union[int, str, torch.device]] = None,
+                         model_dir: str = "./",
+                         gamma: float = 0.99,
+                         sync_frequency: int = 100):
+                self.gamma = gamma  # discount factor used when building the TD target
+                self.sync_frequency = sync_frequency  # iterations between target-network copies
+                super(DQN_Learner, self).__init__(policy, optimizer, scheduler, device, model_dir)  # remaining arguments are handled by the base Learner
 
-        .. code-block:: python3
+            def update(self, obs_batch, act_batch, rew_batch, next_batch, terminal_batch):  # one gradient step on a sampled batch; returns a dict of training info
+                self.iterations += 1  # presumably initialized by the base Learner -- TODO confirm
+                act_batch = torch.as_tensor(act_batch, device=self.device)  # actions, rewards and terminal flags become tensors on self.device
+                rew_batch = torch.as_tensor(rew_batch, device=self.device)
+                ter_batch = torch.as_tensor(terminal_batch, device=self.device)
 
-    .. group-tab:: MindSpore
+                _, _, evalQ = self.policy(obs_batch)  # third output of the policy is used as the Q-values
+                _, _, targetQ = self.policy.target(next_batch)  # Q-values of the next observations from the target network
+                targetQ = targetQ.max(dim=-1).values  # maximum over the last (action) dimension
+                targetQ = rew_batch + self.gamma * (1 - ter_batch) * targetQ  # TD target; terminal flag of 1 drops the bootstrapped term
+                predictQ = (evalQ * F.one_hot(act_batch.long(), evalQ.shape[1])).sum(dim=-1)  # Q-value of the taken action; assumes evalQ is (batch, n_actions) -- TODO confirm
 
-        .. code-block:: python3
+                loss = F.mse_loss(predictQ, targetQ)  # NOTE(review): targetQ is not detached here -- confirm policy.target computes it without gradients
+                self.optimizer.zero_grad()
+                loss.backward()
+                self.optimizer.step()
+                if self.scheduler is not None:  # the scheduler is optional
+                    self.scheduler.step()
+
+                # hard update for target network
+                if self.iterations % self.sync_frequency == 0:  # every sync_frequency-th iteration
+                    self.policy.copy_target()
+                lr = self.optimizer.state_dict()['param_groups'][0]['lr']  # current learning rate of the first parameter group
+
+                info = {
+                    "Qloss": loss.item(),  # scalar loss of this update
+                    "learning_rate": lr,
+                    "predictQ": predictQ.mean().item()  # batch mean of the selected Q-values
+                }
+
+                return info
+
+
+
+
+
+
+
+
+  .. group-tab:: TensorFlow
+
+    .. code-block:: python
+
+
+  .. group-tab:: MindSpore
+
+    .. code-block:: python