Fix the shape of `Tz` for multi-step returns in the distributional DQN loss

pytorch · Jul 5, 2024 · 5546b57 · 5546b57
1 parent 55f0a52
commit 5546b57
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py
@@ -560,7 +560,7 @@ def forward(self, input_tensordict: TensorDictBase) -> TensorDict:
             support = support.to("cpu")
             pns_a = pns_a.to("cpu")
 
-            Tz = reward + (1 - terminated.to(reward.dtype)) * discount * support
+            Tz = reward + (1 - terminated.to(reward.dtype)) * discount.unsqueeze(-1) * support.repeat(batch_size, 1)
             if Tz.shape != torch.Size([batch_size, atoms]):
                 raise RuntimeError(
                     "Tz shape must be torch.Size([batch_size, atoms]), "