Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Dreamer-V3 algo #71

Merged
merged 2 commits into from
Aug 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sheeprl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from sheeprl.algos.dreamer_v1 import dreamer_v1
from sheeprl.algos.dreamer_v2 import dreamer_v2
from sheeprl.algos.dreamer_v3 import dreamer_v3
from sheeprl.algos.droq import droq
from sheeprl.algos.p2e_dv1 import p2e_dv1
from sheeprl.algos.p2e_dv2 import p2e_dv2
Expand Down
6 changes: 5 additions & 1 deletion sheeprl/algos/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ class StandardArgs:
screen_size: int = Arg(default=64, help="the size of the pixel-from observations (if any)")
frame_stack: int = Arg(default=-1, help="how many frame to stack (only for pixel-like observations)")
frame_stack_dilation: int = Arg(default=1, help="the dilation between the stacked frames, 1 no dilation")
max_episode_steps: int = Arg(default=-1)
max_episode_steps: int = Arg(
default=-1,
help="the maximum duration in terms of number of steps of an episode, -1 to disable. "
"This value will be divided by the `action_repeat` value during the environment creation.",
)

def __setattr__(self, __name: str, __value: Any) -> None:
super().__setattr__(__name, __value)
Expand Down
2 changes: 1 addition & 1 deletion sheeprl/algos/dreamer_v1/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def __init__(
self.continue_model = continue_model


class Player(nn.Module):
class PlayerDV1(nn.Module):
"""The model of the DreamerV1 player.

Args:
Expand Down
4 changes: 3 additions & 1 deletion sheeprl/algos/dreamer_v1/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ class DreamerV1Args(StandardArgs):
)
action_repeat: int = Arg(default=2, help="the number of times an action is repeated")
max_episode_steps: int = Arg(
default=1000, help="the maximum duration in terms of number of steps of an episode, -1 to disable"
default=1000,
help="the maximum duration in terms of number of steps of an episode, -1 to disable. "
"This value will be divided by the `action_repeat` value during the environment creation.",
)
atari_noop_max: int = Arg(
default=30,
Expand Down
12 changes: 6 additions & 6 deletions sheeprl/algos/dreamer_v1/dreamer_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from torch.utils.data import BatchSampler
from torchmetrics import MeanMetric

from sheeprl.algos.dreamer_v1.agent import Player, WorldModel, build_models
from sheeprl.algos.dreamer_v1.agent import PlayerDV1, WorldModel, build_models
from sheeprl.algos.dreamer_v1.args import DreamerV1Args
from sheeprl.algos.dreamer_v1.loss import actor_loss, critic_loss, reconstruction_loss
from sheeprl.algos.dreamer_v2.utils import test
Expand Down Expand Up @@ -209,8 +209,8 @@ def train(
aggregator.update("Loss/state_loss", state_loss.detach())
aggregator.update("Loss/continue_loss", continue_loss.detach())
aggregator.update("State/kl", kl.detach())
aggregator.update("State/p_entropy", p.entropy().mean().detach())
aggregator.update("State/q_entropy", q.entropy().mean().detach())
aggregator.update("State/post_entropy", p.entropy().mean().detach())
aggregator.update("State/prior_entropy", q.entropy().mean().detach())

# Behaviour Learning
# unflatten first 2 dimensions of recurrent and posterior states in order to have all the states on the first dimension.
Expand Down Expand Up @@ -443,7 +443,7 @@ def main():
state["actor"] if args.checkpoint_path else None,
state["critic"] if args.checkpoint_path else None,
)
player = Player(
player = PlayerDV1(
world_model.encoder.module,
world_model.rssm.recurrent_model.module,
world_model.rssm.representation_model.module,
Expand Down Expand Up @@ -482,8 +482,8 @@ def main():
"Loss/reward_loss": MeanMetric(sync_on_compute=False),
"Loss/state_loss": MeanMetric(sync_on_compute=False),
"Loss/continue_loss": MeanMetric(sync_on_compute=False),
"State/p_entropy": MeanMetric(sync_on_compute=False),
"State/q_entropy": MeanMetric(sync_on_compute=False),
"State/post_entropy": MeanMetric(sync_on_compute=False),
"State/prior_entropy": MeanMetric(sync_on_compute=False),
"State/kl": MeanMetric(sync_on_compute=False),
"Params/exploration_amout": MeanMetric(sync_on_compute=False),
"Grads/world_model": MeanMetric(sync_on_compute=False),
Expand Down
88 changes: 81 additions & 7 deletions sheeprl/algos/dreamer_v2/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,24 @@


class CNNEncoder(nn.Module):
"""The Dreamer-V2 image encoder. This is composed of 4 `nn.Conv2d` with
kernel_size=3, stride=2 and padding=1. No bias is used if a `nn.LayerNorm`
is used after the convolution. This 4-stage model assumes that the image
is 64x64. If more than one image is to be encoded, then those will
be concatenated on the channel dimension and fed to the encoder.

Args:
keys (Sequence[str]): the keys representing the image observations to encode.
input_channels (Sequence[int]): the input channels, one for each image observation to encode.
        image_size (Tuple[int, int]): the image size as (Height, Width).
channels_multiplier (int): the multiplier for the output channels. Given the 4 stages, the 4 output channels
will be [1, 2, 4, 8] * `channels_multiplier`.
layer_norm (bool, optional): whether to apply the layer normalization.
Defaults to True.
activation (ModuleType, optional): the activation function.
Defaults to nn.ELU.
"""

def __init__(
self,
keys: Sequence[str],
Expand Down Expand Up @@ -59,6 +77,24 @@ def forward(self, obs: Dict[str, Tensor]) -> Tensor:


class MLPEncoder(nn.Module):
"""The Dreamer-V3 vector encoder. This is composed of N `nn.Linear` layers, where
N is specified by `mlp_layers`. No bias is used if a `nn.LayerNorm` is used after the linear layer.
    If more than one vector is to be encoded, then those will be concatenated on the last
dimension before being fed to the encoder.

Args:
keys (Sequence[str]): the keys representing the vector observations to encode.
input_dims (Sequence[int]): the dimensions of every vector to encode.
mlp_layers (int, optional): how many mlp layers.
Defaults to 4.
dense_units (int, optional): the dimension of every mlp.
Defaults to 512.
layer_norm (bool, optional): whether to apply the layer normalization.
Defaults to True.
activation (ModuleType, optional): the activation function after every layer.
Defaults to nn.ELU.
"""

def __init__(
self,
keys: Sequence[str],
Expand Down Expand Up @@ -87,6 +123,25 @@ def forward(self, obs: Dict[str, Tensor]) -> Tensor:


class CNNDecoder(nn.Module):
"""The almost-exact inverse of the `CNNEncoder` class, where in 4 stages it reconstructs
the observation image to 64x64. If multiple images are to be reconstructed,
then it will create a dictionary with an entry for every reconstructed image.
    No bias is used if a `nn.LayerNorm` is used after the `nn.ConvTranspose2d` layer.

Args:
keys (Sequence[str]): the keys of the image observation to be reconstructed.
output_channels (Sequence[int]): the output channels, one for every image observation.
channels_multiplier (int): the channels multiplier, same for the encoder network.
latent_state_size (int): the size of the latent state. Before applying the decoder,
a `nn.Linear` layer is used to project the latent state to a feature vector.
        cnn_encoder_output_dim (int): the output dimension of the image encoder.
image_size (Tuple[int, int]): the final image size.
activation (nn.Module, optional): the activation function.
Defaults to nn.ELU.
layer_norm (bool, optional): whether to apply the layer normalization.
Defaults to True.
"""

def __init__(
self,
keys: Sequence[str],
Expand Down Expand Up @@ -137,6 +192,25 @@ def forward(self, latent_states: Tensor) -> Dict[str, Tensor]:


class MLPDecoder(nn.Module):
"""The exact inverse of the MLPEncoder. This is composed of N `nn.Linear` layers, where
N is specified by `mlp_layers`. No bias is used if a `nn.LayerNorm` is used after the linear layer.
If more than one vector is to be decoded, then it will create a dictionary with an entry
for every reconstructed vector.

Args:
keys (Sequence[str]): the keys representing the vector observations to decode.
output_dims (Sequence[int]): the dimensions of every vector to decode.
latent_state_size (int): the dimension of the latent state.
mlp_layers (int, optional): how many mlp layers.
Defaults to 4.
dense_units (int, optional): the dimension of every mlp.
Defaults to 512.
layer_norm (bool, optional): whether to apply the layer normalization.
Defaults to True.
activation (ModuleType, optional): the activation function after every layer.
Defaults to nn.ELU.
"""

def __init__(
self,
keys: Sequence[str],
Expand Down Expand Up @@ -168,8 +242,10 @@ def forward(self, latent_states: Tensor) -> Dict[str, Tensor]:


class RecurrentModel(nn.Module):
"""
Recurrent model for the model-base Dreamer agent.
"""Recurrent model for the model-base Dreamer-V3 agent.
This implementation uses the `sheeprl.models.models.LayerNormGRUCell`, which combines
the standard GRUCell from PyTorch with the `nn.LayerNorm`, where the normalization is applied
right after having computed the projection from the input to the weight space.

Args:
input_size (int): the input size of the model.
Expand Down Expand Up @@ -559,7 +635,7 @@ def __init__(
self.continue_model = continue_model


class Player(nn.Module):
class PlayerDV2(nn.Module):
"""
    The model of the Dreamer-V2 player.

Expand Down Expand Up @@ -605,7 +681,6 @@ def __init__(
self.discrete_size = discrete_size
self.recurrent_state_size = recurrent_state_size
self.num_envs = num_envs
self.init_states()

def init_states(self, reset_envs: Optional[Sequence[int]] = None) -> None:
"""Initialize the states and the actions for the ended environments.
Expand Down Expand Up @@ -751,7 +826,6 @@ def build_models(
# Sizes
stochastic_size = args.stochastic_size * args.discrete_size
latent_state_size = stochastic_size + args.recurrent_state_size
mlp_dims = [obs_space[k].shape[0] for k in mlp_keys]

# Define models
cnn_encoder = (
Expand All @@ -769,7 +843,7 @@ def build_models(
mlp_encoder = (
MLPEncoder(
keys=mlp_keys,
input_dims=mlp_dims,
input_dims=[obs_space[k].shape[0] for k in mlp_keys],
mlp_layers=args.mlp_layers,
dense_units=args.dense_units,
activation=dense_act,
Expand Down Expand Up @@ -826,7 +900,7 @@ def build_models(
mlp_decoder = (
MLPDecoder(
keys=mlp_keys,
output_dims=mlp_dims,
output_dims=[obs_space[k].shape[0] for k in mlp_keys],
latent_state_size=latent_state_size,
mlp_layers=args.mlp_layers,
dense_units=args.dense_units,
Expand Down
4 changes: 3 additions & 1 deletion sheeprl/algos/dreamer_v2/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,9 @@ class DreamerV2Args(StandardArgs):
max_step_expl_decay: int = Arg(default=0, help="the maximum number of decay steps")
action_repeat: int = Arg(default=2, help="the number of times an action is repeated")
max_episode_steps: int = Arg(
default=1000, help="the maximum duration in terms of number of steps of an episode, -1 to disable"
default=1000,
help="the maximum duration in terms of number of steps of an episode, -1 to disable. "
"This value will be divided by the `action_repeat` value during the environment creation.",
)
atari_noop_max: int = Arg(
default=30,
Expand Down
Loading