[Deprecations] Deprecate in view of v0.6 release
ghstack-source-id: 44add0a6a9130e23598eba798d175e946727acca
Pull Request resolved: #2446
vmoens committed Sep 20, 2024
1 parent e294c68 commit 81fce90
Showing 18 changed files with 32 additions and 133 deletions.
2 changes: 0 additions & 2 deletions docs/source/reference/envs.rst
@@ -979,11 +979,9 @@ Helpers

RandomPolicy
check_env_specs
-exploration_mode #deprecated
exploration_type
get_available_libraries
make_composite_from_td
-set_exploration_mode #deprecated
set_exploration_type
step_mdp
terminated_or_truncated
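The surviving helpers in use, as a minimal sketch (the environment name is illustrative):

```python
from torchrl.envs import GymEnv
from torchrl.envs.utils import ExplorationType, exploration_type, set_exploration_type

env = GymEnv("Pendulum-v1")  # any TorchRL env works here

# set_exploration_mode("random") is gone; the enum-based variant replaces it
with set_exploration_type(ExplorationType.RANDOM):
    assert exploration_type() is ExplorationType.RANDOM
    rollout = env.rollout(max_steps=10)
```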
6 changes: 3 additions & 3 deletions docs/source/reference/modules.rst
@@ -62,13 +62,13 @@ Exploration wrappers and modules

To efficiently explore the environment, TorchRL proposes a series of modules
that will override the action sampled by the policy by a noisier version.
-Their behavior is controlled by :func:`~torchrl.envs.utils.exploration_mode`:
-if the exploration is set to ``"random"``, the exploration is active. In all
+Their behavior is controlled by :func:`~torchrl.envs.utils.exploration_type`:
+if the exploration is set to ``ExplorationType.RANDOM``, the exploration is active. In all
other cases, the action written in the tensordict is simply the network output.

.. note:: Unlike other exploration modules, :class:`~torchrl.modules.ConsistentDropoutModule`
uses the ``train``/``eval`` mode to comply with the regular `Dropout` API in PyTorch.
-The :func:`~torchrl.envs.utils.set_exploration_mode` context manager will have no effect on
+The :func:`~torchrl.envs.utils.set_exploration_type` context manager will have no effect on
this module.

.. currentmodule:: torchrl.modules
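A sketch of the gating described above (the toy module, spec and shapes are illustrative, not part of this commit):

```python
import torch
from tensordict import TensorDict
from tensordict.nn import TensorDictModule, TensorDictSequential
from torchrl.data import OneHotDiscreteTensorSpec
from torchrl.envs.utils import ExplorationType, set_exploration_type
from torchrl.modules import EGreedyModule

# a toy deterministic policy producing action values
policy = TensorDictModule(
    torch.nn.Linear(4, 2), in_keys=["observation"], out_keys=["action"]
)
explorative_policy = TensorDictSequential(
    policy, EGreedyModule(spec=OneHotDiscreteTensorSpec(2))
)

td = TensorDict({"observation": torch.randn(4)}, [])
with set_exploration_type(ExplorationType.RANDOM):
    explorative_policy(td.clone())  # epsilon-greedy noise may replace the action
with set_exploration_type(ExplorationType.MODE):
    explorative_policy(td.clone())  # the network output passes through unchanged
```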
4 changes: 2 additions & 2 deletions examples/distributed/collectors/multi_nodes/ray_train.py
@@ -26,7 +26,7 @@
TransformedEnv,
)
from torchrl.envs.libs.gym import GymEnv
-from torchrl.envs.utils import check_env_specs, set_exploration_mode
+from torchrl.envs.utils import check_env_specs, ExplorationType, set_exploration_type
from torchrl.modules import ProbabilisticActor, TanhNormal, ValueOperator
from torchrl.objectives import ClipPPOLoss
from torchrl.objectives.value import GAE
@@ -201,7 +201,7 @@
stepcount_str = f"step count (max): {logs['step_count'][-1]}"
logs["lr"].append(optim.param_groups[0]["lr"])
lr_str = f"lr policy: {logs['lr'][-1]: 4.4f}"
-with set_exploration_mode("mean"), torch.no_grad():
+with set_exploration_type(ExplorationType.MODE), torch.no_grad():
# execute a rollout with the trained policy
eval_rollout = env.rollout(1000, policy_module)
logs["eval reward"].append(eval_rollout["next", "reward"].mean().item())
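The evaluation idiom above as a self-contained sketch (the env, network sizes and rollout length are placeholders, not the script's actual configuration):

```python
import torch
from tensordict.nn import TensorDictModule
from torchrl.envs.libs.gym import GymEnv
from torchrl.envs.utils import ExplorationType, set_exploration_type
from torchrl.modules import NormalParamExtractor, ProbabilisticActor, TanhNormal

env = GymEnv("Pendulum-v1")
net = torch.nn.Sequential(
    torch.nn.LazyLinear(2),  # 2 outputs -> loc and scale for a 1d action
    NormalParamExtractor(),
)
policy_module = ProbabilisticActor(
    TensorDictModule(net, in_keys=["observation"], out_keys=["loc", "scale"]),
    in_keys=["loc", "scale"],
    distribution_class=TanhNormal,
)

# greedy-ish evaluation: take the distribution mode, without gradients
with set_exploration_type(ExplorationType.MODE), torch.no_grad():
    eval_rollout = env.rollout(1000, policy_module)
print(eval_rollout["next", "reward"].mean().item())
```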
8 changes: 3 additions & 5 deletions sota-implementations/decision_transformer/utils.py
@@ -38,7 +38,7 @@
)
from torchrl.envs.libs.dm_control import DMControlEnv
from torchrl.envs.libs.gym import set_gym_backend
-from torchrl.envs.utils import set_exploration_mode
+from torchrl.envs.utils import ExplorationType, set_exploration_type
from torchrl.modules import (
DTActor,
OnlineDTActor,
@@ -374,13 +374,12 @@ def make_odt_model(cfg):
module=actor_module,
distribution_class=dist_class,
distribution_kwargs=dist_kwargs,
-default_interaction_mode="random",
cache_dist=False,
return_log_prob=False,
)

# init the lazy layers
-with torch.no_grad(), set_exploration_mode("random"):
+with torch.no_grad(), set_exploration_type(ExplorationType.RANDOM):
td = proof_environment.rollout(max_steps=100)
td["action"] = td["next", "action"]
actor(td)
@@ -428,13 +427,12 @@ def make_dt_model(cfg):
module=actor_module,
distribution_class=dist_class,
distribution_kwargs=dist_kwargs,
-default_interaction_mode="random",
cache_dist=False,
return_log_prob=False,
)

# init the lazy layers
-with torch.no_grad(), set_exploration_mode("random"):
+with torch.no_grad(), set_exploration_type(ExplorationType.RANDOM):
td = proof_environment.rollout(max_steps=100)
td["action"] = td["next", "action"]
actor(td)
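The removed `default_interaction_mode="random"` keyword maps onto the enum-based `default_interaction_type`; a minimal sketch (the toy module and keys are illustrative):

```python
import torch
from tensordict.nn import InteractionType, TensorDictModule
from torchrl.modules import NormalParamExtractor, ProbabilisticActor, TanhNormal

actor_module = TensorDictModule(
    torch.nn.Sequential(torch.nn.Linear(8, 2), NormalParamExtractor()),
    in_keys=["observation"],
    out_keys=["loc", "scale"],
)
actor = ProbabilisticActor(
    module=actor_module,
    in_keys=["loc", "scale"],
    distribution_class=TanhNormal,
    # replaces the removed default_interaction_mode="random"
    default_interaction_type=InteractionType.RANDOM,
    return_log_prob=False,
)
```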
1 change: 0 additions & 1 deletion sota-implementations/redq/config.yaml
@@ -36,7 +36,6 @@ collector:
multi_step: 1
n_steps_return: 3
max_frames_per_traj: -1
-exploration_mode: random

logger:
backend: wandb
2 changes: 1 addition & 1 deletion sota-implementations/redq/utils.py
@@ -1021,7 +1021,7 @@ def make_collector_offpolicy(
"init_random_frames": cfg.collector.init_random_frames,
"split_trajs": True,
# trajectories must be separated if multi-step is used
"exploration_type": ExplorationType.from_str(cfg.collector.exploration_mode),
"exploration_type": cfg.collector.exploration_type,
}

collector = collector_helper(**collector_helper_kwargs)
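Since a YAML config cannot hold the enum, a string entry can still be mapped explicitly where needed; a sketch (the literal "random" stands in for a config value):

```python
from torchrl.envs.utils import ExplorationType

# e.g. a config string such as cfg.collector.exploration_type == "random"
exploration_type = ExplorationType.from_str("random")
assert exploration_type is ExplorationType.RANDOM
```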
25 changes: 1 addition & 24 deletions torchrl/collectors/collectors.py
@@ -444,7 +444,6 @@ def __init__(
postproc: Callable[[TensorDictBase], TensorDictBase] | None = None,
split_trajs: bool | None = None,
exploration_type: ExplorationType = DEFAULT_EXPLORATION_TYPE,
-exploration_mode: str | None = None,
return_same_td: bool = False,
reset_when_done: bool = True,
interruptor=None,
@@ -456,9 +455,6 @@
from torchrl.envs.batched_envs import BatchedEnvBase

self.closed = True
-exploration_type = _convert_exploration_type(
-    exploration_mode=exploration_mode, exploration_type=exploration_type
-)
if create_env_kwargs is None:
create_env_kwargs = {}
if not isinstance(create_env_fn, EnvBase):
@@ -1421,7 +1417,7 @@ class _MultiDataCollector(DataCollectorBase):
A ``cat_results`` value of ``-1`` will always concatenate results along the
time dimension. This should be preferred over the default. Intermediate values
are also accepted.
-Defaults to ``0``.
+Defaults to ``"stack"``.
.. note:: From v0.5, this argument will default to ``"stack"`` for a better
interoperability with the rest of the library.
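What the `cat_results` options mean in practice, as a runnable sketch (env and frame counts are illustrative; the main guard is needed because workers are spawned):

```python
from torchrl.collectors import MultiSyncDataCollector
from torchrl.envs.libs.gym import GymEnv

if __name__ == "__main__":
    collector = MultiSyncDataCollector(
        [lambda: GymEnv("Pendulum-v1")] * 2,
        policy=None,  # a random policy is used when policy is None
        frames_per_batch=64,
        total_frames=128,
        cat_results="stack",  # stack worker batches along a new leading dim
        # cat_results=0 concatenates along the batch dim, -1 along time
    )
    for batch in collector:
        print(batch.shape)  # torch.Size([2, 32]) with "stack"
    collector.shutdown()
```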
@@ -1462,7 +1458,6 @@ def __init__(
postproc: Optional[Callable[[TensorDictBase], TensorDictBase]] = None,
split_trajs: Optional[bool] = None,
exploration_type: ExplorationType = DEFAULT_EXPLORATION_TYPE,
-exploration_mode=None,
reset_when_done: bool = True,
update_at_each_batch: bool = False,
preemptive_threshold: float = None,
@@ -1474,9 +1469,6 @@
replay_buffer: ReplayBuffer | None = None,
replay_buffer_chunk: bool = True,
):
-exploration_type = _convert_exploration_type(
-    exploration_mode=exploration_mode, exploration_type=exploration_type
-)
self.closed = True
self.num_workers = len(create_env_fn)

@@ -2156,19 +2148,6 @@ def iterator(self) -> Iterator[TensorDictBase]:
cat_results = self.cat_results
if cat_results is None:
cat_results = "stack"
-warnings.warn(
-    f"`cat_results` was not specified in the constructor of {type(self).__name__}. "
-    f"For MultiSyncDataCollector, `cat_results` indicates how the data should "
-    f"be packed: the preferred option and current default is `cat_results='stack'` "
-    f"which provides the best interoperability across torchrl components. "
-    f"Other accepted values are `cat_results=0` (previous behavior) and "
-    f"`cat_results=-1` (cat along time dimension). Among these two, the latter "
-    f"should be preferred for consistency across environment configurations. "
-    f"Currently, the default value is `'stack'`."
-    f"From v0.6 onward, this warning will be removed. "
-    f"To suppress this warning, set `cat_results` to the desired value.",
-    category=DeprecationWarning,
-)

self.buffers = {}
dones = [False for _ in range(self.num_workers)]
@@ -2749,7 +2728,6 @@ def __init__(
postproc: Optional[Callable[[TensorDictBase], TensorDictBase]] = None,
split_trajs: Optional[bool] = None,
exploration_type: ExplorationType = DEFAULT_EXPLORATION_TYPE,
-exploration_mode=None,
reset_when_done: bool = True,
update_at_each_batch: bool = False,
preemptive_threshold: float = None,
@@ -2774,7 +2752,6 @@
env_device=env_device,
storing_device=storing_device,
exploration_type=exploration_type,
-exploration_mode=exploration_mode,
reset_when_done=reset_when_done,
update_at_each_batch=update_at_each_batch,
preemptive_threshold=preemptive_threshold,
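After this commit, collectors only accept the enum-based keyword; a minimal sketch (env and frame counts are illustrative):

```python
from torchrl.collectors import SyncDataCollector
from torchrl.envs.libs.gym import GymEnv
from torchrl.envs.utils import ExplorationType

collector = SyncDataCollector(
    lambda: GymEnv("Pendulum-v1"),
    policy=None,  # falls back to a random policy
    frames_per_batch=64,
    total_frames=128,
    exploration_type=ExplorationType.RANDOM,  # exploration_mode= is gone
)
for batch in collector:
    print(batch["next", "reward"].mean().item())
collector.shutdown()
```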
4 changes: 0 additions & 4 deletions torchrl/collectors/distributed/generic.py
@@ -426,7 +426,6 @@ def __init__(
postproc: Callable | None = None,
split_trajs: bool = False,
exploration_type: "ExporationType" = DEFAULT_EXPLORATION_TYPE, # noqa
-exploration_mode: str = None,
collector_class: Type = SyncDataCollector,
collector_kwargs: dict = None,
num_workers_per_collector: int = 1,
@@ -438,9 +437,6 @@
launcher: str = "submitit",
tcp_port: int = None,
):
-exploration_type = _convert_exploration_type(
-    exploration_mode=exploration_mode, exploration_type=exploration_type
-)

if collector_class == "async":
collector_class = MultiaSyncDataCollector
4 changes: 0 additions & 4 deletions torchrl/collectors/distributed/rpc.py
@@ -275,7 +275,6 @@ def __init__(
postproc: Callable | None = None,
split_trajs: bool = False,
exploration_type: "ExporationType" = DEFAULT_EXPLORATION_TYPE, # noqa
-exploration_mode: str = None,
collector_class=SyncDataCollector,
collector_kwargs=None,
num_workers_per_collector=1,
@@ -288,9 +287,6 @@
visible_devices=None,
tensorpipe_options=None,
):
-exploration_type = _convert_exploration_type(
-    exploration_mode=exploration_mode, exploration_type=exploration_type
-)
if collector_class == "async":
collector_class = MultiaSyncDataCollector
elif collector_class == "sync":
4 changes: 0 additions & 4 deletions torchrl/collectors/distributed/sync.py
@@ -291,7 +291,6 @@ def __init__(
postproc: Callable | None = None,
split_trajs: bool = False,
exploration_type: "ExporationType" = DEFAULT_EXPLORATION_TYPE, # noqa
-exploration_mode: str = None,
collector_class=SyncDataCollector,
collector_kwargs=None,
num_workers_per_collector=1,
@@ -302,9 +301,6 @@
launcher="submitit",
tcp_port=None,
):
-exploration_type = _convert_exploration_type(
-    exploration_mode=exploration_mode, exploration_type=exploration_type
-)

if collector_class == "async":
collector_class = MultiaSyncDataCollector
2 changes: 0 additions & 2 deletions torchrl/envs/__init__.py
@@ -102,12 +102,10 @@
from .utils import (
check_env_specs,
check_marl_grouping,
-exploration_mode,
exploration_type,
ExplorationType,
make_composite_from_td,
MarlGroupMapType,
-set_exploration_mode,
set_exploration_type,
step_mdp,
)
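The import surface after the removal, as a quick sketch:

```python
from torchrl.envs import ExplorationType, exploration_type, set_exploration_type

# exploration_mode / set_exploration_mode no longer resolve
with set_exploration_type(ExplorationType.MEAN):
    assert exploration_type() is ExplorationType.MEAN
```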
23 changes: 12 additions & 11 deletions torchrl/envs/transforms/transforms.py
@@ -2136,15 +2136,23 @@ class UnsqueezeTransform(Transform):
"""Inserts a dimension of size one at the specified position.
Args:
-unsqueeze_dim (int): dimension to unsqueeze. Must be negative (or allow_positive_dim
+dim (int): dimension to unsqueeze. Must be negative (or allow_positive_dim
must be turned on).
Keyword Args:
allow_positive_dim (bool, optional): if ``True``, positive dimensions are accepted.
-:obj:`UnsqueezeTransform` will map these to the n^th feature dimension
+``UnsqueezeTransform`` will map these to the n^th feature dimension
(ie n^th dimension after batch size of parent env) of the input tensor,
-independently from the tensordict batch size (ie positive dims may be
+independently of the tensordict batch size (ie positive dims may be
dangerous in contexts where tensordicts of different batch dimensions
are passed).
Defaults to ``False``, i.e. non-negative dimensions are not permitted.
+in_keys (list of NestedKeys): input entries (read).
+out_keys (list of NestedKeys): output entries (write). Defaults to ``in_keys`` if
+    not provided.
+in_keys_inv (list of NestedKeys): input entries (read) during :meth:`~.inv` calls.
+out_keys_inv (list of NestedKeys): output entries (write) during :meth:`~.inv` calls.
+    Defaults to ``in_keys_inv`` if not provided.
"""

invertible = True
@@ -2157,20 +2165,13 @@ def __new__(cls, *args, **kwargs):
def __init__(
self,
dim: int = None,
+*,
allow_positive_dim: bool = False,
in_keys: Sequence[NestedKey] | None = None,
out_keys: Sequence[NestedKey] | None = None,
in_keys_inv: Sequence[NestedKey] | None = None,
out_keys_inv: Sequence[NestedKey] | None = None,
-**kwargs,
):
if "unsqueeze_dim" in kwargs:
warnings.warn(
"The `unsqueeze_dim` kwarg will be removed in v0.6. Please use `dim` instead."
)
dim = kwargs["unsqueeze_dim"]
elif dim is None:
raise TypeError("dim must be provided.")
if in_keys is None:
in_keys = [] # default
if out_keys is None:
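The renamed argument in use, as a sketch (env and keys are illustrative):

```python
from torchrl.envs import TransformedEnv
from torchrl.envs.libs.gym import GymEnv
from torchrl.envs.transforms import UnsqueezeTransform

# unsqueeze_dim=... is removed; the dimension is simply dim now
env = TransformedEnv(
    GymEnv("Pendulum-v1"),
    UnsqueezeTransform(dim=-1, in_keys=["observation"]),
)
print(env.reset()["observation"].shape)  # torch.Size([3, 1])
```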
11 changes: 1 addition & 10 deletions torchrl/envs/utils.py
@@ -32,13 +32,8 @@
from tensordict.base import _is_leaf_nontensor
from tensordict.nn import TensorDictModule, TensorDictModuleBase
from tensordict.nn.probabilistic import ( # noqa
-# Note: the `set_interaction_mode` and their associated arg `default_interaction_mode` are being deprecated!
-# Please use the `set_/interaction_type` ones above with the InteractionType enum instead.
-# See more details: https://github.com/pytorch/rl/issues/1016
-interaction_mode as exploration_mode,
interaction_type as exploration_type,
InteractionType as ExplorationType,
-set_interaction_mode as set_exploration_mode,
set_interaction_type as set_exploration_type,
)
from tensordict.utils import is_non_tensor, NestedKey
@@ -55,9 +50,7 @@
from torchrl.data.utils import check_no_exclusive_keys

__all__ = [
"exploration_mode",
"exploration_type",
"set_exploration_mode",
"set_exploration_type",
"ExplorationType",
"check_env_specs",
@@ -79,9 +72,7 @@
)


-def _convert_exploration_type(*, exploration_mode, exploration_type):
-    if exploration_mode is not None:
-        return ExplorationType.from_str(exploration_mode)
+def _convert_exploration_type(*, exploration_type):
return exploration_type

