[Deprecations] Deprecate in view of v0.6 release #2446

Merged (10 commits) on Oct 8, 2024
4 changes: 1 addition & 3 deletions .github/unittest/linux/scripts/run_all.sh
@@ -88,9 +88,7 @@ conda deactivate
 conda activate "${env_dir}"

 echo "installing gymnasium"
-pip3 install "gymnasium"
-pip3 install ale_py
-pip3 install mo-gymnasium[mujoco] # requires here bc needs mujoco-py
+pip3 install "gymnasium[atari,accept-rom-license,mujoco]<1.0" mo-gymnasium[mujoco]
 pip3 install "mujoco" -U

 # sanity check: remove?
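The `<1.0` pins added here (and in the files below) presumably shield CI from the breaking API changes expected in gymnasium 1.0. A hypothetical guard for downstream code with the same constraint (illustrative only; this PR pins CI and does not add such a check):

```python
# Hypothetical version guard, not part of this PR.
import gymnasium
from packaging.version import Version

if Version(gymnasium.__version__) >= Version("1.0"):
    raise RuntimeError(
        "gymnasium>=1.0 detected; this code targets gymnasium<1.0. "
        "Install a compatible version: pip install 'gymnasium<1.0'"
    )
```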
4 changes: 2 additions & 2 deletions .github/unittest/linux_distributed/scripts/setup_env.sh
@@ -119,7 +119,7 @@ if [[ $OSTYPE != 'darwin'* ]]; then
     rm ale_py-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
   fi
   echo "installing gymnasium"
-  pip install "gymnasium[atari,accept-rom-license]"
+  pip install "gymnasium[atari,accept-rom-license]<1.0"
 else
-  pip install "gymnasium[atari,accept-rom-license]"
+  pip install "gymnasium[atari,accept-rom-license]<1.0"
 fi
2 changes: 1 addition & 1 deletion .github/unittest/linux_examples/scripts/run_all.sh
@@ -130,7 +130,7 @@ elif [[ $PY_VERSION == *"3.11"* ]]; then
   pip install ale_py-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
   rm ale_py-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
 fi
-pip install "gymnasium[atari,accept-rom-license]"
+pip install "gymnasium[atari,accept-rom-license]<1.0"

 # ============================================================================================ #
 # ================================ PyTorch & TorchRL ========================================= #
4 changes: 2 additions & 2 deletions .github/unittest/linux_libs/scripts_envpool/setup_env.sh
@@ -82,9 +82,9 @@ if [[ $OSTYPE != 'darwin'* ]]; then
   fi
   echo "installing gym"
   # envpool does not currently work with gymnasium
-  pip install "gym[atari,accept-rom-license]"
+  pip install "gym[atari,accept-rom-license]<1.0"
 else
-  pip install "gym[atari,accept-rom-license]"
+  pip install "gym[atari,accept-rom-license]<1.0"
 fi
 pip install envpool treevalue
2 changes: 1 addition & 1 deletion .github/unittest/linux_libs/scripts_gym/batch_scripts.sh
@@ -140,7 +140,7 @@ conda deactivate
   conda create --prefix ./cloned_env --clone ./env -y
   conda activate ./cloned_env

-  pip3 install 'gymnasium[accept-rom-license,ale-py,atari]' mo-gymnasium gymnasium-robotics -U
+  pip3 install 'gymnasium[accept-rom-license,ale-py,atari]<1.0' mo-gymnasium gymnasium-robotics -U

   $DIR/run_test.sh
@@ -6,7 +6,7 @@ dependencies:
   - protobuf
   - pip:
       # Initial version is required to install Atari ROMS in setup_env.sh
-      - gymnasium
+      - gymnasium<1.0
       - hypothesis
       - future
       - cloudpickle
13 changes: 7 additions & 6 deletions .github/workflows/docs.yml
@@ -3,6 +3,7 @@ name: Generate documentation
 on:
   push:
     branches:
+      - nightly
       - main
       - release/*
     tags:
@@ -21,7 +22,7 @@ jobs:
   build-docs:
     strategy:
       matrix:
-        python_version: ["3.9"]
+        python_version: ["3.10"]
         cuda_arch_version: ["12.1"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
@@ -33,7 +34,7 @@
       script: |
         set -e
         set -v
-        apt-get update && apt-get install -y git wget gcc g++
+        apt-get update && apt-get install -y -f git wget gcc g++ dialog apt-utils
         root_dir="$(pwd)"
         conda_dir="${root_dir}/conda"
         env_dir="${root_dir}/env"
@@ -45,14 +46,14 @@
         bash ./miniconda.sh -b -f -p "${conda_dir}"
         eval "$(${conda_dir}/bin/conda shell.bash hook)"
         printf "* Creating a test environment\n"
-        conda create --prefix "${env_dir}" -y python=3.8
+        conda create --prefix "${env_dir}" -y python=3.10
         printf "* Activating\n"
         conda activate "${env_dir}"

         # 2. upgrade pip, ninja and packaging
         # apt-get install python3.9 python3-pip -y
         apt-get install python3-pip unzip -y -f
         python3 -m pip install --upgrade pip
-        python3 -m pip install setuptools ninja packaging -U
+        python3 -m pip install setuptools ninja packaging cmake -U

         # 3. check python version
         python3 --version
4 changes: 1 addition & 3 deletions docs/requirements.txt
@@ -16,9 +16,7 @@ sphinx_design
 torchvision
 dm_control
 mujoco
-atari-py
-ale-py
-gym[classic_control,accept-rom-license]
+gym[classic_control,accept-rom-license,ale-py,atari]
 pygame
 tqdm
 ipython
2 changes: 0 additions & 2 deletions docs/source/reference/envs.rst
@@ -996,11 +996,9 @@ Helpers

     RandomPolicy
     check_env_specs
-    exploration_mode #deprecated
     exploration_type
     get_available_libraries
     make_composite_from_td
-    set_exploration_mode #deprecated
     set_exploration_type
     step_mdp
     terminated_or_truncated
6 changes: 3 additions & 3 deletions docs/source/reference/modules.rst
@@ -62,13 +62,13 @@ Exploration wrappers and modules

 To efficiently explore the environment, TorchRL proposes a series of modules
 that will override the action sampled by the policy by a noisier version.
-Their behavior is controlled by :func:`~torchrl.envs.utils.exploration_mode`:
-if the exploration is set to ``"random"``, the exploration is active. In all
+Their behavior is controlled by :func:`~torchrl.envs.utils.exploration_type`:
+if the exploration is set to ``ExplorationType.RANDOM``, the exploration is active. In all
 other cases, the action written in the tensordict is simply the network output.

 .. note:: Unlike other exploration modules, :class:`~torchrl.modules.ConsistentDropoutModule`
    uses the ``train``/``eval`` mode to comply with the regular `Dropout` API in PyTorch.
-   The :func:`~torchrl.envs.utils.set_exploration_mode` context manager will have no effect on
+   The :func:`~torchrl.envs.utils.set_exploration_type` context manager will have no effect on
    this module.

 .. currentmodule:: torchrl.modules
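For readers migrating, a minimal sketch of the replacement API (the module names are real TorchRL exports, but the environment and network sizes are illustrative; assumes gymnasium<1.0 is installed):

```python
import torch
from tensordict.nn import TensorDictSequential
from torchrl.envs import GymEnv
from torchrl.envs.utils import ExplorationType, set_exploration_type
from torchrl.modules import EGreedyModule, MLP, QValueActor

env = GymEnv("CartPole-v1")
value_net = MLP(out_features=env.action_spec.shape[-1], num_cells=[64])
actor = QValueActor(value_net, in_keys=["observation"], spec=env.action_spec)
# the exploration module wraps the greedy actor
policy = TensorDictSequential(actor, EGreedyModule(spec=env.action_spec))

# ExplorationType.RANDOM: epsilon-greedy noise is applied to the action
with set_exploration_type(ExplorationType.RANDOM), torch.no_grad():
    train_rollout = env.rollout(10, policy)

# any other type: the exploration module passes the network output through
with set_exploration_type(ExplorationType.MODE), torch.no_grad():
    eval_rollout = env.rollout(10, policy)
```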
8 changes: 4 additions & 4 deletions examples/distributed/collectors/multi_nodes/ray_train.py
@@ -26,7 +26,7 @@
     TransformedEnv,
 )
 from torchrl.envs.libs.gym import GymEnv
-from torchrl.envs.utils import check_env_specs, set_exploration_mode
+from torchrl.envs.utils import check_env_specs, ExplorationType, set_exploration_type
 from torchrl.modules import ProbabilisticActor, TanhNormal, ValueOperator
 from torchrl.objectives import ClipPPOLoss
 from torchrl.objectives.value import GAE
@@ -85,8 +85,8 @@
     in_keys=["loc", "scale"],
     distribution_class=TanhNormal,
     distribution_kwargs={
-        "min": env.action_spec.space.low,
-        "max": env.action_spec.space.high,
+        "low": env.action_spec.space.low,
+        "high": env.action_spec.space.high,
     },
     return_log_prob=True,
 )
@@ -201,7 +201,7 @@
     stepcount_str = f"step count (max): {logs['step_count'][-1]}"
     logs["lr"].append(optim.param_groups[0]["lr"])
     lr_str = f"lr policy: {logs['lr'][-1]: 4.4f}"
-    with set_exploration_mode("mean"), torch.no_grad():
+    with set_exploration_type(ExplorationType.MODE), torch.no_grad():
         # execute a rollout with the trained policy
         eval_rollout = env.rollout(1000, policy_module)
         logs["eval reward"].append(eval_rollout["next", "reward"].mean().item())
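The `min`/`max` → `low`/`high` rename applies to distribution kwargs generally. A condensed sketch of the updated pattern (environment and network are illustrative):

```python
import torch
from tensordict.nn import NormalParamExtractor, TensorDictModule
from torchrl.envs import GymEnv
from torchrl.envs.utils import ExplorationType, set_exploration_type
from torchrl.modules import ProbabilisticActor, TanhNormal

env = GymEnv("Pendulum-v1")
actor_net = TensorDictModule(
    torch.nn.Sequential(torch.nn.LazyLinear(2), NormalParamExtractor()),
    in_keys=["observation"],
    out_keys=["loc", "scale"],
)
policy_module = ProbabilisticActor(
    actor_net,
    in_keys=["loc", "scale"],
    distribution_class=TanhNormal,
    distribution_kwargs={
        "low": env.action_spec.space.low,    # previously "min"
        "high": env.action_spec.space.high,  # previously "max"
    },
    return_log_prob=True,
)

# deterministic evaluation, mirroring the updated rollout above
with set_exploration_type(ExplorationType.MODE), torch.no_grad():
    eval_rollout = env.rollout(1000, policy_module)
```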
8 changes: 3 additions & 5 deletions sota-implementations/decision_transformer/utils.py
@@ -38,7 +38,7 @@
 )
 from torchrl.envs.libs.dm_control import DMControlEnv
 from torchrl.envs.libs.gym import set_gym_backend
-from torchrl.envs.utils import set_exploration_mode
+from torchrl.envs.utils import ExplorationType, set_exploration_type
 from torchrl.modules import (
     DTActor,
     OnlineDTActor,
@@ -374,13 +374,12 @@ def make_odt_model(cfg):
         module=actor_module,
         distribution_class=dist_class,
         distribution_kwargs=dist_kwargs,
-        default_interaction_mode="random",
         cache_dist=False,
         return_log_prob=False,
     )

     # init the lazy layers
-    with torch.no_grad(), set_exploration_mode("random"):
+    with torch.no_grad(), set_exploration_type(ExplorationType.RANDOM):
         td = proof_environment.rollout(max_steps=100)
         td["action"] = td["next", "action"]
         actor(td)
@@ -428,13 +427,12 @@ def make_dt_model(cfg):
         module=actor_module,
         distribution_class=dist_class,
         distribution_kwargs=dist_kwargs,
-        default_interaction_mode="random",
         cache_dist=False,
         return_log_prob=False,
     )

     # init the lazy layers
-    with torch.no_grad(), set_exploration_mode("random"):
+    with torch.no_grad(), set_exploration_type(ExplorationType.RANDOM):
         td = proof_environment.rollout(max_steps=100)
         td["action"] = td["next", "action"]
         actor(td)
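Note that the deprecated `default_interaction_mode` constructor argument is dropped outright rather than renamed: the sampling behavior is now selected at call time through the `set_exploration_type` context manager, as the updated rollout lines above show.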
1 change: 0 additions & 1 deletion sota-implementations/redq/config.yaml
@@ -36,7 +36,6 @@ collector:
   multi_step: 1
   n_steps_return: 3
   max_frames_per_traj: -1
-  exploration_mode: random

 logger:
   backend: wandb
1 change: 0 additions & 1 deletion sota-implementations/redq/utils.py
@@ -1021,7 +1021,6 @@ def make_collector_offpolicy(
         "init_random_frames": cfg.collector.init_random_frames,
         "split_trajs": True,
         # trajectories must be separated if multi-step is used
-        "exploration_type": ExplorationType.from_str(cfg.collector.exploration_mode),
     }

     collector = collector_helper(**collector_helper_kwargs)
8 changes: 4 additions & 4 deletions test/test_actors.py
@@ -54,8 +54,8 @@ def test_probabilistic_actor_nested_delta(log_prob_key, nested_dim=5, n_actions=
         out_keys=[("data", "action")],
         distribution_class=TanhDelta,
         distribution_kwargs={
-            "min": action_spec.space.low,
-            "max": action_spec.space.high,
+            "low": action_spec.space.low,
+            "high": action_spec.space.high,
         },
         log_prob_key=log_prob_key,
         return_log_prob=True,
@@ -77,8 +77,8 @@ def test_probabilistic_actor_nested_delta(log_prob_key, nested_dim=5, n_actions=
         out_keys=[("data", "action")],
         distribution_class=TanhDelta,
         distribution_kwargs={
-            "min": action_spec.space.low,
-            "max": action_spec.space.high,
+            "low": action_spec.space.low,
+            "high": action_spec.space.high,
         },
         log_prob_key=log_prob_key,
         return_log_prob=True,
10 changes: 5 additions & 5 deletions test/test_distributions.py
@@ -190,8 +190,8 @@ def test_truncnormal(self, min, max, vecs, upscale, shape, device):
         d = TruncatedNormal(
             *vecs,
             upscale=upscale,
-            min=min,
-            max=max,
+            low=min,
+            high=max,
         )
         assert d.device == device
         for _ in range(100):
@@ -218,7 +218,7 @@ def test_truncnormal_against_scipy(self):
         high = 2
         low = -1
         log_pi_x = TruncatedNormal(
-            mu, sigma, min=low, max=high, tanh_loc=False
+            mu, sigma, low=low, high=high, tanh_loc=False
         ).log_prob(x)
         pi_x = torch.exp(log_pi_x)
         log_pi_x.backward(torch.ones_like(log_pi_x))
@@ -264,8 +264,8 @@ def test_truncnormal_mode(self, min, max, vecs, upscale, shape, device):
         d = TruncatedNormal(
             *vecs,
             upscale=upscale,
-            min=min,
-            max=max,
+            low=min,
+            high=max,
         )
         assert d.mode is not None
         assert d.entropy() is not None
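As a standalone illustration of the renamed constructor arguments (values are arbitrary):

```python
import torch
from torchrl.modules import TruncatedNormal

# keyword rename: min/max -> low/high
d = TruncatedNormal(torch.zeros(3), torch.ones(3), low=-1.0, high=2.0, tanh_loc=False)

x = d.sample((10,))   # samples lie in [-1, 2]
lp = d.log_prob(x)    # log-density under the truncated normal
```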
2 changes: 1 addition & 1 deletion test/test_libs.py
@@ -3065,7 +3065,7 @@ def test_atari_preproc(self, dataset_id, tmpdir):

         t = Compose(
             UnsqueezeTransform(
-                unsqueeze_dim=-3, in_keys=["observation", ("next", "observation")]
+                dim=-3, in_keys=["observation", ("next", "observation")]
             ),
             Resize(32, in_keys=["observation", ("next", "observation")]),
             RenameTransform(in_keys=["action"], out_keys=["other_action"]),
6 changes: 2 additions & 4 deletions test/test_rb.py
@@ -1776,10 +1776,8 @@ def test_insert_transform(self):
             not _has_tv, reason="needs torchvision dependency"
         ),
     ),
-    pytest.param(
-        partial(UnsqueezeTransform, unsqueeze_dim=-1), id="UnsqueezeTransform"
-    ),
-    pytest.param(partial(SqueezeTransform, squeeze_dim=-1), id="SqueezeTransform"),
+    pytest.param(partial(UnsqueezeTransform, dim=-1), id="UnsqueezeTransform"),
+    pytest.param(partial(SqueezeTransform, dim=-1), id="SqueezeTransform"),
     GrayScale,
     pytest.param(partial(ObservationNorm, loc=1, scale=2), id="ObservationNorm"),
     pytest.param(partial(CatFrames, dim=-3, N=4), id="CatFrames"),
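A short sketch of the unified keyword in direct use (keys illustrative):

```python
from torchrl.envs import Compose, SqueezeTransform, UnsqueezeTransform

# `unsqueeze_dim` / `squeeze_dim` are both replaced by `dim`
t = Compose(
    UnsqueezeTransform(dim=-1, in_keys=["observation"]),
    SqueezeTransform(dim=-1, in_keys=["observation"]),
)
```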