From 38dfc21758f57683071fc29114c338d12f4b8d31 Mon Sep 17 00:00:00 2001
From: Vincent Moens <vmoens@meta.com>
Date: Thu, 12 Oct 2023 04:22:13 -0400
Subject: [PATCH] [CI] Fix CI (python and cuda versions) (#1621)

---
 .../linux_libs/scripts_brax/install.sh        |   4 +-
 .../linux_libs/scripts_d4rl/install.sh        |   4 +-
 .../linux_libs/scripts_habitat/install.sh     |   4 +-
 .../linux_libs/scripts_jumanji/install.sh     |   4 +-
 .../linux_libs/scripts_pettingzoo/install.sh  |   2 +-
 .../linux_libs/scripts_rlhf/install.sh        |   4 +-
 .../scripts_robohive/install_and_run_test.sh  |   2 +-
 .../linux_libs/scripts_sklearn/install.sh     |   4 +-
 .../linux_libs/scripts_smacv2/install.sh      |   2 +-
 .../linux_libs/scripts_vmas/install.sh        |   2 +-
 .github/workflows/test-linux-brax.yml         |   8 +-
 .github/workflows/test-linux-d4rl.yml         |   6 +-
 .github/workflows/test-linux-envpool.yml      |   8 +-
 .github/workflows/test-linux-examples.yml     |   7 +-
 .github/workflows/test-linux-gym.yml          |   6 +-
 .github/workflows/test-linux-jumanji.yml      |   6 +-
 .github/workflows/test-linux-olddeps.yml      |   4 +
 .github/workflows/test-linux-pettingzoo.yml   |   2 +-
 .github/workflows/test-linux-rlhf.yml         |   6 +-
 .github/workflows/test-linux-robohive.yml     |   6 +-
 .github/workflows/test-linux-sklearn.yml      |   6 +-
 .github/workflows/test-linux-smacv2.yml       |   6 +-
 .github/workflows/test-linux-vmas.yml         |   6 +-
 .github/workflows/test-macos-cpu.yml          |   2 +-
 .../workflows/test-windows-optdepts-cpu.yml   |   2 +-
 examples/a2c/a2c_atari.py                     |   1 +
 examples/a2c/a2c_mujoco.py                    |   5 +-
 examples/ppo/ppo_mujoco.py                    |   5 +-
 test/test_rb.py                               | 120 ++++++++++++++++--
 test/test_specs.py                            |   4 +-
 test/test_transforms.py                       |  12 +-
 torchrl/data/replay_buffers/storages.py       |  90 +++++++------
 32 files changed, 254 insertions(+), 96 deletions(-)

diff --git a/.github/unittest/linux_libs/scripts_brax/install.sh b/.github/unittest/linux_libs/scripts_brax/install.sh
index b3a42967935..93c1f113b52 100755
--- a/.github/unittest/linux_libs/scripts_brax/install.sh
+++ b/.github/unittest/linux_libs/scripts_brax/install.sh
@@ -32,7 +32,7 @@ if [ "${CU_VERSION:-}" == cpu ] ; then
 #    conda install -y pytorch cpuonly -c pytorch-nightly
     pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu --force-reinstall  --progress-bar off
 else
-    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall  --progress-bar off
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 --force-reinstall  --progress-bar off
 fi
 
 # install tensordict
@@ -42,7 +42,7 @@ pip install git+https://github.com/pytorch/tensordict.git --progress-bar off
 python -c "import functorch;import tensordict"
 
 printf "* Installing torchrl\n"
-pip3 install -e .
+python setup.py develop
 
 # smoke test
 python -c "import torchrl"
diff --git a/.github/unittest/linux_libs/scripts_d4rl/install.sh b/.github/unittest/linux_libs/scripts_d4rl/install.sh
index feb922d14b8..2eb52b8f65e 100755
--- a/.github/unittest/linux_libs/scripts_d4rl/install.sh
+++ b/.github/unittest/linux_libs/scripts_d4rl/install.sh
@@ -35,7 +35,7 @@ if [ "${CU_VERSION:-}" == cpu ] ; then
 #    conda install -y pytorch cpuonly -c pytorch-nightly
     pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu --force-reinstall
 else
-    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 --force-reinstall
 fi
 
 # install tensordict
@@ -45,7 +45,7 @@ pip install git+https://github.com/pytorch/tensordict.git
 python -c "import functorch;import tensordict"
 
 printf "* Installing torchrl\n"
-pip3 install -e .
+python setup.py develop
 
 # smoke test
 python -c "import torchrl"
diff --git a/.github/unittest/linux_libs/scripts_habitat/install.sh b/.github/unittest/linux_libs/scripts_habitat/install.sh
index 316cf9e3225..071af690448 100755
--- a/.github/unittest/linux_libs/scripts_habitat/install.sh
+++ b/.github/unittest/linux_libs/scripts_habitat/install.sh
@@ -20,7 +20,7 @@ version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")"
 git submodule sync && git submodule update --init --recursive
 
 printf "Installing PyTorch with %s\n" "${CU_VERSION}"
-pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall
+pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 --force-reinstall
 
 # install tensordict
 pip3 install git+https://github.com/pytorch/tensordict.git
@@ -29,7 +29,7 @@ pip3 install git+https://github.com/pytorch/tensordict.git
 python3 -c "import functorch;import tensordict"
 
 printf "* Installing torchrl\n"
-pip3 install -e .
+python setup.py develop
 
 # smoke test
 python3 -c "import torchrl"
diff --git a/.github/unittest/linux_libs/scripts_jumanji/install.sh b/.github/unittest/linux_libs/scripts_jumanji/install.sh
index ee6c747315c..3d6ad9ed450 100755
--- a/.github/unittest/linux_libs/scripts_jumanji/install.sh
+++ b/.github/unittest/linux_libs/scripts_jumanji/install.sh
@@ -32,7 +32,7 @@ if [ "${CU_VERSION:-}" == cpu ] ; then
 #    conda install -y pytorch cpuonly -c pytorch-nightly
     pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu --force-reinstall
 else
-    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 --force-reinstall
 fi
 
 # install tensordict
@@ -42,7 +42,7 @@ pip install git+https://github.com/pytorch/tensordict.git
 python -c "import functorch;import tensordict"
 
 printf "* Installing torchrl\n"
-pip3 install -e .
+python setup.py develop
 
 # smoke test
 python -c "import torchrl"
diff --git a/.github/unittest/linux_libs/scripts_pettingzoo/install.sh b/.github/unittest/linux_libs/scripts_pettingzoo/install.sh
index 0c7bc8f402b..fb82bcb4ea8 100755
--- a/.github/unittest/linux_libs/scripts_pettingzoo/install.sh
+++ b/.github/unittest/linux_libs/scripts_pettingzoo/install.sh
@@ -32,7 +32,7 @@ if [ "${CU_VERSION:-}" == cpu ] ; then
 #    conda install -y pytorch cpuonly -c pytorch-nightly
     pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu --force-reinstall
 else
-    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 --force-reinstall
 fi
 
 # install tensordict
diff --git a/.github/unittest/linux_libs/scripts_rlhf/install.sh b/.github/unittest/linux_libs/scripts_rlhf/install.sh
index 25a73fd6dff..31a6b2b56d4 100755
--- a/.github/unittest/linux_libs/scripts_rlhf/install.sh
+++ b/.github/unittest/linux_libs/scripts_rlhf/install.sh
@@ -35,7 +35,7 @@ if [ "${CU_VERSION:-}" == cpu ] ; then
 #    conda install -y pytorch cpuonly -c pytorch-nightly
     pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu --force-reinstall
 else
-    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 --force-reinstall
 fi
 
 # install tensordict
@@ -45,7 +45,7 @@ pip install git+https://github.com/pytorch/tensordict.git
 python -c "import tensordict"
 
 printf "* Installing torchrl\n"
-pip3 install -e .
+python setup.py develop
 
 # smoke test
 python -c "import torchrl"
diff --git a/.github/unittest/linux_libs/scripts_robohive/install_and_run_test.sh b/.github/unittest/linux_libs/scripts_robohive/install_and_run_test.sh
index 68fe922ec5d..873962164d6 100755
--- a/.github/unittest/linux_libs/scripts_robohive/install_and_run_test.sh
+++ b/.github/unittest/linux_libs/scripts_robohive/install_and_run_test.sh
@@ -43,7 +43,7 @@ if [ "${CU_VERSION:-}" == cpu ] ; then
 #    conda install -y pytorch cpuonly -c pytorch-nightly
     pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu --force-reinstall
 else
-    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 --force-reinstall
 fi
 
 # install tensordict
diff --git a/.github/unittest/linux_libs/scripts_sklearn/install.sh b/.github/unittest/linux_libs/scripts_sklearn/install.sh
index feb922d14b8..2eb52b8f65e 100755
--- a/.github/unittest/linux_libs/scripts_sklearn/install.sh
+++ b/.github/unittest/linux_libs/scripts_sklearn/install.sh
@@ -35,7 +35,7 @@ if [ "${CU_VERSION:-}" == cpu ] ; then
 #    conda install -y pytorch cpuonly -c pytorch-nightly
     pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu --force-reinstall
 else
-    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 --force-reinstall
 fi
 
 # install tensordict
@@ -45,7 +45,7 @@ pip install git+https://github.com/pytorch/tensordict.git
 python -c "import functorch;import tensordict"
 
 printf "* Installing torchrl\n"
-pip3 install -e .
+python setup.py develop
 
 # smoke test
 python -c "import torchrl"
diff --git a/.github/unittest/linux_libs/scripts_smacv2/install.sh b/.github/unittest/linux_libs/scripts_smacv2/install.sh
index 0c7bc8f402b..fb82bcb4ea8 100755
--- a/.github/unittest/linux_libs/scripts_smacv2/install.sh
+++ b/.github/unittest/linux_libs/scripts_smacv2/install.sh
@@ -32,7 +32,7 @@ if [ "${CU_VERSION:-}" == cpu ] ; then
 #    conda install -y pytorch cpuonly -c pytorch-nightly
     pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu --force-reinstall
 else
-    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 --force-reinstall
 fi
 
 # install tensordict
diff --git a/.github/unittest/linux_libs/scripts_vmas/install.sh b/.github/unittest/linux_libs/scripts_vmas/install.sh
index 0c7bc8f402b..fb82bcb4ea8 100755
--- a/.github/unittest/linux_libs/scripts_vmas/install.sh
+++ b/.github/unittest/linux_libs/scripts_vmas/install.sh
@@ -32,7 +32,7 @@ if [ "${CU_VERSION:-}" == cpu ] ; then
 #    conda install -y pytorch cpuonly -c pytorch-nightly
     pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu --force-reinstall
 else
-    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall
+    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 --force-reinstall
 fi
 
 # install tensordict
diff --git a/.github/workflows/test-linux-brax.yml b/.github/workflows/test-linux-brax.yml
index 0a09306f313..a461b53be21 100644
--- a/.github/workflows/test-linux-brax.yml
+++ b/.github/workflows/test-linux-brax.yml
@@ -17,6 +17,10 @@ concurrency:
 
 jobs:
   unittests:
+    strategy:
+      matrix:
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
@@ -27,8 +31,8 @@ jobs:
       script: |
         set -euo pipefail
 
-        export PYTHON_VERSION="3.8"
-        export CU_VERSION="11.7"
+        export PYTHON_VERSION="3.9"
+        export CU_VERSION="12.1"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
         export TF_CPP_MIN_LOG_LEVEL=0
diff --git a/.github/workflows/test-linux-d4rl.yml b/.github/workflows/test-linux-d4rl.yml
index a5acce1f5c9..3a0d534cd8e 100644
--- a/.github/workflows/test-linux-d4rl.yml
+++ b/.github/workflows/test-linux-d4rl.yml
@@ -17,6 +17,10 @@ concurrency:
 
 jobs:
   unittests:
+    strategy:
+      matrix:
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
@@ -25,7 +29,7 @@ jobs:
       timeout: 120
       script: |
         set -euo pipefail
-        export PYTHON_VERSION="3.8"
+        export PYTHON_VERSION="3.9"
         export CU_VERSION="cu117"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
diff --git a/.github/workflows/test-linux-envpool.yml b/.github/workflows/test-linux-envpool.yml
index 3b1072c9395..844d5b34963 100644
--- a/.github/workflows/test-linux-envpool.yml
+++ b/.github/workflows/test-linux-envpool.yml
@@ -11,6 +11,10 @@ on:
 
 jobs:
   unittests:
+    strategy:
+      matrix:
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
@@ -22,8 +26,8 @@ jobs:
       timeout: 120
       script: |
         set -euo pipefail
-        export PYTHON_VERSION="3.8"
-        export CU_VERSION="11.7"
+        export PYTHON_VERSION="3.9"
+        export CU_VERSION="12.1"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
         export TF_CPP_MIN_LOG_LEVEL=0
diff --git a/.github/workflows/test-linux-examples.yml b/.github/workflows/test-linux-examples.yml
index 60c64510cb3..bfc6884bc7a 100644
--- a/.github/workflows/test-linux-examples.yml
+++ b/.github/workflows/test-linux-examples.yml
@@ -22,8 +22,8 @@ jobs:
   tests:
     strategy:
       matrix:
-        python_version: ["3.9"] # "3.8", "3.9", "3.10", "3.11"
-        cuda_arch_version: ["11.6"] # "11.6", "11.7"
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
@@ -36,11 +36,8 @@ jobs:
       script: |
         # Set env vars from matrix
         export PYTHON_VERSION=${{ matrix.python_version }}
-        # Commenting these out for now because the GPU test are not working inside docker
         export CUDA_ARCH_VERSION=${{ matrix.cuda_arch_version }}
         export CU_VERSION="cu${CUDA_ARCH_VERSION:0:2}${CUDA_ARCH_VERSION:3:1}"
-        # Remove the following line when the GPU tests are working inside docker, and uncomment the above lines
-        #export CU_VERSION="cpu"
 
         echo "PYTHON_VERSION: $PYTHON_VERSION"
         echo "CU_VERSION: $CU_VERSION"
diff --git a/.github/workflows/test-linux-gym.yml b/.github/workflows/test-linux-gym.yml
index 0345955808f..6534bcbca7d 100644
--- a/.github/workflows/test-linux-gym.yml
+++ b/.github/workflows/test-linux-gym.yml
@@ -17,6 +17,10 @@ concurrency:
 
 jobs:
   unittests:
+    strategy:
+      matrix:
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
@@ -27,7 +31,7 @@ jobs:
       timeout: 120
       script: |
         set -euxo pipefail
-        export PYTHON_VERSION="3.8"
+        export PYTHON_VERSION="3.9"
         # export CU_VERSION="${{ inputs.gpu-arch-version }}"
         export CU_VERSION="11.4"
         export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/work/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin"
diff --git a/.github/workflows/test-linux-jumanji.yml b/.github/workflows/test-linux-jumanji.yml
index a1ca1eb6a41..97bbc4148a4 100644
--- a/.github/workflows/test-linux-jumanji.yml
+++ b/.github/workflows/test-linux-jumanji.yml
@@ -17,6 +17,10 @@ concurrency:
 
 jobs:
   unittests:
+    strategy:
+      matrix:
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
@@ -27,7 +31,7 @@ jobs:
       script: |
         set -euo pipefail
         export PYTHON_VERSION="3.9"
-        export CU_VERSION="11.7"
+        export CU_VERSION="12.1"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
         export TF_CPP_MIN_LOG_LEVEL=0
diff --git a/.github/workflows/test-linux-olddeps.yml b/.github/workflows/test-linux-olddeps.yml
index 9f54d9dda25..776b9a43c80 100644
--- a/.github/workflows/test-linux-olddeps.yml
+++ b/.github/workflows/test-linux-olddeps.yml
@@ -11,6 +11,10 @@ on:
 
 jobs:
   unittests:
+    strategy:
+      matrix:
+        python_version: ["3.8"]
+        cuda_arch_version: ["11.6"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
diff --git a/.github/workflows/test-linux-pettingzoo.yml b/.github/workflows/test-linux-pettingzoo.yml
index 628be74beef..7f2c2526684 100644
--- a/.github/workflows/test-linux-pettingzoo.yml
+++ b/.github/workflows/test-linux-pettingzoo.yml
@@ -27,7 +27,7 @@ jobs:
       script: |
         set -euo pipefail
         export PYTHON_VERSION="3.9"
-        export CU_VERSION="11.7"
+        export CU_VERSION="12.1"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
         export TF_CPP_MIN_LOG_LEVEL=0
diff --git a/.github/workflows/test-linux-rlhf.yml b/.github/workflows/test-linux-rlhf.yml
index 86040ae9679..4d2a4864ed0 100644
--- a/.github/workflows/test-linux-rlhf.yml
+++ b/.github/workflows/test-linux-rlhf.yml
@@ -17,6 +17,10 @@ concurrency:
 
 jobs:
   unittests:
+    strategy:
+      matrix:
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
@@ -27,7 +31,7 @@ jobs:
       timeout: 120
       script: |
         set -euo pipefail
-        export PYTHON_VERSION="3.8"
+        export PYTHON_VERSION="3.9"
         export CU_VERSION="cu117"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
diff --git a/.github/workflows/test-linux-robohive.yml b/.github/workflows/test-linux-robohive.yml
index 4793971d4a4..47db890c293 100644
--- a/.github/workflows/test-linux-robohive.yml
+++ b/.github/workflows/test-linux-robohive.yml
@@ -11,6 +11,10 @@ on:
 
 jobs:
   unittests:
+    strategy:
+      matrix:
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
@@ -19,7 +23,7 @@ jobs:
       timeout: 120
       script: |
         set -euo pipefail
-        export PYTHON_VERSION="3.8"
+        export PYTHON_VERSION="3.9"
         export CU_VERSION="cu117"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
diff --git a/.github/workflows/test-linux-sklearn.yml b/.github/workflows/test-linux-sklearn.yml
index 9ad10a53297..83c13a09224 100644
--- a/.github/workflows/test-linux-sklearn.yml
+++ b/.github/workflows/test-linux-sklearn.yml
@@ -17,6 +17,10 @@ concurrency:
 
 jobs:
   unittests:
+    strategy:
+      matrix:
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
@@ -27,7 +31,7 @@ jobs:
       timeout: 120
       script: |
         set -euo pipefail
-        export PYTHON_VERSION="3.8"
+        export PYTHON_VERSION="3.9"
         export CU_VERSION="cu117"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
diff --git a/.github/workflows/test-linux-smacv2.yml b/.github/workflows/test-linux-smacv2.yml
index 159c93fb1a1..b937ac87ff7 100644
--- a/.github/workflows/test-linux-smacv2.yml
+++ b/.github/workflows/test-linux-smacv2.yml
@@ -17,6 +17,10 @@ concurrency:
 
 jobs:
   unittests:
+    strategy:
+      matrix:
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
     if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Environments') }}
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
@@ -28,7 +32,7 @@ jobs:
       script: |
         set -euo pipefail
         export PYTHON_VERSION="3.9"
-        export CU_VERSION="11.7"
+        export CU_VERSION="12.1"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
         export TF_CPP_MIN_LOG_LEVEL=0
diff --git a/.github/workflows/test-linux-vmas.yml b/.github/workflows/test-linux-vmas.yml
index fc189b28f7f..abdbc4a5433 100644
--- a/.github/workflows/test-linux-vmas.yml
+++ b/.github/workflows/test-linux-vmas.yml
@@ -17,6 +17,10 @@ concurrency:
 
 jobs:
   unittests:
+    strategy:
+      matrix:
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
@@ -27,7 +31,7 @@ jobs:
       script: |
         set -euo pipefail
         export PYTHON_VERSION="3.9"
-        export CU_VERSION="11.7"
+        export CU_VERSION="12.1"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
         export TF_CPP_MIN_LOG_LEVEL=0
diff --git a/.github/workflows/test-macos-cpu.yml b/.github/workflows/test-macos-cpu.yml
index 184cb7e9884..c4d741b9c21 100644
--- a/.github/workflows/test-macos-cpu.yml
+++ b/.github/workflows/test-macos-cpu.yml
@@ -22,7 +22,7 @@ jobs:
   tests:
     strategy:
       matrix:
-        python_version: ["3.8", "3.9", "3.10", "3.11"]
+        python_version: ["3.8", "3.11"]
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
diff --git a/.github/workflows/test-windows-optdepts-cpu.yml b/.github/workflows/test-windows-optdepts-cpu.yml
index 1cd161a84fb..09ce642c4f0 100644
--- a/.github/workflows/test-windows-optdepts-cpu.yml
+++ b/.github/workflows/test-windows-optdepts-cpu.yml
@@ -25,7 +25,7 @@ jobs:
       script: |
         set -euxo pipefail
 
-        export PYTHON_VERSION="3.8"
+        export PYTHON_VERSION="3.9"
         export CU_VERSION="cpu"
 
         # TODO: Port this to pytorch/test-infra/.github/workflows/windows_job.yml
diff --git a/examples/a2c/a2c_atari.py b/examples/a2c/a2c_atari.py
index 37c1bd9842d..44a37cb3ce6 100644
--- a/examples/a2c/a2c_atari.py
+++ b/examples/a2c/a2c_atari.py
@@ -141,6 +141,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
 
         for k, batch in enumerate(data_buffer):
 
+            # Move the data batch to the training device
             batch = batch.to(device)
 
             # Linearly decrease the learning rate and clip epsilon
diff --git a/examples/a2c/a2c_mujoco.py b/examples/a2c/a2c_mujoco.py
index 4192ddc6556..7f9e588bbf6 100644
--- a/examples/a2c/a2c_mujoco.py
+++ b/examples/a2c/a2c_mujoco.py
@@ -49,7 +49,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
     # Create data buffer
     sampler = SamplerWithoutReplacement()
     data_buffer = TensorDictReplayBuffer(
-        storage=LazyMemmapStorage(cfg.collector.frames_per_batch, device=device),
+        storage=LazyMemmapStorage(cfg.collector.frames_per_batch),
         sampler=sampler,
         batch_size=cfg.loss.mini_batch_size,
     )
@@ -125,6 +125,9 @@ def main(cfg: "DictConfig"):  # noqa: F821
 
         for k, batch in enumerate(data_buffer):
 
+            # Move the data batch to the training device
+            batch = batch.to(device)
+
             # Linearly decrease the learning rate and clip epsilon
             alpha = 1.0
             if cfg.optim.anneal_lr:
diff --git a/examples/ppo/ppo_mujoco.py b/examples/ppo/ppo_mujoco.py
index 37230fb33c6..ff6aeda51d2 100644
--- a/examples/ppo/ppo_mujoco.py
+++ b/examples/ppo/ppo_mujoco.py
@@ -55,7 +55,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
     # Create data buffer
     sampler = SamplerWithoutReplacement()
     data_buffer = TensorDictReplayBuffer(
-        storage=LazyMemmapStorage(cfg.collector.frames_per_batch, device=device),
+        storage=LazyMemmapStorage(cfg.collector.frames_per_batch),
         sampler=sampler,
         batch_size=cfg.loss.mini_batch_size,
     )
@@ -144,6 +144,9 @@ def main(cfg: "DictConfig"):  # noqa: F821
 
             for k, batch in enumerate(data_buffer):
 
+                # Move the data batch to the training device
+                batch = batch.to(device)
+
                 # Linearly decrease the learning rate and clip epsilon
                 alpha = 1.0
                 if cfg_optim_anneal_lr:
diff --git a/test/test_rb.py b/test/test_rb.py
index 36158d8a69e..8e894f45c3e 100644
--- a/test/test_rb.py
+++ b/test/test_rb.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import argparse
+import contextlib
 import importlib
 import pickle
 import sys
@@ -14,6 +15,7 @@
 import pytest
 import torch
 from _utils_internal import get_default_devices, make_tc
+from packaging.version import parse
 from tensordict import is_tensorclass, tensorclass
 from tensordict.tensordict import assert_allclose_td, TensorDict, TensorDictBase
 from torchrl.data import (
@@ -59,6 +61,7 @@
     VecNorm,
 )
 
+OLD_TORCH = parse(torch.__version__) < parse("2.0.0")
 _has_tv = importlib.util.find_spec("torchvision") is not None
 _os_is_windows = sys.platform == "win32"
 
@@ -147,7 +150,12 @@ def test_cursor_position(self, rb_type, sampler, writer, storage, size):
         writer = writer()
         writer.register_storage(storage)
         batch1 = self._get_data(rb_type, size=5)
-        writer.extend(batch1)
+        cond = OLD_TORCH and size < len(batch1) and isinstance(storage, TensorStorage)
+        with pytest.warns(
+            UserWarning,
+            match="A cursor of length superior to the storage capacity was provided",
+        ) if cond else contextlib.nullcontext():
+            writer.extend(batch1)
 
         # Added less data than storage max size
         if size > 5:
@@ -172,7 +180,12 @@ def test_extend(self, rb_type, sampler, writer, storage, size):
             rb_type=rb_type, sampler=sampler, writer=writer, storage=storage, size=size
         )
         data = self._get_data(rb_type, size=5)
-        rb.extend(data)
+        cond = OLD_TORCH and size < len(data) and isinstance(rb._storage, TensorStorage)
+        with pytest.warns(
+            UserWarning,
+            match="A cursor of length superior to the storage capacity was provided",
+        ) if cond else contextlib.nullcontext():
+            rb.extend(data)
         length = len(rb)
         for d in data[-length:]:
             for b in rb._storage:
@@ -190,7 +203,14 @@ def test_extend(self, rb_type, sampler, writer, storage, size):
             else:
                 raise RuntimeError("did not find match")
         data2 = self._get_data(rb_type, size=2 * size + 2)
-        rb.extend(data2)
+        cond = (
+            OLD_TORCH and size < len(data2) and isinstance(rb._storage, TensorStorage)
+        )
+        with pytest.warns(
+            UserWarning,
+            match="A cursor of length superior to the storage capacity was provided",
+        ) if cond else contextlib.nullcontext():
+            rb.extend(data2)
 
     def test_sample(self, rb_type, sampler, writer, storage, size):
         if rb_type is RemoteTensorDictReplayBuffer and _os_is_windows:
@@ -202,7 +222,12 @@ def test_sample(self, rb_type, sampler, writer, storage, size):
             rb_type=rb_type, sampler=sampler, writer=writer, storage=storage, size=size
         )
         data = self._get_data(rb_type, size=5)
-        rb.extend(data)
+        cond = OLD_TORCH and size < len(data) and isinstance(rb._storage, TensorStorage)
+        with pytest.warns(
+            UserWarning,
+            match="A cursor of length superior to the storage capacity was provided",
+        ) if cond else contextlib.nullcontext():
+            rb.extend(data)
         new_data = rb.sample()
         if not isinstance(new_data, (torch.Tensor, TensorDictBase)):
             new_data = new_data[0]
@@ -233,7 +258,12 @@ def test_index(self, rb_type, sampler, writer, storage, size):
             rb_type=rb_type, sampler=sampler, writer=writer, storage=storage, size=size
         )
         data = self._get_data(rb_type, size=5)
-        rb.extend(data)
+        cond = OLD_TORCH and size < len(data) and isinstance(rb._storage, TensorStorage)
+        with pytest.warns(
+            UserWarning,
+            match="A cursor of length superior to the storage capacity was provided",
+        ) if cond else contextlib.nullcontext():
+            rb.extend(data)
         d1 = rb[2]
         d2 = rb._storage[2]
         if type(d1) is not type(d2):
@@ -255,7 +285,6 @@ def test_pickable(self, rb_type, sampler, writer, storage, size):
             assert isinstance(rb.__dict__[key], type(rb2.__dict__[key]))
 
 
-@pytest.mark.parametrize("storage_type", [TensorStorage])
 class TestStorages:
     def _get_tensor(self):
         return torch.randn(10, 11)
@@ -270,6 +299,7 @@ def _get_tensorclass(self):
         data = self._get_tensordict()
         return make_tc(data)(**data, batch_size=data.shape)
 
+    @pytest.mark.parametrize("storage_type", [TensorStorage])
     def test_errors(self, storage_type):
         with pytest.raises(ValueError, match="Expected storage to be non-null"):
             storage_type(None)
@@ -280,6 +310,7 @@ def test_errors(self, storage_type):
             storage_type(data, max_size=4)
 
     @pytest.mark.parametrize("data_type", ["tensor", "tensordict", "tensorclass"])
+    @pytest.mark.parametrize("storage_type", [TensorStorage])
     def test_get_set(self, storage_type, data_type):
         if data_type == "tensor":
             data = self._get_tensor()
@@ -294,6 +325,7 @@ def test_get_set(self, storage_type, data_type):
         assert (storage.get(range(10)) == 0).all()
 
     @pytest.mark.parametrize("data_type", ["tensor", "tensordict", "tensorclass"])
+    @pytest.mark.parametrize("storage_type", [TensorStorage])
     def test_state_dict(self, storage_type, data_type):
         if data_type == "tensor":
             data = self._get_tensor()
@@ -312,6 +344,52 @@ def test_state_dict(self, storage_type, data_type):
             storage2.get(range(10))
         )
 
+    @pytest.mark.skipif(  # device-placement test for lazy storages; needs CUDA
+        not torch.cuda.device_count(),  # skip when no CUDA device is available
+        reason="not cuda device found to test rb storage.",
+    )
+    @pytest.mark.parametrize(
+        "device_data,device_storage",
+        [  # each pair: (device of the data, device requested for the storage)
+            [torch.device("cuda"), torch.device("cpu")],
+            [torch.device("cpu"), torch.device("cuda")],
+            [torch.device("cpu"), "auto"],
+            [torch.device("cuda"), "auto"],
+        ],
+    )
+    @pytest.mark.parametrize("storage_type", [LazyMemmapStorage, LazyTensorStorage])
+    @pytest.mark.parametrize("data_type", ["tensor", "tc", "td"])
+    def test_storage_device(self, device_data, device_storage, storage_type, data_type):
+        @tensorclass  # minimal one-field tensorclass used for the "tc" data type
+        class TC:
+            a: torch.Tensor
+
+        if data_type == "tensor":  # build a 3-element payload on device_data
+            data = torch.randn(3, device=device_data)
+        elif data_type == "td":
+            data = TensorDict(
+                {"a": torch.randn(3, device=device_data)}, [], device=device_data
+            )
+        elif data_type == "tc":
+            data = TC(
+                a=torch.randn(3, device=device_data),
+                batch_size=[],
+                device=device_data,
+            )
+        else:
+            raise NotImplementedError
+        storage = storage_type(max_size=10, device=device_storage)
+        if device_storage == "auto":  # "auto" is expected to resolve to the data's device
+            device_storage = device_data
+        if storage_type is LazyMemmapStorage and device_storage.type == "cuda":
+            with pytest.warns(  # a CUDA memmap storage is expected to warn on set
+                DeprecationWarning, match="Support for Memmap device other than CPU"
+            ):
+                storage.set(0, data)
+        else:
+            storage.set(0, data)
+        assert storage.get(0).device.type == device_storage.type
+
 
 @pytest.mark.parametrize("max_size", [1000])
 @pytest.mark.parametrize("shape", [[3, 4]])
@@ -580,7 +658,14 @@ def test_cursor_position2(self, rbtype, storage, size, prefetch):
         torch.manual_seed(0)
         rb = self._get_rb(rbtype, storage=storage, size=size, prefetch=prefetch)
         batch1 = self._get_data(rbtype, size=5)
-        rb.extend(batch1)
+        cond = (
+            OLD_TORCH and size < len(batch1) and isinstance(rb._storage, TensorStorage)
+        )
+        with pytest.warns(
+            UserWarning,
+            match="A cursor of length superior to the storage capacity was provided",
+        ) if cond else contextlib.nullcontext():
+            rb.extend(batch1)
 
         # Added less data than storage max size
         if size > 5 or storage is None:
@@ -633,7 +718,12 @@ def test_extend(self, rbtype, storage, size, prefetch):
         torch.manual_seed(0)
         rb = self._get_rb(rbtype, storage=storage, size=size, prefetch=prefetch)
         data = self._get_data(rbtype, size=5)
-        rb.extend(data)
+        cond = OLD_TORCH and size < len(data) and isinstance(rb._storage, TensorStorage)
+        with pytest.warns(
+            UserWarning,
+            match="A cursor of length superior to the storage capacity was provided",
+        ) if cond else contextlib.nullcontext():
+            rb.extend(data)
         length = len(rb)
         for d in data[-length:]:
             found_similar = False
@@ -656,7 +746,12 @@ def test_sample(self, rbtype, storage, size, prefetch):
         torch.manual_seed(0)
         rb = self._get_rb(rbtype, storage=storage, size=size, prefetch=prefetch)
         data = self._get_data(rbtype, size=5)
-        rb.extend(data)
+        cond = OLD_TORCH and size < len(data) and isinstance(rb._storage, TensorStorage)
+        with pytest.warns(
+            UserWarning,
+            match="A cursor of length superior to the storage capacity was provided",
+        ) if cond else contextlib.nullcontext():
+            rb.extend(data)
         new_data = rb.sample()
         if not isinstance(new_data, (torch.Tensor, TensorDictBase)):
             new_data = new_data[0]
@@ -682,7 +777,12 @@ def test_index(self, rbtype, storage, size, prefetch):
         torch.manual_seed(0)
         rb = self._get_rb(rbtype, storage=storage, size=size, prefetch=prefetch)
         data = self._get_data(rbtype, size=5)
-        rb.extend(data)
+        cond = OLD_TORCH and size < len(data) and isinstance(rb._storage, TensorStorage)
+        with pytest.warns(
+            UserWarning,
+            match="A cursor of length superior to the storage capacity was provided",
+        ) if cond else contextlib.nullcontext():
+            rb.extend(data)
         d1 = rb[2]
         d2 = rb._storage[2]
         if type(d1) is not type(d2):
diff --git a/test/test_specs.py b/test/test_specs.py
index 2936cfcf582..86bddc912ee 100644
--- a/test/test_specs.py
+++ b/test/test_specs.py
@@ -654,7 +654,9 @@ def test_nested_composite_spec_update(self, shape, is_complete, device, dtype):
     def test_change_batch_size(self, shape, is_complete, device, dtype):
         ts = self._composite_spec(shape, is_complete, device, dtype)
         ts["nested"] = CompositeSpec(
-            leaf=UnboundedContinuousTensorSpec(shape), shape=shape
+            leaf=UnboundedContinuousTensorSpec(shape, device=device),
+            shape=shape,
+            device=device,
         )
         ts = ts.expand(3, *shape)
         assert ts["nested"].shape == (3, *shape)
diff --git a/test/test_transforms.py b/test/test_transforms.py
index ef6796ea04d..6f9caec5f51 100644
--- a/test/test_transforms.py
+++ b/test/test_transforms.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 import abc
 import argparse
+import importlib.util
 
 import itertools
 import pickle
@@ -46,7 +47,6 @@
 from torchrl.data import (
     BoundedTensorSpec,
     CompositeSpec,
-    LazyMemmapStorage,
     LazyTensorStorage,
     ReplayBuffer,
     TensorDictReplayBuffer,
@@ -114,6 +114,8 @@
 
 TIMEOUT = 100.0
 
+_has_gymnasium = importlib.util.find_spec("gymnasium") is not None
+
 
 class TransformBase:
     """A base class for transform test.
@@ -8799,10 +8801,9 @@ def test_transform_model(self):
         assert t(TensorDict({}, [], device="cpu:0")).device == torch.device("cpu:1")
 
     @pytest.mark.parametrize("rbclass", [ReplayBuffer, TensorDictReplayBuffer])
-    @pytest.mark.parametrize(
-        "storage", [TensorStorage, LazyTensorStorage, LazyMemmapStorage]
-    )
+    @pytest.mark.parametrize("storage", [TensorStorage, LazyTensorStorage])
     def test_transform_rb(self, rbclass, storage):
+        # we don't test casting to cuda on Memmap tensor storage since it's discouraged
         t = Compose(DeviceCastTransform("cpu:1", "cpu:0"))
         storage_kwargs = (
             {
@@ -8962,7 +8963,8 @@ def test_transform_no_env(self, batch):
 
 
 @pytest.mark.skipif(
-    not _has_gym, reason="EndOfLifeTransform can only be tested when Gym is present."
+    not _has_gymnasium,
+    reason="EndOfLifeTransform can only be tested when Gym is present.",
 )
 class TestEndOfLife(TransformBase):
     def test_trans_parallel_env_check(self):
diff --git a/torchrl/data/replay_buffers/storages.py b/torchrl/data/replay_buffers/storages.py
index 313163b96f8..844cea7d656 100644
--- a/torchrl/data/replay_buffers/storages.py
+++ b/torchrl/data/replay_buffers/storages.py
@@ -16,7 +16,7 @@
 from tensordict.tensordict import is_tensor_collection, TensorDict, TensorDictBase
 from tensordict.utils import expand_right
 
-from torchrl._utils import _CKPT_BACKEND, VERBOSE
+from torchrl._utils import _CKPT_BACKEND, implement_for, VERBOSE
 from torchrl.data.replay_buffers.utils import INT_CLASSES
 
 try:
@@ -304,6 +304,7 @@ def load_state_dict(self, state_dict):
         self.initialized = state_dict["initialized"]
         self._len = state_dict["_len"]
 
+    @implement_for("torch", "2.0", None)
     def set(
         self,
         cursor: Union[int, Sequence[int], slice],
@@ -321,6 +322,36 @@ def set(
                 self._init(data)
         self._storage[cursor] = data
 
+    @implement_for("torch", None, "2.0")
+    def set(  # noqa: F811
+        self,
+        cursor: Union[int, Sequence[int], slice],
+        data: Union[TensorDictBase, torch.Tensor],
+    ):
+        """Write ``data`` at ``cursor``; variant registered for torch below 2.0.
+
+        An index cursor longer than the storage is cut to its trailing entries.
+        """
+        is_int = isinstance(cursor, INT_CLASSES)
+        self._len = max(self._len, (cursor if is_int else max(cursor)) + 1)
+        if not self.initialized:
+            self._init(data if is_int else data[0])
+        if not isinstance(cursor, (*INT_CLASSES, slice)):
+            if not isinstance(cursor, torch.Tensor):
+                cursor = torch.tensor(cursor)
+            capacity = len(self._storage)
+            if len(cursor) > capacity:
+                warnings.warn(
+                    "A cursor of length superior to the storage capacity was provided. "
+                    "To accomodate for this, the cursor will be truncated to its last "
+                    "element such that its length matched the length of the storage. "
+                    "This may **not** be the optimal behaviour for your application! "
+                    "Make sure that the storage capacity is big enough to support the "
+                    "batch size provided."
+                )
+                cursor, data = cursor[-capacity:], data[-capacity:]  # keep the tail
+        self._storage[cursor] = data
+
     def get(self, index: Union[int, Sequence[int], slice]) -> Any:
         if not self.initialized:
             raise RuntimeError(
@@ -571,28 +602,15 @@ def _init(self, data: Union[TensorDictBase, torch.Tensor]) -> None:
             print("Creating a MemmapStorage...")
         if self.device == "auto":
             self.device = data.device
-        if isinstance(data, torch.Tensor):
-            # if Tensor, we just create a MemmapTensor of the desired shape, device and dtype
-            out = MemmapTensor(
-                self.max_size, *data.shape, device=self.device, dtype=data.dtype
+        if self.device.type != "cpu":
+            warnings.warn(
+                "Support for Memmap device other than CPU will be deprecated in v0.4.0.",
+                category=DeprecationWarning,
             )
-            filesize = os.path.getsize(out.filename) / 1024 / 1024
-            if VERBOSE:
-                print(
-                    f"The storage was created in {out.filename} and occupies {filesize} Mb of storage."
-                )
-        elif is_tensorclass(data):
-            out = (
-                data.clone()
-                .expand(self.max_size, *data.shape)
-                .memmap_like(prefix=self.scratch_dir)
-            )
-            if self.device.type != "cpu":
-                warnings.warn(
-                    "Support for Memmap device other than CPU will be deprecated in v0.4.0.",
-                    category=DeprecationWarning,
-                )
-                out = out.to(self.device).memmap_()
+        if is_tensor_collection(data):
+            out = data.clone().to(self.device)
+            out = out.expand(self.max_size, *data.shape)
+            out = out.memmap_like(prefix=self.scratch_dir)
 
             for key, tensor in sorted(
                 out.items(include_nested=True, leaves_only=True), key=str
@@ -603,28 +621,16 @@ def _init(self, data: Union[TensorDictBase, torch.Tensor]) -> None:
                         f"\t{key}: {tensor.filename}, {filesize} Mb of storage (size: {tensor.shape})."
                     )
         else:
-            if VERBOSE:
-                print("The storage is being created: ")
-            out = (
-                data.clone()
-                .expand(self.max_size, *data.shape)
-                .memmap_like(prefix=self.scratch_dir)
+            # If not a tensorclass/tensordict, it must be a tensor(-like)
+            # if Tensor, we just create a MemmapTensor of the desired shape, device and dtype
+            out = MemmapTensor(
+                self.max_size, *data.shape, device=self.device, dtype=data.dtype
             )
-            if self.device.type != "cpu":
-                warnings.warn(
-                    "Support for Memmap device other than CPU will be deprecated in v0.4.0.",
-                    category=DeprecationWarning,
+            filesize = os.path.getsize(out.filename) / 1024 / 1024
+            if VERBOSE:
+                print(
+                    f"The storage was created in {out.filename} and occupies {filesize} Mb of storage."
                 )
-                out = out.to(self.device).memmap_()
-
-            for key, tensor in sorted(
-                out.items(include_nested=True, leaves_only=True), key=str
-            ):
-                filesize = os.path.getsize(tensor.filename) / 1024 / 1024
-                if VERBOSE:
-                    print(
-                        f"\t{key}: {tensor.filename}, {filesize} Mb of storage (size: {tensor.shape})."
-                    )
         self._storage = out
         self.initialized = True