[Deprecations] Deprecate in view of v0.6 release #2446

Merged (10 commits) on Oct 8, 2024
4 changes: 1 addition & 3 deletions .github/unittest/linux/scripts/run_all.sh
@@ -88,9 +88,7 @@ conda deactivate
 conda activate "${env_dir}"

 echo "installing gymnasium"
-pip3 install "gymnasium"
-pip3 install ale_py
-pip3 install mo-gymnasium[mujoco] # requires here bc needs mujoco-py
+pip3 install "gymnasium[atari,accept-rom-license,mujoco]<1.0" mo-gymnasium[mujoco]
 pip3 install "mujoco" -U

 # sanity check: remove?
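The `<1.0` pins added here (and in the files below) presumably shield CI from the breaking API changes expected in gymnasium 1.0. A hypothetical guard for downstream code with the same constraint (illustrative only; this PR pins CI and does not add such a check):

```python
# Hypothetical version guard, not part of this PR.
import gymnasium
from packaging.version import Version

if Version(gymnasium.__version__) >= Version("1.0"):
    raise RuntimeError(
        "gymnasium>=1.0 detected; this code targets gymnasium<1.0. "
        "Install a compatible version: pip install 'gymnasium<1.0'"
    )
```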
4 changes: 2 additions & 2 deletions .github/unittest/linux_distributed/scripts/setup_env.sh
@@ -119,7 +119,7 @@ if [[ $OSTYPE != 'darwin'* ]]; then
     rm ale_py-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
   fi
   echo "installing gymnasium"
-  pip install "gymnasium[atari,accept-rom-license]"
+  pip install "gymnasium[atari,accept-rom-license]<1.0"
 else
-  pip install "gymnasium[atari,accept-rom-license]"
+  pip install "gymnasium[atari,accept-rom-license]<1.0"
 fi
2 changes: 1 addition & 1 deletion .github/unittest/linux_examples/scripts/run_all.sh
@@ -130,7 +130,7 @@ elif [[ $PY_VERSION == *"3.11"* ]]; then
   pip install ale_py-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
   rm ale_py-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
 fi
-pip install "gymnasium[atari,accept-rom-license]"
+pip install "gymnasium[atari,accept-rom-license]<1.0"

 # ============================================================================================ #
 # ================================ PyTorch & TorchRL ========================================= #
4 changes: 2 additions & 2 deletions .github/unittest/linux_libs/scripts_envpool/setup_env.sh
@@ -82,9 +82,9 @@ if [[ $OSTYPE != 'darwin'* ]]; then
   fi
   echo "installing gym"
   # envpool does not currently work with gymnasium
-  pip install "gym[atari,accept-rom-license]"
+  pip install "gym[atari,accept-rom-license]<1.0"
 else
-  pip install "gym[atari,accept-rom-license]"
+  pip install "gym[atari,accept-rom-license]<1.0"
 fi
 pip install envpool treevalue
2 changes: 1 addition & 1 deletion .github/unittest/linux_libs/scripts_gym/batch_scripts.sh
@@ -140,7 +140,7 @@ conda deactivate
   conda create --prefix ./cloned_env --clone ./env -y
   conda activate ./cloned_env

-  pip3 install 'gymnasium[accept-rom-license,ale-py,atari]' mo-gymnasium gymnasium-robotics -U
+  pip3 install 'gymnasium[accept-rom-license,ale-py,atari]<1.0' mo-gymnasium gymnasium-robotics -U

   $DIR/run_test.sh
@@ -6,7 +6,7 @@ dependencies:
   - protobuf
   - pip:
       # Initial version is required to install Atari ROMS in setup_env.sh
-      - gymnasium
+      - gymnasium<1.0
       - hypothesis
       - future
       - cloudpickle
13 changes: 7 additions & 6 deletions .github/workflows/docs.yml
@@ -3,6 +3,7 @@ name: Generate documentation
 on:
   push:
     branches:
+      - nightly
       - main
       - release/*
     tags:
@@ -21,7 +22,7 @@ jobs:
   build-docs:
     strategy:
       matrix:
-        python_version: ["3.9"]
+        python_version: ["3.10"]
         cuda_arch_version: ["12.1"]
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
@@ -33,7 +34,7 @@
       script: |
         set -e
         set -v
-        apt-get update && apt-get install -y git wget gcc g++
+        apt-get update && apt-get install -y -f git wget gcc g++ dialog apt-utils
         root_dir="$(pwd)"
         conda_dir="${root_dir}/conda"
         env_dir="${root_dir}/env"
@@ -45,14 +46,14 @@
         bash ./miniconda.sh -b -f -p "${conda_dir}"
         eval "$(${conda_dir}/bin/conda shell.bash hook)"
         printf "* Creating a test environment\n"
-        conda create --prefix "${env_dir}" -y python=3.8
+        conda create --prefix "${env_dir}" -y python=3.10
         printf "* Activating\n"
         conda activate "${env_dir}"

         # 2. upgrade pip, ninja and packaging
         # apt-get install python3.9 python3-pip -y
         apt-get install python3-pip unzip -y -f
         python3 -m pip install --upgrade pip
-        python3 -m pip install setuptools ninja packaging -U
+        python3 -m pip install setuptools ninja packaging cmake -U

         # 3. check python version
         python3 --version
4 changes: 1 addition & 3 deletions docs/requirements.txt
@@ -16,9 +16,7 @@ sphinx_design
 torchvision
 dm_control
 mujoco
-atari-py
-ale-py
-gym[classic_control,accept-rom-license]
+gym[classic_control,accept-rom-license,ale-py,atari]
 pygame
 tqdm
 ipython
2 changes: 0 additions & 2 deletions docs/source/reference/envs.rst
@@ -996,11 +996,9 @@ Helpers

     RandomPolicy
     check_env_specs
-    exploration_mode #deprecated
     exploration_type
     get_available_libraries
     make_composite_from_td
-    set_exploration_mode #deprecated
     set_exploration_type
     step_mdp
     terminated_or_truncated
6 changes: 3 additions & 3 deletions docs/source/reference/modules.rst
@@ -62,13 +62,13 @@ Exploration wrappers and modules

 To efficiently explore the environment, TorchRL proposes a series of modules
 that will override the action sampled by the policy by a noisier version.
-Their behavior is controlled by :func:`~torchrl.envs.utils.exploration_mode`:
-if the exploration is set to ``"random"``, the exploration is active. In all
+Their behavior is controlled by :func:`~torchrl.envs.utils.exploration_type`:
+if the exploration is set to ``ExplorationType.RANDOM``, the exploration is active. In all
 other cases, the action written in the tensordict is simply the network output.

 .. note:: Unlike other exploration modules, :class:`~torchrl.modules.ConsistentDropoutModule`
    uses the ``train``/``eval`` mode to comply with the regular `Dropout` API in PyTorch.
-   The :func:`~torchrl.envs.utils.set_exploration_mode` context manager will have no effect on
+   The :func:`~torchrl.envs.utils.set_exploration_type` context manager will have no effect on
    this module.

 .. currentmodule:: torchrl.modules
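For readers migrating, a minimal sketch of the replacement API (the module names are real TorchRL exports, but the environment and network sizes are illustrative; assumes gymnasium<1.0 is installed):

```python
import torch
from tensordict.nn import TensorDictSequential
from torchrl.envs import GymEnv
from torchrl.envs.utils import ExplorationType, set_exploration_type
from torchrl.modules import EGreedyModule, MLP, QValueActor

env = GymEnv("CartPole-v1")
value_net = MLP(out_features=env.action_spec.shape[-1], num_cells=[64])
actor = QValueActor(value_net, in_keys=["observation"], spec=env.action_spec)
# the exploration module wraps the greedy actor
policy = TensorDictSequential(actor, EGreedyModule(spec=env.action_spec))

# ExplorationType.RANDOM: epsilon-greedy noise is applied to the action
with set_exploration_type(ExplorationType.RANDOM), torch.no_grad():
    train_rollout = env.rollout(10, policy)

# any other type: the exploration module passes the network output through
with set_exploration_type(ExplorationType.MODE), torch.no_grad():
    eval_rollout = env.rollout(10, policy)
```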
8 changes: 4 additions & 4 deletions examples/distributed/collectors/multi_nodes/ray_train.py
@@ -26,7 +26,7 @@
     TransformedEnv,
 )
 from torchrl.envs.libs.gym import GymEnv
-from torchrl.envs.utils import check_env_specs, set_exploration_mode
+from torchrl.envs.utils import check_env_specs, ExplorationType, set_exploration_type
 from torchrl.modules import ProbabilisticActor, TanhNormal, ValueOperator
 from torchrl.objectives import ClipPPOLoss
 from torchrl.objectives.value import GAE
@@ -85,8 +85,8 @@
     in_keys=["loc", "scale"],
     distribution_class=TanhNormal,
     distribution_kwargs={
-        "min": env.action_spec.space.low,
-        "max": env.action_spec.space.high,
+        "low": env.action_spec.space.low,
+        "high": env.action_spec.space.high,
     },
     return_log_prob=True,
 )
@@ -201,7 +201,7 @@
     stepcount_str = f"step count (max): {logs['step_count'][-1]}"
     logs["lr"].append(optim.param_groups[0]["lr"])
     lr_str = f"lr policy: {logs['lr'][-1]: 4.4f}"
-    with set_exploration_mode("mean"), torch.no_grad():
+    with set_exploration_type(ExplorationType.MODE), torch.no_grad():
         # execute a rollout with the trained policy
         eval_rollout = env.rollout(1000, policy_module)
         logs["eval reward"].append(eval_rollout["next", "reward"].mean().item())
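The `min`/`max` → `low`/`high` rename applies to distribution kwargs generally. A condensed sketch of the updated pattern (environment and network are illustrative):

```python
import torch
from tensordict.nn import NormalParamExtractor, TensorDictModule
from torchrl.envs import GymEnv
from torchrl.envs.utils import ExplorationType, set_exploration_type
from torchrl.modules import ProbabilisticActor, TanhNormal

env = GymEnv("Pendulum-v1")
actor_net = TensorDictModule(
    torch.nn.Sequential(torch.nn.LazyLinear(2), NormalParamExtractor()),
    in_keys=["observation"],
    out_keys=["loc", "scale"],
)
policy_module = ProbabilisticActor(
    actor_net,
    in_keys=["loc", "scale"],
    distribution_class=TanhNormal,
    distribution_kwargs={
        "low": env.action_spec.space.low,    # previously "min"
        "high": env.action_spec.space.high,  # previously "max"
    },
    return_log_prob=True,
)

# deterministic evaluation, mirroring the updated rollout above
with set_exploration_type(ExplorationType.MODE), torch.no_grad():
    eval_rollout = env.rollout(1000, policy_module)
```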
8 changes: 3 additions & 5 deletions sota-implementations/decision_transformer/utils.py
@@ -38,7 +38,7 @@
 )
 from torchrl.envs.libs.dm_control import DMControlEnv
 from torchrl.envs.libs.gym import set_gym_backend
-from torchrl.envs.utils import set_exploration_mode
+from torchrl.envs.utils import ExplorationType, set_exploration_type
 from torchrl.modules import (
     DTActor,
     OnlineDTActor,
@@ -374,13 +374,12 @@ def make_odt_model(cfg):
         module=actor_module,
         distribution_class=dist_class,
         distribution_kwargs=dist_kwargs,
-        default_interaction_mode="random",
         cache_dist=False,
         return_log_prob=False,
     )

     # init the lazy layers
-    with torch.no_grad(), set_exploration_mode("random"):
+    with torch.no_grad(), set_exploration_type(ExplorationType.RANDOM):
         td = proof_environment.rollout(max_steps=100)
         td["action"] = td["next", "action"]
         actor(td)
@@ -428,13 +427,12 @@ def make_dt_model(cfg):
         module=actor_module,
         distribution_class=dist_class,
         distribution_kwargs=dist_kwargs,
-        default_interaction_mode="random",
         cache_dist=False,
         return_log_prob=False,
     )

     # init the lazy layers
-    with torch.no_grad(), set_exploration_mode("random"):
+    with torch.no_grad(), set_exploration_type(ExplorationType.RANDOM):
         td = proof_environment.rollout(max_steps=100)
         td["action"] = td["next", "action"]
         actor(td)
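Note that the deprecated `default_interaction_mode` constructor argument is dropped outright rather than renamed: the sampling behavior is now selected at call time through the `set_exploration_type` context manager, as the updated rollout lines above show.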
1 change: 0 additions & 1 deletion sota-implementations/redq/config.yaml
@@ -36,7 +36,6 @@ collector:
   multi_step: 1
   n_steps_return: 3
   max_frames_per_traj: -1
-  exploration_mode: random

 logger:
   backend: wandb
1 change: 0 additions & 1 deletion sota-implementations/redq/utils.py
@@ -1021,7 +1021,6 @@ def make_collector_offpolicy(
         "init_random_frames": cfg.collector.init_random_frames,
         "split_trajs": True,
         # trajectories must be separated if multi-step is used
-        "exploration_type": ExplorationType.from_str(cfg.collector.exploration_mode),
     }

     collector = collector_helper(**collector_helper_kwargs)
8 changes: 4 additions & 4 deletions test/test_actors.py
@@ -54,8 +54,8 @@ def test_probabilistic_actor_nested_delta(log_prob_key, nested_dim=5, n_actions=
         out_keys=[("data", "action")],
         distribution_class=TanhDelta,
         distribution_kwargs={
-            "min": action_spec.space.low,
-            "max": action_spec.space.high,
+            "low": action_spec.space.low,
+            "high": action_spec.space.high,
         },
         log_prob_key=log_prob_key,
         return_log_prob=True,
@@ -77,8 +77,8 @@ def test_probabilistic_actor_nested_delta(log_prob_key, nested_dim=5, n_actions=
         out_keys=[("data", "action")],
         distribution_class=TanhDelta,
         distribution_kwargs={
-            "min": action_spec.space.low,
-            "max": action_spec.space.high,
+            "low": action_spec.space.low,
+            "high": action_spec.space.high,
         },
         log_prob_key=log_prob_key,
         return_log_prob=True,
10 changes: 5 additions & 5 deletions test/test_distributions.py
@@ -190,8 +190,8 @@ def test_truncnormal(self, min, max, vecs, upscale, shape, device):
         d = TruncatedNormal(
             *vecs,
             upscale=upscale,
-            min=min,
-            max=max,
+            low=min,
+            high=max,
         )
         assert d.device == device
         for _ in range(100):
@@ -218,7 +218,7 @@ def test_truncnormal_against_scipy(self):
         high = 2
         low = -1
         log_pi_x = TruncatedNormal(
-            mu, sigma, min=low, max=high, tanh_loc=False
+            mu, sigma, low=low, high=high, tanh_loc=False
         ).log_prob(x)
         pi_x = torch.exp(log_pi_x)
         log_pi_x.backward(torch.ones_like(log_pi_x))
@@ -264,8 +264,8 @@ def test_truncnormal_mode(self, min, max, vecs, upscale, shape, device):
         d = TruncatedNormal(
             *vecs,
             upscale=upscale,
-            min=min,
-            max=max,
+            low=min,
+            high=max,
         )
         assert d.mode is not None
         assert d.entropy() is not None
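As a standalone illustration of the renamed constructor arguments (values are arbitrary):

```python
import torch
from torchrl.modules import TruncatedNormal

# keyword rename: min/max -> low/high
d = TruncatedNormal(torch.zeros(3), torch.ones(3), low=-1.0, high=2.0, tanh_loc=False)

x = d.sample((10,))   # samples lie in [-1, 2]
lp = d.log_prob(x)    # log-density under the truncated normal
```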
2 changes: 1 addition & 1 deletion test/test_libs.py
@@ -3065,7 +3065,7 @@ def test_atari_preproc(self, dataset_id, tmpdir):

         t = Compose(
             UnsqueezeTransform(
-                unsqueeze_dim=-3, in_keys=["observation", ("next", "observation")]
+                dim=-3, in_keys=["observation", ("next", "observation")]
             ),
             Resize(32, in_keys=["observation", ("next", "observation")]),
             RenameTransform(in_keys=["action"], out_keys=["other_action"]),
6 changes: 2 additions & 4 deletions test/test_rb.py
@@ -1776,10 +1776,8 @@ def test_insert_transform(self):
             not _has_tv, reason="needs torchvision dependency"
         ),
     ),
-    pytest.param(
-        partial(UnsqueezeTransform, unsqueeze_dim=-1), id="UnsqueezeTransform"
-    ),
-    pytest.param(partial(SqueezeTransform, squeeze_dim=-1), id="SqueezeTransform"),
+    pytest.param(partial(UnsqueezeTransform, dim=-1), id="UnsqueezeTransform"),
+    pytest.param(partial(SqueezeTransform, dim=-1), id="SqueezeTransform"),
     GrayScale,
     pytest.param(partial(ObservationNorm, loc=1, scale=2), id="ObservationNorm"),
     pytest.param(partial(CatFrames, dim=-3, N=4), id="CatFrames"),
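A short sketch of the unified keyword in direct use (keys illustrative):

```python
from torchrl.envs import Compose, SqueezeTransform, UnsqueezeTransform

# `unsqueeze_dim` / `squeeze_dim` are both replaced by `dim`
t = Compose(
    UnsqueezeTransform(dim=-1, in_keys=["observation"]),
    SqueezeTransform(dim=-1, in_keys=["observation"]),
)
```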