Add gym make support for Meta-World envs #498

Open · wants to merge 50 commits into base: master

Commits (50)
f1a106b
refactored project and removed v1 files/references/envs
reginald-mclean May 2, 2024
af157ff
fixing tests from refactor
reginald-mclean May 3, 2024
281a124
creating envs via gym.make
reginald-mclean Jul 24, 2024
3b7da2c
pre-commit
reginald-mclean Jul 24, 2024
a923060
fixing pre-commit
reginald-mclean Jul 24, 2024
31fd8e2
update to v3
reginald-mclean Jul 29, 2024
55ead70
updates
reginald-mclean Aug 13, 2024
7b7038a
moving wrappers to own folder, fixing ML10/ML45 env creation to match…
reginald-mclean Aug 29, 2024
54814df
Merge branch 'removeV1_refactor' of github.com:reginald-mclean/Metawo…
reginald-mclean Aug 29, 2024
cfecdaf
Merge branch 'reginald-mclean-removeV1_refactor'
reginald-mclean Aug 29, 2024
16480c0
pre-commit
reginald-mclean Aug 29, 2024
f9190f9
updating to use refactored MW
reginald-mclean Aug 30, 2024
2d953e3
updating gymnasium requirement to install alpha 2 (#482)
reginald-mclean May 24, 2024
4a315be
docs work (#483)
reginald-mclean May 27, 2024
def7959
Pages work (#484)
reginald-mclean May 27, 2024
de0000d
missing goal fixes (#485)
reginald-mclean Jul 3, 2024
8e29dd5
expert actions page
reginald-mclean Jul 29, 2024
3f953b9
updates
reginald-mclean Aug 13, 2024
5703841
pre-commit
reginald-mclean Aug 29, 2024
14df6bc
merging
reginald-mclean Aug 30, 2024
852ea81
pre-commit
reginald-mclean Aug 30, 2024
2b57f0e
pre-commit
reginald-mclean Aug 30, 2024
7d37933
pre-commit
reginald-mclean Aug 30, 2024
3cfbad0
pre-commit & type hinting
reginald-mclean Aug 30, 2024
3a1d74f
update action
reginald-mclean Aug 30, 2024
19f2062
type hinting
reginald-mclean Aug 30, 2024
99ba23a
type hinting again
reginald-mclean Aug 30, 2024
4c96658
type hinting again x 2
reginald-mclean Aug 30, 2024
eda5ba4
env-names in env-dict
reginald-mclean Aug 30, 2024
a63a024
updating paths
reginald-mclean Aug 30, 2024
0f8d38e
updating tests
reginald-mclean Aug 30, 2024
50f9a21
revert mt50 test classes = None
reginald-mclean Aug 30, 2024
3455d97
Refactor gym make env init code
rainx0r Sep 8, 2024
9a96319
Add evaluation code without the buffer
rainx0r Sep 8, 2024
7a3d23c
Finish first draft of evaluation code
rainx0r Sep 8, 2024
fc1dd5b
Test evaluation
rainx0r Sep 16, 2024
9fe624c
Fix refactored gym make
rainx0r Sep 16, 2024
acfc579
Make tests deterministic & finalize eval tests
rainx0r Sep 16, 2024
befba60
Add missing future imports
rainx0r Sep 16, 2024
96c52d7
Update ML evaluation tests to not use partial observability
rainx0r Sep 18, 2024
e3350d1
Add gym make tests
rainx0r Sep 18, 2024
437b893
Merge branch 'addGymMake' into add_evaluation
rainx0r Oct 25, 2024
32d069c
Fix minor issues with gym make and eval
rainx0r Oct 28, 2024
d330e05
Finish ML1
rainx0r Oct 28, 2024
836de39
streamlining gym.make for creating envs
reginald-mclean Nov 5, 2024
b677a18
Merge remote-tracking branch 'farama/master' into addGymMake
reginald-mclean Nov 6, 2024
279136c
pre-commit
reginald-mclean Nov 6, 2024
e1b74d2
pre-commit
reginald-mclean Nov 6, 2024
0b499b7
updating dependencies to not use head of master gymnasium branch
reginald-mclean Nov 6, 2024
b32454e
updating evaluation tests for gym.make changes
reginald-mclean Nov 6, 2024
24 changes: 12 additions & 12 deletions README.md
@@ -76,12 +76,12 @@ You may wish to only access individual environments used in the Metaworld benchm

### Seeding a Benchmark Instance
For the purposes of reproducibility, it may be important to you to seed your benchmark instance.
For example, for the ML1 benchmark environment with the 'pick-place-v2' environment, you can do so in the following way:
For example, for the ML1 benchmark environment with the 'pick-place-V3' environment, you can do so in the following way:
```python
import metaworld

SEED = 0 # some seed number here
benchmark = metaworld.ML1('pick-place-v2', seed=SEED)
benchmark = metaworld.ML1('pick-place-V3', seed=SEED)
```

### Running ML1 or MT1
@@ -91,9 +91,9 @@ import random

print(metaworld.ML1.ENV_NAMES) # Check out the available environments

ml1 = metaworld.ML1('pick-place-v2') # Construct the benchmark, sampling tasks
ml1 = metaworld.ML1('pick-place-V3') # Construct the benchmark, sampling tasks

env = ml1.train_classes['pick-place-v2']() # Create an environment with task `pick_place`
env = ml1.train_classes['pick-place-V3']() # Create an environment with task `pick_place`
task = random.choice(ml1.train_tasks)
env.set_task(task) # Set task

@@ -152,15 +152,15 @@ environments respectively.

You can access them in the following way:
```python
from metaworld.envs import (ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE,
ALL_V2_ENVIRONMENTS_GOAL_HIDDEN)
from metaworld.envs import (ALL_V3_ENVIRONMENTS_GOAL_OBSERVABLE,
ALL_V3_ENVIRONMENTS_GOAL_HIDDEN)
# these are ordered dicts where the key : value
# is env_name : env_constructor

import numpy as np

door_open_goal_observable_cls = ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE["door-open-v2-goal-observable"]
door_open_goal_hidden_cls = ALL_V2_ENVIRONMENTS_GOAL_HIDDEN["door-open-v2-goal-hidden"]
door_open_goal_observable_cls = ALL_V3_ENVIRONMENTS_GOAL_OBSERVABLE["door-open-V3-goal-observable"]
door_open_goal_hidden_cls = ALL_V3_ENVIRONMENTS_GOAL_HIDDEN["door-open-V3-goal-hidden"]

env = door_open_goal_hidden_cls()
env.reset() # Reset environment
@@ -171,15 +171,15 @@ assert (obs[-3:] == np.zeros(3)).all() # goal will be zeroed out because env is
# You can choose to initialize the random seed of the environment.
# The state of your rng will remain unaffected after the environment is constructed.
env1 = door_open_goal_observable_cls(seed=5)
env2 = door_open_goal_observable_cls(seed=5)
env2 = door_open_goal_observable_cls(seed=5)

env1.reset() # Reset environment
env2.reset()
env2.reset()
a1 = env1.action_space.sample() # Sample an action
a2 = env2.action_space.sample()
a2 = env2.action_space.sample()
next_obs1, _, _, _, _ = env1.step(a1) # Step the environment with the sampled random action

next_obs2, _, _, _ = env2.step(a2)
next_obs2, _, _, _, _ = env2.step(a2)
assert (next_obs1[-3:] == next_obs2[-3:]).all() # 2 envs initialized with the same seed will have the same goal
assert not (next_obs2[-3:] == np.zeros(3)).all() # The envs are goal observable, meaning the goal is not zeroed out

2 changes: 0 additions & 2 deletions docker/Dockerfile
@@ -16,8 +16,6 @@ COPY . /usr/local/metaworld/
WORKDIR /usr/local/metaworld/
RUN free -g
RUN pip install .[testing]
RUN git clone https://github.com/reginald-mclean/Gymnasium.git
RUN pip install -e Gymnasium


ENTRYPOINT ["/usr/local/metaworld/docker/entrypoint"]
5 changes: 3 additions & 2 deletions docs/index.md
@@ -25,9 +25,9 @@ import random

print(metaworld.ML1.ENV_NAMES) # Check out the available environments

ml1 = metaworld.ML1('pick-place-v2') # Construct the benchmark, sampling tasks
ml1 = metaworld.ML1('pick-place-V3') # Construct the benchmark, sampling tasks

env = ml1.train_classes['pick-place-v2']() # Create an environment with task `pick_place`
env = ml1.train_classes['pick-place-V3']() # Create an environment with task `pick_place`
task = random.choice(ml1.train_tasks)
env.set_task(task) # Set task

@@ -41,6 +41,7 @@ obs, reward, terminate, truncate, info = env.step(a)
:caption: Introduction

introduction/basic_usage
evaluation/evaluation
installation/installation
rendering/rendering
usage/basic_usage
205 changes: 93 additions & 112 deletions docs/introduction/basic_usage.md
@@ -7,145 +7,126 @@ firstpage:
# Basic Usage

## Using the benchmark
Here is a list of benchmark environments for meta-RL (ML*) and multi-task-RL (MT*):
* [__ML1__](https://meta-world.github.io/figures/ml1.gif) is a meta-RL benchmark environment which tests few-shot adaptation to goal variation within single task. You can choose to test variation within any of [50 tasks](https://meta-world.github.io/figures/ml45-1080p.gif) for this benchmark.
* [__ML10__](https://meta-world.github.io/figures/ml10.gif) is a meta-RL benchmark which tests few-shot adaptation to new tasks. It comprises 10 meta-train tasks, and 3 test tasks.
* [__ML45__](https://meta-world.github.io/figures/ml45-1080p.gif) is a meta-RL benchmark which tests few-shot adaptation to new tasks. It comprises 45 meta-train tasks and 5 test tasks.
* [__MT10__](https://meta-world.github.io/figures/mt10.gif), __MT1__, and __MT50__ are multi-task-RL benchmark environments for learning a multi-task policy that perform 10, 1, and 50 training tasks respectively. __MT1__ is similar to __ML1__ because you can choose to test variation within any of [50 tasks](https://meta-world.github.io/figures/ml45-1080p.gif) for this benchmark. In the original Meta-World experiments, we augment MT10 and MT50 environment observations with a one-hot vector which identifies the task. We don't enforce how users utilize task one-hot vectors, however one solution would be to use a Gym wrapper such as [this one](https://github.com/rlworkgroup/garage/blob/master/src/garage/envs/multi_env_wrapper.py)
There are 6 major benchmarks pre-packaged into Meta-World, along with support for building your own custom benchmarks. The benchmarks are divided into multi-task reinforcement learning and meta-reinforcement learning benchmarks.

### Multi-Task Benchmarks
The MT1, MT10, and MT50 benchmarks are the multi-task benchmarks. They are used to learn a single multi-task policy that performs 1, 10, or 50 training tasks simultaneously. An MT1 benchmark can be created with any of the 50 tasks available in Meta-World.
In the MT10 and MT50 benchmarks, the observations returned by the benchmark come with a one-hot task ID appended to the state.

### Basics
We provide a `Benchmark` API, that allows constructing environments following the [`gymnasium.Env`](https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/core.py#L21) interface.
### Meta-Learning Benchmarks
The ML1, ML10, and ML45 benchmarks are the 3 meta-reinforcement learning benchmarks available in Meta-World. The ML1 benchmark can be used with any of the 50 tasks available in Meta-World.
The ML1 benchmark tests few-shot adaptation to goal variations within a single task. ML10 and ML45 both test few-shot adaptation to new tasks. ML10 comprises 10 training tasks with 5 test tasks, while ML45 comprises 45 training tasks with 5 test tasks.

To use a `Benchmark`, first construct it (this samples the tasks allowed for one run of an algorithm on the benchmark).
Then, construct at least one instance of each environment listed in `benchmark.train_classes` and `benchmark.test_classes`.
For each of those environments, a task must be assigned to it using
`env.set_task(task)` from `benchmark.train_tasks` and `benchmark.test_tasks`,
respectively.
`Tasks` can only be assigned to environments which have a key in
`benchmark.train_classes` or `benchmark.test_classes` matching `task.env_name`.


### Seeding a Benchmark Instance
For the purposes of reproducibility, it may be important to you to seed your benchmark instance.
For example, for the ML1 benchmark environment with the 'pick-place-v2' environment, you can do so in the following way:
### MT1
```python
import gymnasium as gym
import metaworld

SEED = 0 # some seed number here
benchmark = metaworld.ML1('pick-place-v2', seed=SEED)
seed = 42  # seed for reproducibility

env = gym.make('Meta-World/MT1-reach', seed=seed)
obs, info = env.reset()

a = env.action_space.sample() # randomly sample an action
obs, reward, terminate, truncate, info = env.step(a) # apply the randomly sampled action
```
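Meta-World environments also report task completion through the `info` dictionary. The snippet below is a minimal rollout sketch, assuming `info` carries a 0/1 `success` flag as in recent Meta-World releases; the random actions merely stand in for a trained policy.

```python
# Minimal rollout sketch using the `env` created above.
obs, info = env.reset()
for _ in range(500):
    a = env.action_space.sample()                          # stand-in for a trained policy
    obs, reward, terminate, truncate, info = env.step(a)
    if info.get('success', 0.0) > 0:                       # assumed success flag in `info`
        print('task solved')
        break
    if terminate or truncate:
        obs, info = env.reset()                            # start a new episode
```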

### MT10
MT10 has two different versions that can be returned by `gym.make`. The first is the synchronous version of the benchmark, where all environments are contained within the same process.
The synchronous option requires the fewest resources and is therefore the best fit for users with limited compute.
```python
import gymnasium as gym
import metaworld

seed = 42

envs = gym.make('Meta-World/MT10-sync', seed=seed) # this returns a Synchronous Vector Environment with 10 environments

obs, info = envs.reset() # reset all 10 environments

a = envs.action_space.sample() # sample an action for each environment

obs, reward, terminate, truncate, info = envs.step(a) # step all 10 environments
```
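As noted in the Multi-Task Benchmarks section, MT10 and MT50 observations carry a one-hot task ID appended to the state. The sketch below shows one way to read that ID back out; it assumes the one-hot occupies the last `num_tasks` entries of each observation, which is an assumption about the layout rather than a documented guarantee.

```python
import numpy as np

num_tasks = 10                               # MT10 contains 10 tasks
obs, info = envs.reset()
one_hot = np.asarray(obs)[:, -num_tasks:]    # assumed: one-hot task ID is the last block of the observation
task_ids = one_hot.argmax(axis=1)            # integer task index for each of the 10 sub-environments
print(task_ids)
```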
Alternatively, for users with more compute, we also provide an asynchronous version of the MT10 benchmark, where each environment is isolated in its own process and communicates via inter-process pipes.

```python
envs = gym.make('Meta-World/MT10-async', seed=seed) # this returns an Asynchronous Vector Environment with 10 environments
```
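Whichever version is chosen, interaction follows standard Gymnasium vector-environment semantics: actions and observations are batched, and finished sub-episodes are reset automatically. A rough rollout sketch under those assumptions:

```python
import numpy as np

obs, info = envs.reset()
returns = np.zeros(envs.num_envs)                                # running return per sub-environment
for _ in range(1000):
    actions = envs.action_space.sample()                         # batched random actions, one per environment
    obs, rewards, terminate, truncate, info = envs.step(actions)
    returns += rewards                                           # finished sub-episodes auto-reset under the hood
```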

### MT50
MT50 likewise provides both a synchronous and an asynchronous version of the environments.
```python
import gymnasium as gym
import metaworld

seed = 42

envs = gym.make('Meta-World/MT50-sync', seed=seed) # this returns a Synchronous Vector Environment with 50 environments

obs, info = envs.reset() # reset all 50 environments

a = envs.action_space.sample() # sample an action for each environment

obs, reward, terminate, truncate, info = envs.step(a) # step all 50 environments
```

### Running ML1 or MT1
```python
envs = gym.make('Meta-World/MT50-async', seed=seed) # this returns an Asynchronous Vector Environment with 50 environments
```


## Meta-Learning Benchmarks
Each meta-reinforcement learning benchmark has training and testing environments, which must be created separately as follows.

### ML1
```python
import gymnasium as gym
import metaworld
import random

print(metaworld.ML1.ENV_NAMES) # Check out the available environments
seed = 42

ml1 = metaworld.ML1('pick-place-v2') # Construct the benchmark, sampling tasks
train_envs = gym.make('Meta-World/ML1-train-reach-V3', seed=seed)
test_envs = gym.make('Meta-World/ML1-test-reach-V3', seed=seed)

env = ml1.train_classes['pick-place-v2']() # Create an environment with task `pick_place`
task = random.choice(ml1.train_tasks)
env.set_task(task) # Set task
# run the meta-training procedure using train_envs
# run the meta-testing procedure using test_envs

obs = env.reset() # Reset environment
a = env.action_space.sample() # Sample an action
obs, reward, terminate, truncate, info = env.step(a) # Step the environment with the sampled random action
```
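To make the train/test split concrete, here is an illustrative skeleton. The random actions stand in for whatever meta-RL algorithm is being trained, and episode bookkeeping and the actual policy update are omitted.

```python
for meta_iteration in range(10):
    obs, info = train_envs.reset()
    for _ in range(200):
        a = train_envs.action_space.sample()                        # placeholder for the meta-learner's actions
        obs, reward, terminate, truncate, info = train_envs.step(a)
    # ... update / adapt the policy on the collected experience here ...

obs, info = test_envs.reset()                                       # evaluate adaptation on the held-out split
```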
__MT1__ can be run the same way except that it does not contain any `test_tasks`


### Running a benchmark
Create an environment with train tasks (ML10, MT10, ML45, or MT50):
### ML10
Similar to the Multi-Task benchmarks, the ML10 and ML45 environments can be run in synchronous or asynchronous modes.


```python
import gymnasium as gym
import metaworld
import random

ml10 = metaworld.ML10() # Construct the benchmark, sampling tasks

training_envs = []
for name, env_cls in ml10.train_classes.items():
env = env_cls()
task = random.choice([task for task in ml10.train_tasks
if task.env_name == name])
env.set_task(task)
training_envs.append(env)

for env in training_envs:
obs = env.reset() # Reset environment
a = env.action_space.sample() # Sample an action
obs, reward, terminate, truncate, info = env.step(a) # Step the environment with the sampled random action
train_envs = gym.make('Meta-World/ML10-train-sync', seed=seed) # or ML10-train-async
test_envs = gym.make('Meta-World/ML10-test-sync', seed=seed) # or ML10-test-async
```
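A rough evaluation sketch over the held-out test environments is shown below. The random actions stand in for an adapted policy, and the per-environment `success` array in the vectorized `info` dict is an assumption carried over from the single-environment examples above.

```python
import numpy as np

obs, info = test_envs.reset()
solved = np.zeros(test_envs.num_envs, dtype=bool)
for _ in range(500):
    actions = test_envs.action_space.sample()                        # replace with the adapted policy
    obs, rewards, terminate, truncate, info = test_envs.step(actions)
    if 'success' in info:                                            # assumed per-environment success flags
        solved |= np.asarray(info['success']) > 0
print(f'test success rate: {solved.mean():.2f}')
```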
Create an environment with test tasks (this only works for ML10 and ML45, since MT10 and MT50 don't have a separate set of test tasks):


### ML45
```python
import gymnasium as gym
import metaworld
import random

ml10 = metaworld.ML10() # Construct the benchmark, sampling tasks

testing_envs = []
for name, env_cls in ml10.test_classes.items():
env = env_cls()
task = random.choice([task for task in ml10.test_tasks
if task.env_name == name])
env.set_task(task)
testing_envs.append(env)

for env in testing_envs:
obs = env.reset() # Reset environment
a = env.action_space.sample() # Sample an action
obs, reward, terminate, truncate, info = env.step(a) # Step the environment with the sampled random action

train_envs = gym.make('Meta-World/ML45-train-sync', seed=seed) # or ML45-train-async
test_envs = gym.make('Meta-World/ML45-test-sync', seed=seed) # or ML45-test-async
```

## Accessing Single Goal Environments
You may wish to only access individual environments used in the Meta-World benchmark for your research.
We provide constructors for creating environments where the goal has been hidden (by zeroing out the goal in
the observation) and environments where the goal is observable. They are called GoalHidden and GoalObservable
environments respectively.

You can access them in the following way:
## Custom Benchmarks
Finally, we also provide support for creating custom benchmarks by combining any number of Meta-World environments.

The prefix 'mt' will return environments that are goal observable for Multi-Task reinforcement learning, while the prefix 'ml' will return environments that are partially observable for Meta-reinforcement learning.
Like the included MT and ML benchmarks, these environments can also be run in synchronous or asynchronous mode.
To create a custom benchmark, the user must provide a list of environment names with the suffix '-V3'.

```python
from metaworld.envs import (ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE,
ALL_V2_ENVIRONMENTS_GOAL_HIDDEN)
# these are ordered dicts where the key : value
# is env_name : env_constructor

import numpy as np

door_open_goal_observable_cls = ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE["door-open-v2-goal-observable"]
door_open_goal_hidden_cls = ALL_V2_ENVIRONMENTS_GOAL_HIDDEN["door-open-v2-goal-hidden"]

env = door_open_goal_hidden_cls()
env.reset() # Reset environment
a = env.action_space.sample() # Sample an action
obs, reward, terminate, truncate, info = env.step(a) # Step the environment with the sampled random action
assert (obs[-3:] == np.zeros(3)).all() # goal will be zeroed out because env is HiddenGoal

# You can choose to initialize the random seed of the environment.
# The state of your rng will remain unaffected after the environment is constructed.
env1 = door_open_goal_observable_cls(seed=5)
env2 = door_open_goal_observable_cls(seed=5)

env1.reset() # Reset environment
env2.reset()
a1 = env1.action_space.sample() # Sample an action
a2 = env2.action_space.sample()
next_obs1, _, _, _, _ = env1.step(a1) # Step the environment with the sampled random action

next_obs2, _, _, _ = env2.step(a2)
assert (next_obs1[-3:] == next_obs2[-3:]).all() # 2 envs initialized with the same seed will have the same goal
assert not (next_obs2[-3:] == np.zeros(3)).all() # The env's are goal observable, meaning the goal is not zero'd out

env3 = door_open_goal_observable_cls(seed=10) # Construct an environment with a different seed
env1.reset() # Reset environment
env3.reset()
a1 = env1.action_space.sample() # Sample an action
a3 = env3.action_space.sample()
next_obs1, _, _, _, _ = env1.step(a1) # Step the environment with the sampled random action
next_obs3, _, _, _, _ = env3.step(a3)

assert not (next_obs1[-3:] == next_obs3[-3:]).all() # 2 envs initialized with different seeds will have different goals
assert not (next_obs1[-3:] == np.zeros(3)).all() # The env's are goal observable, meaning the goal is not zero'd out
import gymnasium as gym
import metaworld

envs = gym.make('Meta-World/mt-custom-sync', envs_list=['env_name_1-V3', 'env_name_2-V3', 'env_name_3-V3'], seed=seed)
```
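A meta-RL custom benchmark can presumably be built the same way using the 'ml' prefix described above; the exact 'ml-custom' ID below simply mirrors the 'mt' example and is an assumption rather than a documented name.

```python
# Hypothetical meta-RL counterpart of the custom benchmark above.
ml_envs = gym.make('Meta-World/ml-custom-sync', envs_list=['env_name_1-V3', 'env_name_2-V3', 'env_name_3-V3'], seed=seed)
```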