před 2 roky · 06938c5dc9
--- a/gym_minigrid/envs/blockedunlockpickup.py
+++ b/gym_minigrid/envs/blockedunlockpickup.py
@@ -3,9 +3,60 @@ from gym_minigrid.roomgrid import RoomGrid
 
				 
			
 
				 
			
 
				 class BlockedUnlockPickupEnv(RoomGrid):
			
 
				+
			
 
				     """
			
 
				-    Unlock a door blocked by a ball, then pick up a box
			
 
				-    in another room
			
 
				+    ### Description
			
 
				+
			
 
				+    The agent has to pick up a box which is placed in another room, behind a
			
 
				+    locked door. The door is also blocked by a ball which the agent has to move
			
 
				+    before it can unlock the door. Hence, the agent has to learn to move the
			
 
				+    ball, pick up the key, open the door and pick up the object in the other
			
 
				+    room. This environment can be solved without relying on language.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "pick up the {color} {type}"
			
 
				+
			
 
				+    {color} is the color of the box. Can be "red", "green", "blue", "purple",
			
 
				+    "yellow" or "grey".
			
 
				+
			
 
				+    {type} is the type of the object. Can be "box" or "key".
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action            |
			
 
				+    |-----|--------------|-------------------|
			
 
				+    | 0   | left         | Turn left         |
			
 
				+    | 1   | right        | Turn right        |
			
 
				+    | 2   | forward      | Move forward      |
			
 
				+    | 3   | pickup       | Pick up an object |
			
 
				+    | 4   | drop         | Drop an object    |
			
 
				+    | 5   | toggle       | Toggle an object  |
			
 
				+    | 6   | done         | Unused            |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent picks up the correct box.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-BlockedUnlockPickup-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, **kwargs):
			
--- a/gym_minigrid/envs/crossing.py
+++ b/gym_minigrid/envs/crossing.py
@@ -6,8 +6,77 @@ from gym_minigrid.minigrid import Goal, Grid, Lava, MiniGridEnv, MissionSpace
 
				 
			
 
				 
			
 
				 class CrossingEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Environment with wall or lava obstacles, sparse reward.
			
 
				+    ### Description
			
 
				+
			
 
				+    Depending on the `obstacle_type` parameter:
			
 
				+    - `Lava` - The agent has to reach the green goal square on the other corner
			
 
				+        of the room while avoiding rivers of deadly lava which terminate the
			
 
				+        episode in failure. Each lava stream runs across the room either
			
 
				+        horizontally or vertically, and has a single crossing point which can be
			
 
				+        safely used. Luckily, a path to the goal is guaranteed to exist. This
			
 
				+        environment is useful for studying safety and safe exploration.
			
 
				+    - otherwise - Similar to the `LavaCrossing` environment, the agent has to
			
 
				+        reach the green goal square on the other corner of the room, however
			
 
				+        lava is replaced by walls. This MDP is therefore much easier and may be
			
 
				+        useful for quickly testing your algorithms.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+    Depending on the `obstacle_type` parameter:
			
 
				+    - `Lava` - "avoid the lava and get to the green goal square"
			
 
				+    - otherwise - "find the opening and get to the green goal square"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action       |
			
 
				+    |-----|--------------|--------------|
			
 
				+    | 0   | left         | Turn left    |
			
 
				+    | 1   | right        | Turn right   |
			
 
				+    | 2   | forward      | Move forward |
			
 
				+    | 3   | pickup       | Unused       |
			
 
				+    | 4   | drop         | Unused       |
			
 
				+    | 5   | toggle       | Unused       |
			
 
				+    | 6   | done         | Unused       |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent reaches the goal.
			
 
				+    2. The agent falls into lava.
			
 
				+    3. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    S: size of the map SxS.
			
 
				+    N: number of valid crossings across lava or walls from the starting position
			
 
				+    to the goal
			
 
				+
			
 
				+    - `Lava` :
			
 
				+        - `MiniGrid-LavaCrossingS9N1-v0`
			
 
				+        - `MiniGrid-LavaCrossingS9N2-v0`
			
 
				+        - `MiniGrid-LavaCrossingS9N3-v0`
			
 
				+        - `MiniGrid-LavaCrossingS11N5-v0`
			
 
				+
			
 
				+    - otherwise :
			
 
				+        - `MiniGrid-SimpleCrossingS9N1-v0`
			
 
				+        - `MiniGrid-SimpleCrossingS9N2-v0`
			
 
				+        - `MiniGrid-SimpleCrossingS9N3-v0`
			
 
				+        - `MiniGrid-SimpleCrossingS11N5-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, **kwargs):
			
--- a/gym_minigrid/envs/distshift.py
+++ b/gym_minigrid/envs/distshift.py
@@ -2,8 +2,59 @@ from gym_minigrid.minigrid import Goal, Grid, Lava, MiniGridEnv, MissionSpace
 
				 
			
 
				 
			
 
				 class DistShiftEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Distributional shift environment.
			
 
				+    ### Description
			
 
				+
			
 
				+    This environment is based on one of the DeepMind
			
 
				+    [AI safety gridworlds](https://github.com/deepmind/ai-safety-gridworlds). The agent starts in the
			
 
				+    top-left corner and must reach the goal which is in the top-right corner,
			
 
				+    but has to avoid stepping into lava on its way. The aim of this environment
			
 
				+    is to test an agent's ability to generalize. There are two slightly
			
 
				+    different variants of the environment, so that the agent can be trained on
			
 
				+    one variant and tested on the other.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "get to the green goal square"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action       |
			
 
				+    |-----|--------------|--------------|
			
 
				+    | 0   | left         | Turn left    |
			
 
				+    | 1   | right        | Turn right   |
			
 
				+    | 2   | forward      | Move forward |
			
 
				+    | 3   | pickup       | Unused       |
			
 
				+    | 4   | drop         | Unused       |
			
 
				+    | 5   | toggle       | Unused       |
			
 
				+    | 6   | done         | Unused       |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent reaches the goal.
			
 
				+    2. The agent falls into lava.
			
 
				+    3. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-DistShift1-v0`
			
 
				+    - `MiniGrid-DistShift2-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(
			
--- a/gym_minigrid/envs/doorkey.py
+++ b/gym_minigrid/envs/doorkey.py
@@ -2,8 +2,57 @@ from gym_minigrid.minigrid import Door, Goal, Grid, Key, MiniGridEnv, MissionSpa
 
				 
			
 
				 
			
 
				 class DoorKeyEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Environment with a door and key, sparse reward
			
 
				+    ### Description
			
 
				+
			
 
				+    This environment has a key that the agent must pick up in order to unlock a
			
 
				+    door and then get to the green goal square. This environment is difficult,
			
 
				+    because of the sparse reward, to solve using classical RL algorithms. It is
			
 
				+    useful to experiment with curiosity or curriculum learning.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "use the key to open the door and then get to the goal"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action                    |
			
 
				+    |-----|--------------|---------------------------|
			
 
				+    | 0   | left         | Turn left                 |
			
 
				+    | 1   | right        | Turn right                |
			
 
				+    | 2   | forward      | Move forward              |
			
 
				+    | 3   | pickup       | Pick up an object         |
			
 
				+    | 4   | drop         | Unused                    |
			
 
				+    | 5   | toggle       | Toggle/activate an object |
			
 
				+    | 6   | done         | Unused                    |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent reaches the goal.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-DoorKey-5x5-v0`
			
 
				+    - `MiniGrid-DoorKey-6x6-v0`
			
 
				+    - `MiniGrid-DoorKey-8x8-v0`
			
 
				+    - `MiniGrid-DoorKey-16x16-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, size=8, **kwargs):
			
--- a/gym_minigrid/envs/dynamicobstacles.py
+++ b/gym_minigrid/envs/dynamicobstacles.py
@@ -7,7 +7,61 @@ from gym_minigrid.minigrid import Ball, Goal, Grid, MiniGridEnv, MissionSpace
 
				 
			
 
				 class DynamicObstaclesEnv(MiniGridEnv):
			
 
				     """
			
 
				-    Single-room square grid environment with moving obstacles
			
 
				+    ### Description
			
 
				+
			
 
				+    This environment is an empty room with moving obstacles.
			
 
				+    The goal of the agent is to reach the green goal square without colliding
			
 
				+    with any obstacle. A large penalty is subtracted if the agent collides with
			
 
				+    an obstacle and the episode finishes. This environment is useful to test
			
 
				+    Dynamic Obstacle Avoidance for mobile robots with Reinforcement Learning in
			
 
				+    Partial Observability.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "get to the green goal square"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action       |
			
 
				+    |-----|--------------|--------------|
			
 
				+    | 0   | left         | Turn left    |
			
 
				+    | 1   | right        | Turn right   |
			
 
				+    | 2   | forward      | Move forward |
			
 
				+    | 3   | pickup       | Unused       |
			
 
				+    | 4   | drop         | Unused       |
			
 
				+    | 5   | toggle       | Unused       |
			
 
				+    | 6   | done         | Unused       |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure. A '-1' penalty is
			
 
				+    applied if the agent collides with an obstacle.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent reaches the goal.
			
 
				+    2. The agent collides with an obstacle.
			
 
				+    3. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-Dynamic-Obstacles-5x5-v0`
			
 
				+    - `MiniGrid-Dynamic-Obstacles-Random-5x5-v0`
			
 
				+    - `MiniGrid-Dynamic-Obstacles-6x6-v0`
			
 
				+    - `MiniGrid-Dynamic-Obstacles-Random-6x6-v0`
			
 
				+    - `MiniGrid-Dynamic-Obstacles-8x8-v0`
			
 
				+    - `MiniGrid-Dynamic-Obstacles-16x16-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(
			
--- a/gym_minigrid/envs/empty.py
+++ b/gym_minigrid/envs/empty.py
@@ -2,8 +2,63 @@ from gym_minigrid.minigrid import Goal, Grid, MiniGridEnv, MissionSpace
 
				 
			
 
				 
			
 
				 class EmptyEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Empty grid environment, no obstacles, sparse reward
			
 
				+    ### Description
			
 
				+
			
 
				+    This environment is an empty room, and the goal of the agent is to reach the
			
 
				+    green goal square, which provides a sparse reward. A small penalty is
			
 
				+    subtracted for the number of steps to reach the goal. This environment is
			
 
				+    useful, with small rooms, to validate that your RL algorithm works
			
 
				+    correctly, and with large rooms to experiment with sparse rewards and
			
 
				+    exploration. The random variants of the environment have the agent starting
			
 
				+    at a random position for each episode, while the regular variants have the
			
 
				+    agent always starting in the corner opposite to the goal.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "get to the green goal square"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action       |
			
 
				+    |-----|--------------|--------------|
			
 
				+    | 0   | left         | Turn left    |
			
 
				+    | 1   | right        | Turn right   |
			
 
				+    | 2   | forward      | Move forward |
			
 
				+    | 3   | pickup       | Unused       |
			
 
				+    | 4   | drop         | Unused       |
			
 
				+    | 5   | toggle       | Unused       |
			
 
				+    | 6   | done         | Unused       |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent reaches the goal.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-Empty-5x5-v0`
			
 
				+    - `MiniGrid-Empty-Random-5x5-v0`
			
 
				+    - `MiniGrid-Empty-6x6-v0`
			
 
				+    - `MiniGrid-Empty-Random-6x6-v0`
			
 
				+    - `MiniGrid-Empty-8x8-v0`
			
 
				+    - `MiniGrid-Empty-16x16-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, size=8, agent_start_pos=(1, 1), agent_start_dir=0, **kwargs):
			
--- a/gym_minigrid/envs/fetch.py
+++ b/gym_minigrid/envs/fetch.py
@@ -9,9 +9,67 @@ from gym_minigrid.minigrid import (
 
				 
			
 
				 
			
 
				 class FetchEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Environment in which the agent has to fetch a random object
			
 
				-    named using English text strings
			
 
				+    ### Description
			
 
				+
			
 
				+    This environment has multiple objects of assorted types and colors. The
			
 
				+    agent receives a textual string as part of its observation telling it which
			
 
				+    object to pick up. Picking up the wrong object terminates the episode with
			
 
				+    zero reward.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "{syntax} {color} {type}"
			
 
				+
			
 
				+    {syntax} is one of the following: "get a", "go get a", "fetch a",
			
 
				+    "go fetch a", "you must fetch a".
			
 
				+
			
 
				+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
			
 
				+    "yellow" or "grey".
			
 
				+
			
 
				+    {type} is the type of the object. Can be "key" or "ball".
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action               |
			
 
				+    |-----|--------------|----------------------|
			
 
				+    | 0   | left         | Turn left            |
			
 
				+    | 1   | right        | Turn right           |
			
 
				+    | 2   | forward      | Move forward         |
			
 
				+    | 3   | pickup       | Pick up an object    |
			
 
				+    | 4   | drop         | Unused               |
			
 
				+    | 5   | toggle       | Unused               |
			
 
				+    | 6   | done         | Unused               |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent picks up the correct object.
			
 
				+    2. The agent picks up the wrong object.
			
 
				+    3. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    N: number of objects to be generated.
			
 
				+
			
 
				+    - `MiniGrid-Fetch-5x5-N2-v0`
			
 
				+    - `MiniGrid-Fetch-6x6-N2-v0`
			
 
				+    - `MiniGrid-Fetch-8x8-N3-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, size=8, numObjs=3, **kwargs):
			
--- a/gym_minigrid/envs/fourrooms.py
+++ b/gym_minigrid/envs/fourrooms.py
@@ -2,9 +2,54 @@ from gym_minigrid.minigrid import Goal, Grid, MiniGridEnv, MissionSpace
 
				 
			
 
				 
			
 
				 class FourRoomsEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Classic 4 rooms gridworld environment.
			
 
				-    Can specify agent and goal position, if not it set at random.
			
 
				+    ### Description
			
 
				+
			
 
				+    Classic four room reinforcement learning environment. The agent must
			
 
				+    navigate in a maze composed of four rooms interconnected by 4 gaps in the
			
 
				+    walls. To obtain a reward, the agent must reach the green goal square. Both
			
 
				+    the agent and the goal square are randomly placed in any of the four rooms.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "reach the goal"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action       |
			
 
				+    |-----|--------------|--------------|
			
 
				+    | 0   | left         | Turn left    |
			
 
				+    | 1   | right        | Turn right   |
			
 
				+    | 2   | forward      | Move forward |
			
 
				+    | 3   | pickup       | Unused       |
			
 
				+    | 4   | drop         | Unused       |
			
 
				+    | 5   | toggle       | Unused       |
			
 
				+    | 6   | done         | Unused       |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent reaches the goal.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-FourRooms-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, agent_pos=None, goal_pos=None, **kwargs):
			
--- a/gym_minigrid/envs/gotodoor.py
+++ b/gym_minigrid/envs/gotodoor.py
@@ -2,9 +2,60 @@ from gym_minigrid.minigrid import COLOR_NAMES, Door, Grid, MiniGridEnv, MissionS
 
				 
			
 
				 
			
 
				 class GoToDoorEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Environment in which the agent is instructed to go to a given object
			
 
				-    named using an English text string
			
 
				+    ### Description
			
 
				+
			
 
				+    This environment is a room with four doors, one on each wall. The agent
			
 
				+    receives a textual (mission) string as input, telling it which door to go
			
 
				+    to (e.g. "go to the red door"). It receives a positive reward for performing
			
 
				+    the `done` action next to the correct door, as indicated in the mission
			
 
				+    string.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "go to the {color} door"
			
 
				+
			
 
				+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
			
 
				+    "yellow" or "grey".
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action               |
			
 
				+    |-----|--------------|----------------------|
			
 
				+    | 0   | left         | Turn left            |
			
 
				+    | 1   | right        | Turn right           |
			
 
				+    | 2   | forward      | Move forward         |
			
 
				+    | 3   | pickup       | Unused               |
			
 
				+    | 4   | drop         | Unused               |
			
 
				+    | 5   | toggle       | Unused               |
			
 
				+    | 6   | done         | Done completing task |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent stands next to the correct door performing the `done` action.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-GoToDoor-5x5-v0`
			
 
				+    - `MiniGrid-GoToDoor-6x6-v0`
			
 
				+    - `MiniGrid-GoToDoor-8x8-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, size=5, **kwargs):
			
--- a/gym_minigrid/envs/keycorridor.py
+++ b/gym_minigrid/envs/keycorridor.py
@@ -3,9 +3,70 @@ from gym_minigrid.roomgrid import RoomGrid
 
				 
			
 
				 
			
 
				 class KeyCorridorEnv(RoomGrid):
			
 
				+
			
 
				     """
			
 
				-    A ball is behind a locked door, the key is placed in a
			
 
				-    random room.
			
 
				+    ### Description
			
 
				+
			
 
				+    This environment is similar to the locked room environment, but there are
			
 
				+    multiple registered environment configurations of increasing size,
			
 
				+    making it easier to use curriculum learning to train an agent to solve it.
			
 
				+    The agent has to pick up an object which is behind a locked door. The key is
			
 
				+    hidden in another room, and the agent has to explore the environment to find
			
 
				+    it. The mission string does not give the agent any clues as to where the
			
 
				+    key is placed. This environment can be solved without relying on language.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "pick up the {color} {obj_type}"
			
 
				+
			
 
				+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
			
 
				+    "yellow" or "grey".
			
 
				+
			
 
				+    {obj_type} is the type of the object. Can be "ball" or "key".
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action            |
			
 
				+    |-----|--------------|-------------------|
			
 
				+    | 0   | left         | Turn left         |
			
 
				+    | 1   | right        | Turn right        |
			
 
				+    | 2   | forward      | Move forward      |
			
 
				+    | 3   | pickup       | Pick up an object |
			
 
				+    | 4   | drop         | Unused            |
			
 
				+    | 5   | toggle       | Toggle an object  |
			
 
				+    | 6   | done         | Unused            |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent picks up the correct object.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    S: room size.
			
 
				+    R: Number of rows.
			
 
				+
			
 
				+    - `MiniGrid-KeyCorridorS3R1-v0`
			
 
				+    - `MiniGrid-KeyCorridorS3R2-v0`
			
 
				+    - `MiniGrid-KeyCorridorS3R3-v0`
			
 
				+    - `MiniGrid-KeyCorridorS4R3-v0`
			
 
				+    - `MiniGrid-KeyCorridorS5R3-v0`
			
 
				+    - `MiniGrid-KeyCorridorS6R3-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, num_rows=3, obj_type="ball", room_size=6, **kwargs):
			
--- a/gym_minigrid/envs/lavagap.py
+++ b/gym_minigrid/envs/lavagap.py
@@ -4,9 +4,61 @@ from gym_minigrid.minigrid import Goal, Grid, Lava, MiniGridEnv, MissionSpace
 
				 
			
 
				 
			
 
				 class LavaGapEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Environment with one wall of lava with a small gap to cross through
			
 
				-    This environment is similar to LavaCrossing but simpler in structure.
			
 
				+    ### Description
			
 
				+
			
 
				+    The agent has to reach the green goal square at the opposite corner of the
			
 
				+    room, and must pass through a narrow gap in a vertical strip of deadly lava.
			
 
				+    Touching the lava terminates the episode with a zero reward. This environment
			
 
				+    is useful for studying safety and safe exploration.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    Depending on the `obstacle_type` parameter:
			
 
				+    - `Lava`: "avoid the lava and get to the green goal square"
			
 
				+    - otherwise: "find the opening and get to the green goal square"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action       |
			
 
				+    |-----|--------------|--------------|
			
 
				+    | 0   | left         | Turn left    |
			
 
				+    | 1   | right        | Turn right   |
			
 
				+    | 2   | forward      | Move forward |
			
 
				+    | 3   | pickup       | Unused       |
			
 
				+    | 4   | drop         | Unused       |
			
 
				+    | 5   | toggle       | Unused       |
			
 
				+    | 6   | done         | Unused       |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent reaches the goal.
			
 
				+    2. The agent falls into lava.
			
 
				+    3. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    S: size of map SxS.
			
 
				+
			
 
				+    - `MiniGrid-LavaGapS5-v0`
			
 
				+    - `MiniGrid-LavaGapS6-v0`
			
 
				+    - `MiniGrid-LavaGapS7-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, size, obstacle_type=Lava, **kwargs):
			
--- a/gym_minigrid/envs/lockedroom.py
+++ b/gym_minigrid/envs/lockedroom.py
@@ -25,9 +25,58 @@ class LockedRoom:
 
				 
			
 
				 
			
 
				 class LockedRoomEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Environment in which the agent is instructed to go to a given object
			
 
				-    named using an English text string
			
 
				+    ### Description
			
 
				+
			
 
				+    The environment has six rooms, one of which is locked. The agent receives
			
 
				+    a textual mission string as input, telling it which room to go to in order
			
 
				+    to get the key that opens the locked room. It then has to go into the locked
			
 
				+    room in order to reach the final goal. This environment is extremely
			
 
				+    difficult to solve with vanilla reinforcement learning alone.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal"
			
 
				+
			
 
				+    {lockedroom_color}, {keyroom_color}, and {door_color} can be "red", "green",
			
 
				+    "blue", "purple", "yellow" or "grey".
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action                    |
			
 
				+    |-----|--------------|---------------------------|
			
 
				+    | 0   | left         | Turn left                 |
			
 
				+    | 1   | right        | Turn right                |
			
 
				+    | 2   | forward      | Move forward              |
			
 
				+    | 3   | pickup       | Pick up an object         |
			
 
				+    | 4   | drop         | Unused                    |
			
 
				+    | 5   | toggle       | Toggle/activate an object |
			
 
				+    | 6   | done         | Unused                    |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent reaches the goal.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-LockedRoom-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, size=19, **kwargs):
			
--- a/gym_minigrid/envs/memory.py
+++ b/gym_minigrid/envs/memory.py
@@ -4,13 +4,61 @@ from gym_minigrid.minigrid import Ball, Grid, Key, MiniGridEnv, MissionSpace, Wa
 
				 
			
 
				 
			
 
				 class MemoryEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    This environment is a memory test. The agent starts in a small room
			
 
				-    where it sees an object. It then has to go through a narrow hallway
			
 
				-    which ends in a split. At each end of the split there is an object,
			
 
				-    one of which is the same as the object in the starting room. The
			
 
				-    agent has to remember the initial object, and go to the matching
			
 
				-    object at split.
			
 
				+    ### Description
			
 
				+
			
 
				+    This environment is a memory test. The agent starts in a small room where it
			
 
				+    sees an object. It then has to go through a narrow hallway which ends in a
			
 
				+    split. At each end of the split there is an object, one of which is the same
			
 
				+    as the object in the starting room. The agent has to remember the initial
			
 
				+    object, and go to the matching object at the split.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "go to the matching object at the end of the hallway"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action                    |
			
 
				+    |-----|--------------|---------------------------|
			
 
				+    | 0   | left         | Turn left                 |
			
 
				+    | 1   | right        | Turn right                |
			
 
				+    | 2   | forward      | Move forward              |
			
 
				+    | 3   | pickup       | Pick up an object         |
			
 
				+    | 4   | drop         | Unused                    |
			
 
				+    | 5   | toggle       | Toggle/activate an object |
			
 
				+    | 6   | done         | Unused                    |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent reaches the correct matching object.
			
 
				+    2. The agent reaches the wrong (non-matching) object.
			
 
				+    3. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    S: size of map SxS.
			
 
				+
			
 
				+    - `MiniGrid-MemoryS17Random-v0`
			
 
				+    - `MiniGrid-MemoryS13Random-v0`
			
 
				+    - `MiniGrid-MemoryS13-v0`
			
 
				+    - `MiniGrid-MemoryS11-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, size=8, random_length=False, **kwargs):
			
--- a/gym_minigrid/envs/multiroom.py
+++ b/gym_minigrid/envs/multiroom.py
@@ -18,8 +18,60 @@ class MultiRoom:
 
				 
			
 
				 
			
 
				 class MultiRoomEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Environment with multiple rooms (subgoals)
			
 
				+    ### Description
			
 
				+
			
 
				+    This environment has a series of connected rooms with doors that must be
			
 
				+    opened in order to get to the next room. The final room has the green goal
			
 
				+    square the agent must get to. This environment is extremely difficult to
			
 
				+    solve using RL alone. However, by gradually increasing the number of rooms
			
 
				+    and building a curriculum, the environment can be solved.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "traverse the rooms to get to the goal"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action                    |
			
 
				+    |-----|--------------|---------------------------|
			
 
				+    | 0   | left         | Turn left                 |
			
 
				+    | 1   | right        | Turn right                |
			
 
				+    | 2   | forward      | Move forward              |
			
 
				+    | 3   | pickup       | Unused                    |
			
 
				+    | 4   | drop         | Unused                    |
			
 
				+    | 5   | toggle       | Toggle/activate an object |
			
 
				+    | 6   | done         | Unused                    |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent reaches the goal.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    S: size of map SxS.
			
 
				+    N: number of rooms.
			
 
				+
			
 
				+    - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
			
 
				+    - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
			
 
				+    - `MiniGrid-MultiRoom-N6-v0` (six rooms)
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, minNumRooms, maxNumRooms, maxRoomSize=10, **kwargs):
			
--- a/gym_minigrid/envs/obstructedmaze.py
+++ b/gym_minigrid/envs/obstructedmaze.py
@@ -3,9 +3,68 @@ from gym_minigrid.roomgrid import RoomGrid
 
				 
			
 
				 
			
 
				 class ObstructedMazeEnv(RoomGrid):
			
 
				+
			
 
				     """
			
 
				-    A blue ball is hidden in the maze. Doors may be locked,
			
 
				-    doors may be obstructed by a ball and keys may be hidden in boxes.
			
 
				+    ### Description
			
 
				+
			
 
				+    The agent has to pick up a ball which is placed in a corner of a 3x3 maze.
			
 
				+    The doors are locked, the keys are hidden in boxes and doors are obstructed
			
 
				+    by balls. This environment can be solved without relying on language.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "pick up the {COLOR_NAMES[0]} ball"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action                    |
			
 
				+    |-----|--------------|---------------------------|
			
 
				+    | 0   | left         | Turn left                 |
			
 
				+    | 1   | right        | Turn right                |
			
 
				+    | 2   | forward      | Move forward              |
			
 
				+    | 3   | pickup       | Pick up an object         |
			
 
				+    | 4   | drop         | Unused                    |
			
 
				+    | 5   | toggle       | Toggle/activate an object |
			
 
				+    | 6   | done         | Unused                    |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent picks up the blue ball.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    "NDl": N is the number of locked doors.
			
 
				+    "h" if the key is hidden in a box.
			
 
				+    "b" if the door is obstructed by a ball.
			
 
				+    "Q" is the number of quarters that will have doors and keys out of the 9 that the
			
 
				+    map already has.
			
 
				+    "Full" 3x3 maze with "h" and "b" options.
			
 
				+
			
 
				+    - `MiniGrid-ObstructedMaze-1Dl-v0`
			
 
				+    - `MiniGrid-ObstructedMaze-1Dlh-v0`
			
 
				+    - `MiniGrid-ObstructedMaze-1Dlhb-v0`
			
 
				+    - `MiniGrid-ObstructedMaze-2Dl-v0`
			
 
				+    - `MiniGrid-ObstructedMaze-2Dlh-v0`
			
 
				+    - `MiniGrid-ObstructedMaze-2Dlhb-v0`
			
 
				+    - `MiniGrid-ObstructedMaze-1Q-v0`
			
 
				+    - `MiniGrid-ObstructedMaze-2Q-v0`
			
 
				+    - `MiniGrid-ObstructedMaze-Full-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, num_rows, num_cols, num_rooms_visited, **kwargs):
			
--- a/gym_minigrid/envs/putnear.py
+++ b/gym_minigrid/envs/putnear.py
@@ -10,9 +10,63 @@ from gym_minigrid.minigrid import (
 
				 
			
 
				 
			
 
				 class PutNearEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Environment in which the agent is instructed to place an object near
			
 
				-    another object through a natural language string.
			
 
				+    ### Description
			
 
				+
			
 
				+    The agent is instructed through a textual string to pick up an object and
			
 
				+    place it next to another object. This environment is easy to solve with two
			
 
				+    objects, but difficult to solve with more, as it involves both textual
			
 
				+    understanding and spatial reasoning involving multiple objects.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "put the {move_color} {move_type} near the {target_color} {target_type}"
			
 
				+
			
 
				+    {move_color} and {target_color} can be "red", "green", "blue", "purple",
			
 
				+    "yellow" or "grey".
			
 
				+
			
 
				+    {move_type} and {target_type} can be "box", "ball" or "key".
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action            |
			
 
				+    |-----|--------------|-------------------|
			
 
				+    | 0   | left         | Turn left         |
			
 
				+    | 1   | right        | Turn right        |
			
 
				+    | 2   | forward      | Move forward      |
			
 
				+    | 3   | pickup       | Pick up an object |
			
 
				+    | 4   | drop         | Drop an object    |
			
 
				+    | 5   | toggle       | Unused            |
			
 
				+    | 6   | done         | Unused            |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent picks up the wrong object.
			
 
				+    2. The agent drops the correct object near the target.
			
 
				+    3. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    N: number of objects.
			
 
				+
			
 
				+    - `MiniGrid-PutNear-6x6-N2-v0`
			
 
				+    - `MiniGrid-PutNear-8x8-N3-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, size=6, numObjs=2, **kwargs):
			
--- a/gym_minigrid/envs/redbluedoors.py
+++ b/gym_minigrid/envs/redbluedoors.py
@@ -2,10 +2,56 @@ from gym_minigrid.minigrid import Door, Grid, MiniGridEnv, MissionSpace
 
				 
			
 
				 
			
 
				 class RedBlueDoorEnv(MiniGridEnv):
			
 
				+
			
 
				     """
			
 
				-    Single room with red and blue doors on opposite sides.
			
 
				-    The red door must be opened before the blue door to
			
 
				-    obtain a reward.
			
 
				+    ### Description
			
 
				+
			
 
				+    The agent is randomly placed within a room with one red and one blue door
			
 
				+    facing opposite directions. The agent has to open the red door and then open
			
 
				+    the blue door, in that order. Note that, surprisingly, this environment is
			
 
				+    solvable without memory.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "open the red door then the blue door"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action                    |
			
 
				+    |-----|--------------|---------------------------|
			
 
				+    | 0   | left         | Turn left                 |
			
 
				+    | 1   | right        | Turn right                |
			
 
				+    | 2   | forward      | Move forward              |
			
 
				+    | 3   | pickup       | Unused                    |
			
 
				+    | 4   | drop         | Unused                    |
			
 
				+    | 5   | toggle       | Toggle/activate an object |
			
 
				+    | 6   | done         | Unused                    |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent opens the blue door having already opened the red door.
			
 
				+    2. The agent opens the blue door without having opened the red door yet.
			
 
				+    3. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-RedBlueDoors-6x6-v0`
			
 
				+    - `MiniGrid-RedBlueDoors-8x8-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, size=8, **kwargs):
			
--- a/gym_minigrid/envs/unlock.py
+++ b/gym_minigrid/envs/unlock.py
@@ -3,8 +3,52 @@ from gym_minigrid.roomgrid import RoomGrid
 
				 
			
 
				 
			
 
				 class UnlockEnv(RoomGrid):
			
 
				+
			
 
				     """
			
 
				-    Unlock a door
			
 
				+    ### Description
			
 
				+
			
 
				+    The agent has to open a locked door. This environment can be solved without
			
 
				+    relying on language.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "open the door"
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action                    |
			
 
				+    |-----|--------------|---------------------------|
			
 
				+    | 0   | left         | Turn left                 |
			
 
				+    | 1   | right        | Turn right                |
			
 
				+    | 2   | forward      | Move forward              |
			
 
				+    | 3   | pickup       | Unused                    |
			
 
				+    | 4   | drop         | Unused                    |
			
 
				+    | 5   | toggle       | Toggle/activate an object |
			
 
				+    | 6   | done         | Unused                    |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent opens the door.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-Unlock-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, **kwargs):
			
--- a/gym_minigrid/envs/unlockpickup.py
+++ b/gym_minigrid/envs/unlockpickup.py
@@ -3,8 +3,55 @@ from gym_minigrid.roomgrid import RoomGrid
 
				 
			
 
				 
			
 
				 class UnlockPickupEnv(RoomGrid):
			
 
				+
			
 
				     """
			
 
				-    Unlock a door, then pick up a box in another room
			
 
				+    ### Description
			
 
				+
			
 
				+    The agent has to pick up a box which is placed in another room, behind a
			
 
				+    locked door. This environment can be solved without relying on language.
			
 
				+
			
 
				+    ### Mission Space
			
 
				+
			
 
				+    "pick up the {color} box"
			
 
				+
			
 
				+    {color} is the color of the box. Can be "red", "green", "blue", "purple",
			
 
				+    "yellow" or "grey".
			
 
				+
			
 
				+    ### Action Space
			
 
				+
			
 
				+    | Num | Name         | Action                    |
			
 
				+    |-----|--------------|---------------------------|
			
 
				+    | 0   | left         | Turn left                 |
			
 
				+    | 1   | right        | Turn right                |
			
 
				+    | 2   | forward      | Move forward              |
			
 
				+    | 3   | pickup       | Pick up an object         |
			
 
				+    | 4   | drop         | Unused                    |
			
 
				+    | 5   | toggle       | Toggle/activate an object |
			
 
				+    | 6   | done         | Unused                    |
			
 
				+
			
 
				+    ### Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ### Rewards
			
 
				+
			
 
				+    A reward of '1' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ### Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent picks up the correct box.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ### Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-UnlockPickup-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, **kwargs):