Kaynağa Gözat

Merge pull request #222 from vairodp/add-documentation

Added docs
Rodrigo de Lazcano 2 yıl önce
ebeveyn
işleme
06938c5dc9

+ 53 - 2
gym_minigrid/envs/blockedunlockpickup.py

@@ -3,9 +3,60 @@ from gym_minigrid.roomgrid import RoomGrid
 
 
 class BlockedUnlockPickupEnv(RoomGrid):
+
     """
-    Unlock a door blocked by a ball, then pick up a box
-    in another room
+    ### Description
+
+    The agent has to pick up a box which is placed in another room, behind a
+    locked door. The door is also blocked by a ball which the agent has to move
+    before it can unlock the door. Hence, the agent has to learn to move the
+    ball, pick up the key, open the door and pick up the object in the other
+    room. This environment can be solved without relying on language.
+
+    ### Mission Space
+
+    "pick up the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue",
+    "purple", "yellow" or "grey".
+
+    {type} is the type of the object. Can be "box" or "key".
+
+    ### Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the correct box.
+    2. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    - `MiniGrid-BlockedUnlockPickup-v0`
+
     """
 
     def __init__(self, **kwargs):

+ 70 - 1
gym_minigrid/envs/crossing.py

@@ -6,8 +6,77 @@ from gym_minigrid.minigrid import Goal, Grid, Lava, MiniGridEnv, MissionSpace
 
 
 class CrossingEnv(MiniGridEnv):
+
     """
-    Environment with wall or lava obstacles, sparse reward.
+    ### Description
+
+    Depending on the `obstacle_type` parameter:
+    - `Lava` - The agent has to reach the green goal square on the other corner
+        of the room while avoiding rivers of deadly lava which terminate the
+        episode in failure. Each lava stream runs across the room either
+        horizontally or vertically, and has a single crossing point which can be
+        safely used; luckily, a path to the goal is guaranteed to exist. This
+        environment is useful for studying safety and safe exploration.
+    - otherwise - Similar to the `LavaCrossing` environment, the agent has to
+        reach the green goal square on the other corner of the room, however
+        lava is replaced by walls. This MDP is therefore much easier and maybe
+        useful for quickly testing your algorithms.
+
+    ### Mission Space
+    Depending on the `obstacle_type` parameter:
+    - `Lava` - "avoid the lava and get to the green goal square"
+    - otherwise - "find the opening and get to the green goal square"
+
+    ### Action Space
+
+    | Num | Name         | Action       |
+    |-----|--------------|--------------|
+    | 0   | left         | Turn left    |
+    | 1   | right        | Turn right   |
+    | 2   | forward      | Move forward |
+    | 3   | pickup       | Unused       |
+    | 4   | drop         | Unused       |
+    | 5   | toggle       | Unused       |
+    | 6   | done         | Unused       |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent reaches the goal.
+    2. The agent falls into lava.
+    3. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    S: size of the map SxS.
+    N: number of valid crossings across lava or walls from the starting position
+    to the goal
+
+    - `Lava` :
+        - `MiniGrid-LavaCrossingS9N1-v0`
+        - `MiniGrid-LavaCrossingS9N2-v0`
+        - `MiniGrid-LavaCrossingS9N3-v0`
+        - `MiniGrid-LavaCrossingS11N5-v0`
+
+    - otherwise :
+        - `MiniGrid-SimpleCrossingS9N1-v0`
+        - `MiniGrid-SimpleCrossingS9N2-v0`
+        - `MiniGrid-SimpleCrossingS9N3-v0`
+        - `MiniGrid-SimpleCrossingS11N5-v0`
+
     """
 
     def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, **kwargs):

+ 52 - 1
gym_minigrid/envs/distshift.py

@@ -2,8 +2,59 @@ from gym_minigrid.minigrid import Goal, Grid, Lava, MiniGridEnv, MissionSpace
 
 
 class DistShiftEnv(MiniGridEnv):
+
     """
-    Distributional shift environment.
+    ### Description
+
+    This environment is based on one of the DeepMind [AI safety
+    gridworlds](https://github.com/deepmind/ai-safety-gridworlds). The agent
+    starts in the
+    top-left corner and must reach the goal which is in the top-right corner,
+    but has to avoid stepping into lava on its way. The aim of this environment
+    is to test an agent's ability to generalize. There are two slightly
+    different variants of the environment, so that the agent can be trained on
+    one variant and tested on the other.
+
+    ### Mission Space
+
+    "get to the green goal square"
+
+    ### Action Space
+
+    | Num | Name         | Action       |
+    |-----|--------------|--------------|
+    | 0   | left         | Turn left    |
+    | 1   | right        | Turn right   |
+    | 2   | forward      | Move forward |
+    | 3   | pickup       | Unused       |
+    | 4   | drop         | Unused       |
+    | 5   | toggle       | Unused       |
+    | 6   | done         | Unused       |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent reaches the goal.
+    2. The agent falls into lava.
+    3. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    - `MiniGrid-DistShift1-v0`
+    - `MiniGrid-DistShift2-v0`
+
     """
 
     def __init__(

+ 50 - 1
gym_minigrid/envs/doorkey.py

@@ -2,8 +2,57 @@ from gym_minigrid.minigrid import Door, Goal, Grid, Key, MiniGridEnv, MissionSpa
 
 
 class DoorKeyEnv(MiniGridEnv):
+
     """
-    Environment with a door and key, sparse reward
+    ### Description
+
+    This environment has a key that the agent must pick up in order to unlock a
+    door and then get to the green goal square. This environment is difficult,
+    because of the sparse reward, to solve using classical RL algorithms. It is
+    useful to experiment with curiosity or curriculum learning.
+
+    ### Mission Space
+
+    "use the key to open the door and then get to the goal"
+
+    ### Action Space
+
+    | Num | Name         | Action                    |
+    |-----|--------------|---------------------------|
+    | 0   | left         | Turn left                 |
+    | 1   | right        | Turn right                |
+    | 2   | forward      | Move forward              |
+    | 3   | pickup       | Pick up an object         |
+    | 4   | drop         | Unused                    |
+    | 5   | toggle       | Toggle/activate an object |
+    | 6   | done         | Unused                    |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent reaches the goal.
+    2. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    - `MiniGrid-DoorKey-5x5-v0`
+    - `MiniGrid-DoorKey-6x6-v0`
+    - `MiniGrid-DoorKey-8x8-v0`
+    - `MiniGrid-DoorKey-16x16-v0`
+
     """
 
     def __init__(self, size=8, **kwargs):

+ 55 - 1
gym_minigrid/envs/dynamicobstacles.py

@@ -7,7 +7,61 @@ from gym_minigrid.minigrid import Ball, Goal, Grid, MiniGridEnv, MissionSpace
 
 class DynamicObstaclesEnv(MiniGridEnv):
     """
-    Single-room square grid environment with moving obstacles
+    ### Description
+
+    This environment is an empty room with moving obstacles.
+    The goal of the agent is to reach the green goal square without colliding
+    with any obstacle. A large penalty is subtracted if the agent collides with
+    an obstacle and the episode finishes. This environment is useful to test
+    Dynamic Obstacle Avoidance for mobile robots with Reinforcement Learning in
+    Partial Observability.
+
+    ### Mission Space
+
+    "get to the green goal square"
+
+    ### Action Space
+
+    | Num | Name         | Action       |
+    |-----|--------------|--------------|
+    | 0   | left         | Turn left    |
+    | 1   | right        | Turn right   |
+    | 2   | forward      | Move forward |
+    | 3   | pickup       | Unused       |
+    | 4   | drop         | Unused       |
+    | 5   | toggle       | Unused       |
+    | 6   | done         | Unused       |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure. A '-1' penalty is
+    subtracted if the agent collides with an obstacle.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent reaches the goal.
+    2. The agent collides with an obstacle.
+    3. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    - `MiniGrid-Dynamic-Obstacles-5x5-v0`
+    - `MiniGrid-Dynamic-Obstacles-Random-5x5-v0`
+    - `MiniGrid-Dynamic-Obstacles-6x6-v0`
+    - `MiniGrid-Dynamic-Obstacles-Random-6x6-v0`
+    - `MiniGrid-Dynamic-Obstacles-8x8-v0`
+    - `MiniGrid-Dynamic-Obstacles-16x16-v0`
+
     """
 
     def __init__(

+ 56 - 1
gym_minigrid/envs/empty.py

@@ -2,8 +2,63 @@ from gym_minigrid.minigrid import Goal, Grid, MiniGridEnv, MissionSpace
 
 
 class EmptyEnv(MiniGridEnv):
+
     """
-    Empty grid environment, no obstacles, sparse reward
+    ### Description
+
+    This environment is an empty room, and the goal of the agent is to reach the
+    green goal square, which provides a sparse reward. A small penalty is
+    subtracted for the number of steps to reach the goal. This environment is
+    useful, with small rooms, to validate that your RL algorithm works
+    correctly, and with large rooms to experiment with sparse rewards and
+    exploration. The random variants of the environment have the agent starting
+    at a random position for each episode, while the regular variants have the
+    agent always starting in the corner opposite to the goal.
+
+    ### Mission Space
+
+    "get to the green goal square"
+
+    ### Action Space
+
+    | Num | Name         | Action       |
+    |-----|--------------|--------------|
+    | 0   | left         | Turn left    |
+    | 1   | right        | Turn right   |
+    | 2   | forward      | Move forward |
+    | 3   | pickup       | Unused       |
+    | 4   | drop         | Unused       |
+    | 5   | toggle       | Unused       |
+    | 6   | done         | Unused       |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent reaches the goal.
+    2. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    - `MiniGrid-Empty-5x5-v0`
+    - `MiniGrid-Empty-Random-5x5-v0`
+    - `MiniGrid-Empty-6x6-v0`
+    - `MiniGrid-Empty-Random-6x6-v0`
+    - `MiniGrid-Empty-8x8-v0`
+    - `MiniGrid-Empty-16x16-v0`
+
     """
 
     def __init__(self, size=8, agent_start_pos=(1, 1), agent_start_dir=0, **kwargs):

+ 60 - 2
gym_minigrid/envs/fetch.py

@@ -9,9 +9,67 @@ from gym_minigrid.minigrid import (
 
 
 class FetchEnv(MiniGridEnv):
+
     """
-    Environment in which the agent has to fetch a random object
-    named using English text strings
+    ### Description
+
+    This environment has multiple objects of assorted types and colors. The
+    agent receives a textual string as part of its observation telling it which
+    object to pick up. Picking up the wrong object terminates the episode with
+    zero reward.
+
+    ### Mission Space
+
+    "{syntax} {color} {type}"
+
+    {syntax} is one of the following: "get a", "go get a", "fetch a",
+    "go fetch a", "you must fetch a".
+
+    {color} is the color of the object. Can be "red", "green", "blue",
+    "purple", "yellow" or "grey".
+
+    {type} is the type of the object. Can be "key" or "ball".
+
+    ### Action Space
+
+    | Num | Name         | Action               |
+    |-----|--------------|----------------------|
+    | 0   | left         | Turn left            |
+    | 1   | right        | Turn right           |
+    | 2   | forward      | Move forward         |
+    | 3   | pickup       | Pick up an object    |
+    | 4   | drop         | Unused               |
+    | 5   | toggle       | Unused               |
+    | 6   | done         | Unused               |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the correct object.
+    2. The agent picks up the wrong object.
+    3. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    N: number of objects to be generated.
+
+    - `MiniGrid-Fetch-5x5-N2-v0`
+    - `MiniGrid-Fetch-6x6-N2-v0`
+    - `MiniGrid-Fetch-8x8-N3-v0`
+
     """
 
     def __init__(self, size=8, numObjs=3, **kwargs):

+ 47 - 2
gym_minigrid/envs/fourrooms.py

@@ -2,9 +2,54 @@ from gym_minigrid.minigrid import Goal, Grid, MiniGridEnv, MissionSpace
 
 
 class FourRoomsEnv(MiniGridEnv):
+
     """
-    Classic 4 rooms gridworld environment.
-    Can specify agent and goal position, if not it set at random.
+    ### Description
+
+    Classic four room reinforcement learning environment. The agent must
+    navigate in a maze composed of four rooms interconnected by 4 gaps in the
+    walls. To obtain a reward, the agent must reach the green goal square. Both
+    the agent and the goal square are randomly placed in any of the four rooms.
+
+    ### Mission Space
+
+    "reach the goal"
+
+    ### Action Space
+
+    | Num | Name         | Action       |
+    |-----|--------------|--------------|
+    | 0   | left         | Turn left    |
+    | 1   | right        | Turn right   |
+    | 2   | forward      | Move forward |
+    | 3   | pickup       | Unused       |
+    | 4   | drop         | Unused       |
+    | 5   | toggle       | Unused       |
+    | 6   | done         | Unused       |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent reaches the goal.
+    2. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    - `MiniGrid-FourRooms-v0`
+
     """
 
     def __init__(self, agent_pos=None, goal_pos=None, **kwargs):

+ 53 - 2
gym_minigrid/envs/gotodoor.py

@@ -2,9 +2,60 @@ from gym_minigrid.minigrid import COLOR_NAMES, Door, Grid, MiniGridEnv, MissionS
 
 
 class GoToDoorEnv(MiniGridEnv):
+
     """
-    Environment in which the agent is instructed to go to a given object
-    named using an English text string
+    ### Description
+
+    This environment is a room with four doors, one on each wall. The agent
+    receives a textual (mission) string as input, telling it which door to go
+    to, (eg: "go to the red door"). It receives a positive reward for performing
+    the `done` action next to the correct door, as indicated in the mission
+    string.
+
+    ### Mission Space
+
+    "go to the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ### Action Space
+
+    | Num | Name         | Action               |
+    |-----|--------------|----------------------|
+    | 0   | left         | Turn left            |
+    | 1   | right        | Turn right           |
+    | 2   | forward      | Move forward         |
+    | 3   | pickup       | Unused               |
+    | 4   | drop         | Unused               |
+    | 5   | toggle       | Unused               |
+    | 6   | done         | Done completing task |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent stands next to the correct door performing the `done` action.
+    2. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    - `MiniGrid-GoToDoor-5x5-v0`
+    - `MiniGrid-GoToDoor-6x6-v0`
+    - `MiniGrid-GoToDoor-8x8-v0`
+
     """
 
     def __init__(self, size=5, **kwargs):

+ 63 - 2
gym_minigrid/envs/keycorridor.py

@@ -3,9 +3,70 @@ from gym_minigrid.roomgrid import RoomGrid
 
 
 class KeyCorridorEnv(RoomGrid):
+
     """
-    A ball is behind a locked door, the key is placed in a
-    random room.
+    ### Description
+
+    This environment is similar to the locked room environment, but there are
+    multiple registered environment configurations of increasing size,
+    making it easier to use curriculum learning to train an agent to solve it.
+    The agent has to pick up an object which is behind a locked door. The key is
+    hidden in another room, and the agent has to explore the environment to find
+    it. The mission string does not give the agent any clues as to where the
+    key is placed. This environment can be solved without relying on language.
+
+    ### Mission Space
+
+    "pick up the {color} {obj_type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {obj_type} is the type of the object. Can be "ball" or "key".
+
+    ### Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the correct object.
+    2. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    S: room size.
+    R: Number of rows.
+
+    - `MiniGrid-KeyCorridorS3R1-v0`
+    - `MiniGrid-KeyCorridorS3R2-v0`
+    - `MiniGrid-KeyCorridorS3R3-v0`
+    - `MiniGrid-KeyCorridorS4R3-v0`
+    - `MiniGrid-KeyCorridorS5R3-v0`
+    - `MiniGrid-KeyCorridorS6R3-v0`
+
     """
 
     def __init__(self, num_rows=3, obj_type="ball", room_size=6, **kwargs):

+ 54 - 2
gym_minigrid/envs/lavagap.py

@@ -4,9 +4,61 @@ from gym_minigrid.minigrid import Goal, Grid, Lava, MiniGridEnv, MissionSpace
 
 
 class LavaGapEnv(MiniGridEnv):
+
     """
-    Environment with one wall of lava with a small gap to cross through
-    This environment is similar to LavaCrossing but simpler in structure.
+    ### Description
+
+    The agent has to reach the green goal square at the opposite corner of the
+    room, and must pass through a narrow gap in a vertical strip of deadly lava.
+    Touching the lava terminates the episode with a zero reward. This environment
+    is useful for studying safety and safe exploration.
+
+    ### Mission Space
+
+    Depending on the `obstacle_type` parameter:
+    - `Lava`: "avoid the lava and get to the green goal square"
+    - otherwise: "find the opening and get to the green goal square"
+
+    ### Action Space
+
+    | Num | Name         | Action       |
+    |-----|--------------|--------------|
+    | 0   | left         | Turn left    |
+    | 1   | right        | Turn right   |
+    | 2   | forward      | Move forward |
+    | 3   | pickup       | Unused       |
+    | 4   | drop         | Unused       |
+    | 5   | toggle       | Unused       |
+    | 6   | done         | Unused       |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent reaches the goal.
+    2. The agent falls into lava.
+    3. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    S: size of map SxS.
+
+    - `MiniGrid-LavaGapS5-v0`
+    - `MiniGrid-LavaGapS6-v0`
+    - `MiniGrid-LavaGapS7-v0`
+
     """
 
     def __init__(self, size, obstacle_type=Lava, **kwargs):

+ 51 - 2
gym_minigrid/envs/lockedroom.py

@@ -25,9 +25,58 @@ class LockedRoom:
 
 
 class LockedRoomEnv(MiniGridEnv):
+
     """
-    Environment in which the agent is instructed to go to a given object
-    named using an English text string
+    ### Description
+
+    The environment has six rooms, one of which is locked. The agent receives
+    a textual mission string as input, telling it which room to go to in order
+    to get the key that opens the locked room. It then has to go into the locked
+    room in order to reach the final goal. This environment is extremely
+    difficult to solve with vanilla reinforcement learning alone.
+
+    ### Mission Space
+
+    "get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal"
+
+    {lockedroom_color}, {keyroom_color}, and {door_color} can be "red", "green",
+    "blue", "purple", "yellow" or "grey".
+
+    ### Action Space
+
+    | Num | Name         | Action                    |
+    |-----|--------------|---------------------------|
+    | 0   | left         | Turn left                 |
+    | 1   | right        | Turn right                |
+    | 2   | forward      | Move forward              |
+    | 3   | pickup       | Pick up an object         |
+    | 4   | drop         | Unused                    |
+    | 5   | toggle       | Toggle/activate an object |
+    | 6   | done         | Unused                    |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent reaches the goal.
+    2. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    - `MiniGrid-LockedRoom-v0`
+
     """
 
     def __init__(self, size=19, **kwargs):

+ 54 - 6
gym_minigrid/envs/memory.py

@@ -4,13 +4,61 @@ from gym_minigrid.minigrid import Ball, Grid, Key, MiniGridEnv, MissionSpace, Wa
 
 
 class MemoryEnv(MiniGridEnv):
+
     """
-    This environment is a memory test. The agent starts in a small room
-    where it sees an object. It then has to go through a narrow hallway
-    which ends in a split. At each end of the split there is an object,
-    one of which is the same as the object in the starting room. The
-    agent has to remember the initial object, and go to the matching
-    object at split.
+    ### Description
+
+    This environment is a memory test. The agent starts in a small room where it
+    sees an object. It then has to go through a narrow hallway which ends in a
+    split. At each end of the split there is an object, one of which is the same
+    as the object in the starting room. The agent has to remember the initial
+    object, and go to the matching object at split.
+
+    ### Mission Space
+
+    "go to the matching object at the end of the hallway"
+
+    ### Action Space
+
+    | Num | Name         | Action                    |
+    |-----|--------------|---------------------------|
+    | 0   | left         | Turn left                 |
+    | 1   | right        | Turn right                |
+    | 2   | forward      | Move forward              |
+    | 3   | pickup       | Pick up an object         |
+    | 4   | drop         | Unused                    |
+    | 5   | toggle       | Toggle/activate an object |
+    | 6   | done         | Unused                    |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent reaches the correct matching object.
+    2. The agent reaches the wrong matching object.
+    3. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    S: size of map SxS.
+
+    - `MiniGrid-MemoryS17Random-v0`
+    - `MiniGrid-MemoryS13Random-v0`
+    - `MiniGrid-MemoryS13-v0`
+    - `MiniGrid-MemoryS11-v0`
+
     """
 
     def __init__(self, size=8, random_length=False, **kwargs):

+ 53 - 1
gym_minigrid/envs/multiroom.py

@@ -18,8 +18,60 @@ class MultiRoom:
 
 
 class MultiRoomEnv(MiniGridEnv):
+
     """
-    Environment with multiple rooms (subgoals)
+    ### Description
+
+    This environment has a series of connected rooms with doors that must be
+    opened in order to get to the next room. The final room has the green goal
+    square the agent must get to. This environment is extremely difficult to
+    solve using RL alone. However, by gradually increasing the number of rooms
+    and building a curriculum, the environment can be solved.
+
+    ### Mission Space
+
+    "traverse the rooms to get to the goal"
+
+    ### Action Space
+
+    | Num | Name         | Action                    |
+    |-----|--------------|---------------------------|
+    | 0   | left         | Turn left                 |
+    | 1   | right        | Turn right                |
+    | 2   | forward      | Move forward              |
+    | 3   | pickup       | Unused                    |
+    | 4   | drop         | Unused                    |
+    | 5   | toggle       | Toggle/activate an object |
+    | 6   | done         | Unused                    |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent reaches the goal.
+    2. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    S: size of map SxS.
+    N: number of rooms.
+
+    - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
+    - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
+    - `MiniGrid-MultiRoom-N6-v0` (six rooms)
+
     """
 
     def __init__(self, minNumRooms, maxNumRooms, maxRoomSize=10, **kwargs):

+ 61 - 2
gym_minigrid/envs/obstructedmaze.py

@@ -3,9 +3,68 @@ from gym_minigrid.roomgrid import RoomGrid
 
 
 class ObstructedMazeEnv(RoomGrid):
+
     """
-    A blue ball is hidden in the maze. Doors may be locked,
-    doors may be obstructed by a ball and keys may be hidden in boxes.
+    ### Description
+
+    The agent has to pick up a blue ball which is placed in a corner of a 3x3
+    maze.
+    The doors are locked, the keys are hidden in boxes and doors are obstructed
+    by balls. This environment can be solved without relying on language.
+
+    ### Mission Space
+
+    "pick up the {COLOR_NAMES[0]} ball"
+
+    ### Action Space
+
+    | Num | Name         | Action                    |
+    |-----|--------------|---------------------------|
+    | 0   | left         | Turn left                 |
+    | 1   | right        | Turn right                |
+    | 2   | forward      | Move forward              |
+    | 3   | pickup       | Pick up an object         |
+    | 4   | drop         | Unused                    |
+    | 5   | toggle       | Toggle/activate an object |
+    | 6   | done         | Unused                    |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the blue ball.
+    2. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    "NDl" is the number of locked doors.
+    "h" if the key is hidden in a box.
+    "b" if the door is obstructed by a ball.
+    "Q" is the number of quarters (out of the 9 rooms in the map) that will
+    have doors and keys.
+    "Full" 3x3 maze with "h" and "b" options.
+
+    - `MiniGrid-ObstructedMaze-1Dl-v0`
+    - `MiniGrid-ObstructedMaze-1Dlh-v0`
+    - `MiniGrid-ObstructedMaze-1Dlhb-v0`
+    - `MiniGrid-ObstructedMaze-2Dl-v0`
+    - `MiniGrid-ObstructedMaze-2Dlh-v0`
+    - `MiniGrid-ObstructedMaze-2Dlhb-v0`
+    - `MiniGrid-ObstructedMaze-1Q-v0`
+    - `MiniGrid-ObstructedMaze-2Q-v0`
+    - `MiniGrid-ObstructedMaze-Full-v0`
+
     """
 
     def __init__(self, num_rows, num_cols, num_rooms_visited, **kwargs):

+ 56 - 2
gym_minigrid/envs/putnear.py

@@ -10,9 +10,63 @@ from gym_minigrid.minigrid import (
 
 
 class PutNearEnv(MiniGridEnv):
+
     """
-    Environment in which the agent is instructed to place an object near
-    another object through a natural language string.
+    ### Description
+
+    The agent is instructed through a textual string to pick up an object and
+    place it next to another object. This environment is easy to solve with two
+    objects, but difficult to solve with more, as it involves both textual
+    understanding and spatial reasoning involving multiple objects.
+
+    ### Mission Space
+
+    "put the {move_color} {move_type} near the {target_color} {target_type}"
+
+    {move_color} and {target_color} can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {move_type} and {target_type} can be "box", "ball" or "key".
+
+    ### Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the wrong object.
+    2. The agent drops the correct object near the target.
+    3. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    N: number of objects.
+
+    - `MiniGrid-PutNear-6x6-N2-v0`
+    - `MiniGrid-PutNear-8x8-N3-v0`
+
     """
 
     def __init__(self, size=6, numObjs=2, **kwargs):

+ 49 - 3
gym_minigrid/envs/redbluedoors.py

@@ -2,10 +2,56 @@ from gym_minigrid.minigrid import Door, Grid, MiniGridEnv, MissionSpace
 
 
 class RedBlueDoorEnv(MiniGridEnv):
+
     """
-    Single room with red and blue doors on opposite sides.
-    The red door must be opened before the blue door to
-    obtain a reward.
+    ### Description
+
+    The agent is randomly placed within a room with one red and one blue door
+    facing opposite directions. The agent has to open the red door and then open
+    the blue door, in that order. Note that, surprisingly, this environment is
+    solvable without memory.
+
+    ### Mission Space
+
+    "open the red door then the blue door"
+
+    ### Action Space
+
+    | Num | Name         | Action                    |
+    |-----|--------------|---------------------------|
+    | 0   | left         | Turn left                 |
+    | 1   | right        | Turn right                |
+    | 2   | forward      | Move forward              |
+    | 3   | pickup       | Unused                    |
+    | 4   | drop         | Unused                    |
+    | 5   | toggle       | Toggle/activate an object |
+    | 6   | done         | Unused                    |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the blue door having already opened the red door.
+    2. The agent opens the blue door without having opened the red door yet.
+    3. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    - `MiniGrid-RedBlueDoors-6x6-v0`
+    - `MiniGrid-RedBlueDoors-8x8-v0`
+
     """
 
     def __init__(self, size=8, **kwargs):

+ 45 - 1
gym_minigrid/envs/unlock.py

@@ -3,8 +3,52 @@ from gym_minigrid.roomgrid import RoomGrid
 
 
 class UnlockEnv(RoomGrid):
+
     """
-    Unlock a door
+    ### Description
+
+    The agent has to open a locked door. This environment can be solved without
+    relying on language.
+
+    ### Mission Space
+
+    "open the door"
+
+    ### Action Space
+
+    | Num | Name         | Action                    |
+    |-----|--------------|---------------------------|
+    | 0   | left         | Turn left                 |
+    | 1   | right        | Turn right                |
+    | 2   | forward      | Move forward              |
+    | 3   | pickup       | Unused                    |
+    | 4   | drop         | Unused                    |
+    | 5   | toggle       | Toggle/activate an object |
+    | 6   | done         | Unused                    |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    - `MiniGrid-Unlock-v0`
+
     """
 
     def __init__(self, **kwargs):

+ 48 - 1
gym_minigrid/envs/unlockpickup.py

@@ -3,8 +3,55 @@ from gym_minigrid.roomgrid import RoomGrid
 
 
 class UnlockPickupEnv(RoomGrid):
+
     """
-    Unlock a door, then pick up a box in another room
+    ### Description
+
+    The agent has to pick up a box which is placed in another room, behind a
+    locked door. This environment can be solved without relying on language.
+
+    ### Mission Space
+
+    "pick up the {color} box"
+
+    {color} is the color of the box. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ### Action Space
+
+    | Num | Name         | Action                    |
+    |-----|--------------|---------------------------|
+    | 0   | left         | Turn left                 |
+    | 1   | right        | Turn right                |
+    | 2   | forward      | Move forward              |
+    | 3   | pickup       | Pick up an object         |
+    | 4   | drop         | Unused                    |
+    | 5   | toggle       | Toggle/activate an object |
+    | 6   | done         | Unused                    |
+
+    ### Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ### Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ### Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the correct box.
+    2. Timeout (see `max_steps`).
+
+    ### Registered Configurations
+
+    - `MiniGrid-UnlockPickup-v0`
+
     """
 
     def __init__(self, **kwargs):