Browse Source

Added Docstrings to the BabyAI envs (#302)

Bolun 2 years ago
parent
commit
88ebc71e9c

+ 1 - 1
.pre-commit-config.yaml

@@ -22,7 +22,7 @@ repos:
           - --show-source
           - --statistics
   - repo: https://github.com/PyCQA/isort
-    rev: 5.10.1
+    rev: 5.12.0
     hooks:
       - id: isort
         args: ["--profile", "black"]

+ 14 - 11
docs/scripts/gen_envs_display.py

@@ -56,19 +56,22 @@ if __name__ == "__main__":
     type_dict = {}
 
     for env_spec in gymnasium.envs.registry.values():
-        # minigrid.envs:Env or minigrid.envs.babyai:Env
-        split = env_spec.entry_point.split(".")
-        # ignore minigrid.envs.env_type:Env
-        env_module = split[0]
-        env_name = split[-1].split(":")[-1]
-        env_type = env_module if len(split) == 2 else split[-1].split(":")[0]
+        if isinstance(env_spec.entry_point, str):
+            # minigrid.envs:Env or minigrid.envs.babyai:Env
+            split = env_spec.entry_point.split(".")
+            # ignore minigrid.envs.env_type:Env
+            env_module = split[0]
+            env_name = split[-1].split(":")[-1]
+            env_type = env_module if len(split) == 2 else split[-1].split(":")[0]
 
-        if env_module == "minigrid":
-            if env_type not in type_dict.keys():
-                type_dict[env_type] = []
+            if env_module == "minigrid":
+                if env_type not in type_dict.keys():
+                    type_dict[env_type] = []
 
-            if env_name not in type_dict[env_type]:
-                type_dict[env_type].append(env_name)
+                if env_name not in type_dict[env_type]:
+                    type_dict[env_type].append(env_name)
+        else:
+            continue
 
     for key, value in type_dict.items():
         env_type = key

+ 14 - 13
docs/scripts/gen_mds.py

@@ -32,19 +32,20 @@ babyai_envs = {}
 
 # Obtain filtered list
 for env_spec in tqdm(all_envs):
-    # minigrid.envs:Env
-    split = env_spec.entry_point.split(".")
-    # ignore gymnasium.envs.env_type:Env
-    env_module = split[0]
-
-    if len(split) > 2 and "babyai" in split[2]:
-        curr_babyai_env = split[2]
-        babyai_env_name = curr_babyai_env.split(":")[1]
-        babyai_envs[babyai_env_name] = env_spec
-    elif env_module == "minigrid":
-        env_name = split[1]
-        filtered_envs_by_type[env_name] = env_spec
-    # if env_module != "minigrid":
+    if isinstance(env_spec.entry_point, str):
+        # minigrid.envs:Env
+        split = env_spec.entry_point.split(".")
+        # ignore gymnasium.envs.env_type:Env
+        env_module = split[0]
+
+        if len(split) > 2 and "babyai" in split[2]:
+            curr_babyai_env = split[2]
+            babyai_env_name = curr_babyai_env.split(":")[1]
+            babyai_envs[babyai_env_name] = env_spec
+        elif env_module == "minigrid":
+            env_name = split[1]
+            filtered_envs_by_type[env_name] = env_spec
+        # if env_module != "minigrid":
     else:
         continue
 

BIN
figures/lavacrossing_NoWrapper.png


BIN
figures/lavacrossing_RGBImgObsWrapper.png


BIN
figures/lavacrossing_RGBImgPartialObsWrapper.png


+ 535 - 2
minigrid/envs/babyai/goto.py

@@ -11,9 +11,52 @@ from minigrid.envs.babyai.core.verifier import GoToInstr, ObjDesc
 
 class GoToRedBallGrey(RoomGridLevel):
     """
+
+    ## Description
+
     Go to the red ball, single room, with distractors.
     The distractors are all grey to reduce perceptual complexity.
     This level has distractors but doesn't make use of language.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
+
     """
 
     def __init__(self, room_size=8, num_dists=7, **kwargs):
@@ -36,8 +79,50 @@ class GoToRedBallGrey(RoomGridLevel):
 
 class GoToRedBall(RoomGridLevel):
     """
+    ## Description
+
     Go to the red ball, single room, with distractors.
     This level has distractors but doesn't make use of language.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBall-v0`
+
     """
 
     def __init__(self, room_size=8, num_dists=7, **kwargs):
@@ -57,7 +142,50 @@ class GoToRedBall(RoomGridLevel):
 
 class GoToRedBallNoDists(GoToRedBall):
     """
+
+    ## Description
+
     Go to the red ball. No distractors present.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallNoDists-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -66,7 +194,57 @@ class GoToRedBallNoDists(GoToRedBall):
 
 class GoToObj(RoomGridLevel):
     """
-    Go to an object, inside a single room with no doors, no distractors
+    ## Description
+
+    Go to an object, inside a single room with no doors, no distractors. The
+    naming convention `GoToObjS{X}` represents a room of size `X`.
+
+    ## Mission Space
+
+    "go to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToObj-v0`
+    - `BabyAI-GoToObjS4-v0`
+    - `BabyAI-GoToObjS6-v0`
+
     """
 
     def __init__(self, room_size=8, **kwargs):
@@ -81,7 +259,68 @@ class GoToObj(RoomGridLevel):
 
 class GoToLocal(RoomGridLevel):
     """
-    Go to an object, inside a single room with no doors, no distractors
+
+    ## Description
+
+    Go to an object, inside a single room with no doors. The naming convention
+    `GoToLocalS{X}N{Y}` represents a room of size `X` containing `Y`
+    distractors.
+
+    ## Mission Space
+
+    "go to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToLocal-v0`
+    - `BabyAI-GoToLocalS5N2-v0`
+    - `BabyAI-GoToLocalS6N2-v0`
+    - `BabyAI-GoToLocalS6N3-v0`
+    - `BabyAI-GoToLocalS6N4-v0`
+    - `BabyAI-GoToLocalS7N4-v0`
+    - `BabyAI-GoToLocalS7N5-v0`
+    - `BabyAI-GoToLocalS8N2-v0`
+    - `BabyAI-GoToLocalS8N3-v0`
+    - `BabyAI-GoToLocalS8N4-v0`
+    - `BabyAI-GoToLocalS8N5-v0`
+    - `BabyAI-GoToLocalS8N6-v0`
+    - `BabyAI-GoToLocalS8N7-v0`
     """
 
     def __init__(self, room_size=8, num_dists=8, **kwargs):
@@ -98,7 +337,62 @@ class GoToLocal(RoomGridLevel):
 
 class GoTo(RoomGridLevel):
     """
+
+    ## Description
+
     Go to an object, the object may be in another room. Many distractors.
+
+    ## Mission Space
+
+    "go to a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoTo-v0`
+    - `BabyAI-GoToOpen-v0`
+    - `BabyAI-GoToObjMaze-v0`
+    - `BabyAI-GoToObjMazeOpen-v0`
+    - `BabyAI-GoToObjMazeS4R2-v0`
+    - `BabyAI-GoToObjMazeS4-v0`
+    - `BabyAI-GoToObjMazeS5-v0`
+    - `BabyAI-GoToObjMazeS6-v0`
+    - `BabyAI-GoToObjMazeS7-v0`
     """
 
     def __init__(
@@ -131,9 +425,57 @@ class GoTo(RoomGridLevel):
 
 class GoToImpUnlock(RoomGridLevel):
     """
+
+    ## Description
+
     Go to an object, which may be in a locked room.
     Competencies: Maze, GoTo, ImpUnlock
     No unblocking.
+
+    ## Mission Space
+
+    "go to a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToImpUnlock-v0`
+
     """
 
     def gen_mission(self):
@@ -182,12 +524,64 @@ class GoToImpUnlock(RoomGridLevel):
 
 class GoToSeq(LevelGen):
     """
+
+    ## Description
+
     Sequencing of go-to-object commands.
 
     Competencies: Maze, GoTo, Seq
     No locked room.
     No locations.
     No unblocking.
+
+    ## Mission Space
+
+    "go to a/the {color} {type}" +
+    "and go to a/the {color} {type}" +
+    ", then go to a/the {color} {type}" +
+    "and go to a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToSeq-v0`
+    - `BabyAI-GoToSeqS5R2-v0`
+
     """
 
     def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
@@ -206,10 +600,55 @@ class GoToSeq(LevelGen):
 
 class GoToRedBlueBall(RoomGridLevel):
     """
+
+    ## Description
+
     Go to the red ball or to the blue ball.
     There is exactly one red or blue ball, and some distractors.
     The distractors are guaranteed not to be red or blue balls.
     Language is not required to solve this level.
+
+    ## Mission Space
+
+    "go to the {color} ball"
+
+    {color} is the color of the ball. Can be "red" or "blue".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBlueBall-v0`
+
     """
 
     def __init__(self, room_size=8, num_dists=7, **kwargs):
@@ -237,9 +676,55 @@ class GoToRedBlueBall(RoomGridLevel):
 
 class GoToDoor(RoomGridLevel):
     """
+
+    ## Description
+
     Go to a door
     (of a given color, in the current room)
     No distractors, no language variation
+
+    ## Mission Space
+
+    "go to the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToDoor-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -258,8 +743,56 @@ class GoToDoor(RoomGridLevel):
 
 class GoToObjDoor(RoomGridLevel):
     """
+
+    ## Description
+
     Go to an object or door
     (of a given type and color, in the current room)
+
+    ## Mission Space
+
+    "go to the {color} {type}"
+
+    {color} is the color of the object or door. Can be "red", "green", "blue",
+    "purple", "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box", "key" or "door".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object or door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToObjDoor-v0`
+
     """
 
     def __init__(self, **kwargs):

+ 242 - 0
minigrid/envs/babyai/open.py

@@ -17,7 +17,53 @@ from minigrid.envs.babyai.core.verifier import (
 
 class Open(RoomGridLevel):
     """
+
+    ## Description
+
     Open a door, which may be in another room
+
+    ## Mission Space
+
+    "open a {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-Open-v0`
+
     """
 
     def gen_mission(self):
@@ -41,10 +87,53 @@ class Open(RoomGridLevel):
 
 class OpenRedDoor(RoomGridLevel):
     """
+
+    ## Description
+
     Go to the red door
     (always unlocked, in the current room)
     Note: this level is intentionally meant for debugging and is
     intentionally kept very simple.
+
+    ## Mission Space
+
+    "open the red door"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-OpenRedDoor-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -58,9 +147,58 @@ class OpenRedDoor(RoomGridLevel):
 
 class OpenDoor(RoomGridLevel):
     """
+
+    ## Description
+
     Go to the door
     The door to open is given by its color or by its location.
     (always unlocked, in the current room)
+
+    ## Mission Space
+
+    "open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-OpenDoor-v0`
+    - `BabyAI-OpenDoorDebug-v0`
+    - `BabyAI-OpenDoorColor-v0`
+    - `BabyAI-OpenDoorLoc-v0`
+
     """
 
     def __init__(self, debug=False, select_by=None, **kwargs):
@@ -92,10 +230,58 @@ class OpenDoor(RoomGridLevel):
 
 class OpenTwoDoors(RoomGridLevel):
     """
+
+    ## Description
+
     Open door X, then open door Y
     The two doors are facing opposite directions, so that the agent
     Can't see whether the door behind him is open.
     This task requires memory (recurrent policy) to be solved effectively.
+
+    ## Mission Space
+
+    "open the {color} door, then open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-OpenTwoDoors-v0`
+    - `BabyAI-OpenRedBlueDoors-v0`
+    - `BabyAI-OpenRedBlueDoorsDebug-v0`
+
     """
 
     def __init__(
@@ -139,7 +325,63 @@ class OpenTwoDoors(RoomGridLevel):
 
 class OpenDoorsOrder(RoomGridLevel):
     """
+
+    ## Description
+
     Open one or two doors in the order specified.
+
+    ## Mission Space
+
+    "open the {color} door, then open the {color} door"
+
+    or
+
+    "open the {color} door after you open the {color} door"
+
+    or
+
+    "open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-OpenDoorsOrderN2-v0`
+    - `BabyAI-OpenDoorsOrderN4-v0`
+    - `BabyAI-OpenDoorsOrderN2Debug-v0`
+    - `BabyAI-OpenDoorsOrderN4Debug-v0`
     """
 
     def __init__(self, num_doors, debug=False, max_steps: int | None = None, **kwargs):

+ 248 - 2
minigrid/envs/babyai/other.py

@@ -17,10 +17,66 @@ from minigrid.envs.babyai.core.verifier import (
 
 class ActionObjDoor(RoomGridLevel):
     """
+
+    ## Description
+
     [pick up an object] or
     [go to an object or door] or
     [open a door]
     (in the current room)
+
+    ## Mission Space
+
+    "pick up the {color} {type}"
+
+    or
+
+    "go to the {color} {type}"
+
+    or
+
+    "open a {color} door"
+
+    {color} is the color of the object or door. Can be "red", "green", "blue",
+    "purple", "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box", "door" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent finishes the instruction.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-ActionObjDoor-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -51,9 +107,56 @@ class ActionObjDoor(RoomGridLevel):
 
 class FindObjS5(RoomGridLevel):
     """
+
+    ## Description
+
     Pick up an object (in a random room)
     Rooms have a size of 5
     This level requires potentially exhaustive exploration
+
+    ## Mission Space
+
+    "pick up the {type}"
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-FindObjS5-v0`
+    - `BabyAI-FindObjS6-v0`
+    - `BabyAI-FindObjS7-v0`
+
     """
 
     def __init__(self, room_size=5, max_steps: int | None = None, **kwargs):
@@ -76,8 +179,57 @@ class FindObjS5(RoomGridLevel):
 
 class KeyCorridor(RoomGridLevel):
     """
+
+    ## Description
+
     A ball is behind a locked door, the key is placed in a
     random room.
+
+    ## Mission Space
+
+    "pick up the ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-KeyCorridor-v0`
+    - `BabyAI-KeyCorridorS3R1-v0`
+    - `BabyAI-KeyCorridorS3R2-v0`
+    - `BabyAI-KeyCorridorS3R3-v0`
+    - `BabyAI-KeyCorridorS4R3-v0`
+    - `BabyAI-KeyCorridorS5R3-v0`
+    - `BabyAI-KeyCorridorS6R3-v0`
+
     """
 
     def __init__(
@@ -122,8 +274,53 @@ class KeyCorridor(RoomGridLevel):
 
 class OneRoomS8(RoomGridLevel):
     """
-    Pick up the ball
-    Rooms have a size of 8
+
+    ## Description
+
+    Pick up the ball. Rooms have a size of 8.
+
+    ## Mission Space
+
+    "pick up the ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-OneRoomS8-v0`
+    - `BabyAI-OneRoomS12-v0`
+    - `BabyAI-OneRoomS16-v0`
+    - `BabyAI-OneRoomS20-v0`
+
     """
 
     def __init__(self, room_size=8, **kwargs):
@@ -137,9 +334,58 @@ class OneRoomS8(RoomGridLevel):
 
 class MoveTwoAcross(RoomGridLevel):
     """
+
+    ## Description
+
     Task of the form: move the A next to the B and the C next to the D.
     This task is structured to have a very large number of possible
     instructions.
+
+    ## Mission Space
+
+    "put the {color} {type} next to the {color} {type}, then put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent finishes the instruction.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-MoveTwoAcrossS5N2-v0`
+    - `BabyAI-MoveTwoAcrossS8N9-v0`
+
     """
 
     def __init__(

+ 241 - 0
minigrid/envs/babyai/pickup.py

@@ -11,7 +11,55 @@ from minigrid.envs.babyai.core.verifier import ObjDesc, PickupInstr
 
 class Pickup(RoomGridLevel):
     """
+
+    ## Description
+
     Pick up an object, the object may be in another room.
+
+    ## Mission Space
+
+    "pick up a {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-Pickup-v0`
+
     """
 
     def gen_mission(self):
@@ -25,8 +73,56 @@ class Pickup(RoomGridLevel):
 
 class UnblockPickup(RoomGridLevel):
     """
+
+    ## Description
+
     Pick up an object, the object may be in another room. The path may
     be blocked by one or more obstructors.
+
+    ## Mission Space
+
+    "pick up a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-UnblockPickup-v0`
+
     """
 
     def gen_mission(self):
@@ -45,10 +141,58 @@ class UnblockPickup(RoomGridLevel):
 
 class PickupLoc(LevelGen):
     """
+
+    ## Description
+
     Pick up an object which may be described using its location. This is a
     single room environment.
 
     Competencies: PickUp, Loc. No unblocking.
+
+    ## Mission Space
+
+    "pick up the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-PickupLoc-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -69,10 +213,59 @@ class PickupLoc(LevelGen):
 
 class PickupDist(RoomGridLevel):
     """
+
+    ## Description
+
     Pick up an object
     The object to pick up is given by its type only, or
     by its color, or by its type and color.
     (in the current room, with distractors)
+
+    ## Mission Space
+
+    "pick up a/the {color}/{type}/{color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-PickupDist-v0`
+    - `BabyAI-PickupDistDebug-v0`
+
     """
 
     def __init__(self, debug=False, **kwargs):
@@ -98,8 +291,56 @@ class PickupDist(RoomGridLevel):
 
 class PickupAbove(RoomGridLevel):
     """
+
+    ## Description
+
     Pick up an object (in the room above)
     Solving this task effectively requires using the compass.
+
+    ## Mission Space
+
+    "pick up the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-PickupAbove-v0`
+
     """
 
     def __init__(self, max_steps: int | None = None, **kwargs):

+ 105 - 0
minigrid/envs/babyai/putnext.py

@@ -10,8 +10,58 @@ from minigrid.envs.babyai.core.verifier import ObjDesc, PutNextInstr
 
 class PutNextLocal(RoomGridLevel):
     """
+
+    ## Description
+
     Put an object next to another object, inside a single room
     with no doors, no distractors
+
+    ## Mission Space
+
+    "put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent finishes the instructed task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-PutNextLocal-v0`
+    - `BabyAI-PutNextLocalS5N3-v0`
+    - `BabyAI-PutNextLocalS6N4-v0`
+
     """
 
     def __init__(self, room_size=8, num_objs=8, **kwargs):
@@ -31,9 +81,64 @@ class PutNextLocal(RoomGridLevel):
 
 class PutNext(RoomGridLevel):
     """
+
+    ## Description
+
     Task of the form: move the A next to the B and the C next to the D.
     This task is structured to have a very large number of possible
     instructions.
+
+    ## Mission Space
+
+    "put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent finishes the instructed task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-PutNextS4N1-v0`
+    - `BabyAI-PutNextS5N2-v0`
+    - `BabyAI-PutNextS5N1-v0`
+    - `BabyAI-PutNextS6N3-v0`
+    - `BabyAI-PutNextS7N4-v0`
+    - `BabyAI-PutNextS5N2Carrying-v0`
+    - `BabyAI-PutNextS6N3Carrying-v0`
+    - `BabyAI-PutNextS7N4Carrying-v0`
+
     """
 
     def __init__(

+ 487 - 8
minigrid/envs/babyai/synth.py

@@ -11,11 +11,73 @@ from minigrid.envs.babyai.core.levelgen import LevelGen
 
 class Synth(LevelGen):
     """
-    Union of all instructions from PutNext, Open, Goto and PickUp. The agent
-    may need to move objects around. The agent may have to unlock the door,
-    but only if it is explicitly referred by the instruction.
+
+    ## Description
+
+    Union of all instructions from PutNext, Open, Goto and PickUp.
+    The agent may need to move objects around. The agent may have
+    to unlock the door, but only if it is explicitly referred to by
+    the instruction.
 
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open
+
+    ## Mission Space
+
+    "go to the {color} {type}"
+
+    or
+
+    "pick up a/the {color} {type}"
+
+    or
+
+    "open the {color} door"
+
+    or
+
+    "put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-Synth-v0`
+    - `BabyAI-SynthS5R2-v0`
+
     """
 
     def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
@@ -34,17 +96,74 @@ class Synth(LevelGen):
         )
 
 
-class SynthS5R2(Synth):
-    def __init__(self, **kwargs):
-        super().__init__(room_size=5, num_rows=2, num_cols=2, num_dists=7, **kwargs)
-
-
 class SynthLoc(LevelGen):
     """
+
+    ## Description
+
     Like Synth, but a significant share of object descriptions involves
     location language like in PickUpLoc. No implicit unlocking.
 
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc
+
+    ## Mission Space
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-SynthLoc-v0`
     """
 
     def __init__(self, **kwargs):
@@ -61,10 +180,96 @@ class SynthLoc(LevelGen):
 
 class SynthSeq(LevelGen):
     """
+
+    ## Description
+
     Like SynthLoc, but now with multiple commands, combined just like in GoToSeq.
     No implicit unlocking.
 
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc, Seq
+
+    ## Mission Space
+
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions (each either an action mission or an "and" mission),
+    concatenated with ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-SynthSeq-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -76,6 +281,99 @@ class SynthSeq(LevelGen):
 
 
 class MiniBossLevel(LevelGen):
+    """
+
+    ## Description
+
+    Command can be any sentence drawn from the Baby Language grammar.
+    Union of all competencies. This level is a superset of all other levels.
+    Compared to BossLevel this has a smaller room and a lower probability of
+    locked rooms.
+
+    ## Mission Space
+
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions (each either an action mission or an "and" mission),
+    concatenated with ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-MiniBossLevel-v0`
+
+    """
+
     def __init__(self, **kwargs):
         super().__init__(
             num_cols=2,
@@ -88,10 +386,191 @@ class MiniBossLevel(LevelGen):
 
 
 class BossLevel(LevelGen):
+    """
+
+    ## Description
+
+    Command can be any sentence drawn from the Baby Language grammar.
+    Union of all competencies. This level is a superset of all other levels.
+
+    ## Mission Space
+
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions (each either an action mission or an "and" mission),
+    concatenated with ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-BossLevel-v0`
+    """
+
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 
 
 class BossLevelNoUnlock(LevelGen):
+    """
+
+    ## Description
+
+    Command can be any sentence drawn from the Baby Language grammar.
+    Union of all competencies. This level is a superset of all other levels.
+    No implicit unlocking.
+
+    ## Mission Space
+
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions (each either an action mission or an "and" mission),
+    concatenated with ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-BossLevelNoUnlock-v0`
+    """
+
     def __init__(self, **kwargs):
         super().__init__(locked_room_prob=0, implicit_unlock=False, **kwargs)

+ 266 - 0
minigrid/envs/babyai/unlock.py

@@ -12,9 +12,55 @@ from minigrid.envs.babyai.core.verifier import ObjDesc, OpenInstr, PickupInstr
 
 class Unlock(RoomGridLevel):
     """
+
+    ## Description
+
     Unlock a door.
 
     Competencies: Maze, Open, Unlock. No unblocking.
+
+    ## Mission Space
+
+    "open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the correct door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-Unlock-v0`
+
     """
 
     def gen_mission(self):
@@ -66,8 +112,52 @@ class Unlock(RoomGridLevel):
 
 class UnlockLocal(RoomGridLevel):
     """
+
+    ## Description
+
     Fetch a key and unlock a door
     (in the current room)
+
+    ## Mission Space
+
+    "open the door"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-UnlockLocal-v0`
+    - `BabyAI-UnlockLocalDist-v0`
+
     """
 
     def __init__(self, distractors=False, **kwargs):
@@ -86,7 +176,50 @@ class UnlockLocal(RoomGridLevel):
 
 class KeyInBox(RoomGridLevel):
     """
+
+    ## Description
+
     Unlock a door. Key is in a box (in the current room).
+
+    ## Mission Space
+
+    "open the door"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door/box   |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-KeyInBox-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -107,7 +240,54 @@ class KeyInBox(RoomGridLevel):
 
 class UnlockPickup(RoomGridLevel):
     """
+
+    ## Description
+
     Unlock a door, then pick up a box in another room
+
+    ## Mission Space
+
+    "pick up the {color} box"
+
+    {color} is the color of the box. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the correct box.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-UnlockPickup-v0`
+    - `BabyAI-UnlockPickupDist-v0`
+
     """
 
     def __init__(self, distractors=False, max_steps: int | None = None, **kwargs):
@@ -137,8 +317,51 @@ class UnlockPickup(RoomGridLevel):
 
 class BlockedUnlockPickup(RoomGridLevel):
     """
+
+    ## Description
+
     Unlock a door blocked by a ball, then pick up a box
     in another room
+
+    ## Mission Space
+
+    "pick up the box"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the box.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-BlockedUnlockPickup-v0`
+
     """
 
     def __init__(self, max_steps: int | None = None, **kwargs):
@@ -168,7 +391,50 @@ class BlockedUnlockPickup(RoomGridLevel):
 
 class UnlockToUnlock(RoomGridLevel):
     """
+
+    ## Description
+
     Unlock a door A, which first requires unlocking a door B
+
+    ## Mission Space
+
+    "pick up the ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-UnlockToUnlock-v0`
+
     """
 
     def __init__(self, max_steps: int | None = None, **kwargs):

+ 222 - 1
minigrid/wrappers.py

@@ -18,19 +18,49 @@ class ReseedWrapper(Wrapper):
     Wrapper to always regenerate an environment with the same set of seeds.
     This can be used to force an environment to always keep the same
     configuration when reset.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import ReseedWrapper
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [1, 9, 5, 8, 4, 3, 8, 8, 3, 1]
+        >>> env = ReseedWrapper(env, seeds=[0, 1], seed_idx=0)
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [8, 6, 5, 2, 3, 0, 0, 0, 1, 8]
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [4, 5, 7, 9, 0, 1, 8, 9, 2, 3]
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [8, 6, 5, 2, 3, 0, 0, 0, 1, 8]
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [4, 5, 7, 9, 0, 1, 8, 9, 2, 3]
     """
 
     def __init__(self, env, seeds=[0], seed_idx=0):
+        """A wrapper that always regenerates an environment with the same set of seeds.
+
+        Args:
+            env: The environment to apply the wrapper
+            seeds: A list of seeds to be applied to the env
+            seed_idx: Index of the initial seed in seeds
+        """
         self.seeds = list(seeds)
         self.seed_idx = seed_idx
         super().__init__(env)
 
     def reset(self, **kwargs):
+        """Resets the environment with `kwargs`."""
         seed = self.seeds[self.seed_idx]
         self.seed_idx = (self.seed_idx + 1) % len(self.seeds)
         return self.env.reset(seed=seed, **kwargs)
 
     def step(self, action):
+        """Steps through the environment with `action`."""
         return self.env.step(action)
 
 
@@ -39,13 +69,40 @@ class ActionBonus(gym.Wrapper):
     Wrapper which adds an exploration bonus.
     This is a reward to encourage exploration of less
     visited (state,action) pairs.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import ActionBonus
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> _, _ = env.reset(seed=0)
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> env_bonus = ActionBonus(env)
+        >>> _, _ = env_bonus.reset(seed=0)
+        >>> _, reward, _, _, _ = env_bonus.step(1)
+        >>> print(reward)
+        1.0
+        >>> _, reward, _, _, _ = env_bonus.step(1)
+        >>> print(reward)
+        1.0
     """
 
     def __init__(self, env):
+        """A wrapper that adds an exploration bonus to less visited (state,action) pairs.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
         self.counts = {}
 
     def step(self, action):
+        """Steps through the environment with `action`."""
         obs, reward, terminated, truncated, info = self.env.step(action)
 
         env = self.unwrapped
@@ -66,20 +123,49 @@ class ActionBonus(gym.Wrapper):
         return obs, reward, terminated, truncated, info
 
     def reset(self, **kwargs):
+        """Resets the environment with `kwargs`."""
         return self.env.reset(**kwargs)
 
 
+# Should be named PositionBonus
 class StateBonus(Wrapper):
     """
     Adds an exploration bonus based on which positions
     are visited on the grid.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import StateBonus
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> _, _ = env.reset(seed=0)
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> env_bonus = StateBonus(env)
+        >>> obs, _ = env_bonus.reset(seed=0)
+        >>> obs, reward, terminated, truncated, info = env_bonus.step(1)
+        >>> print(reward)
+        1.0
+        >>> obs, reward, terminated, truncated, info = env_bonus.step(1)
+        >>> print(reward)
+        0.7071067811865475
     """
 
     def __init__(self, env):
+        """A wrapper that adds an exploration bonus to less visited positions.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
         self.counts = {}
 
     def step(self, action):
+        """Steps through the environment with `action`."""
         obs, reward, terminated, truncated, info = self.env.step(action)
 
         # Tuple based on which we index the counts
@@ -102,15 +188,34 @@ class StateBonus(Wrapper):
         return obs, reward, terminated, truncated, info
 
     def reset(self, **kwargs):
+        """Resets the environment with `kwargs`."""
         return self.env.reset(**kwargs)
 
 
 class ImgObsWrapper(ObservationWrapper):
     """
     Use the image as the only observation output, no language/mission.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import ImgObsWrapper
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs.keys()
+        dict_keys(['image', 'direction', 'mission'])
+        >>> env = ImgObsWrapper(env)
+        >>> obs, _ = env.reset()
+        >>> obs.shape
+        (7, 7, 3)
     """
 
     def __init__(self, env):
+        """A wrapper that makes image the only observation.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
         self.observation_space = env.observation_space.spaces["image"]
 
@@ -122,9 +227,40 @@ class OneHotPartialObsWrapper(ObservationWrapper):
     """
     Wrapper to get a one-hot encoding of a partially observable
     agent view as observation.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import OneHotPartialObsWrapper
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs["image"][0, :, :]
+        array([[2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0]], dtype=uint8)
+        >>> env = OneHotPartialObsWrapper(env)
+        >>> obs, _ = env.reset()
+        >>> obs["image"][0, :, :]
+        array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0]],
+               dtype=uint8)
     """
 
     def __init__(self, env, tile_size=8):
+        """A wrapper that makes the image observation a one-hot encoding of a partially observable agent view.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
 
         self.tile_size = tile_size
@@ -162,6 +298,20 @@ class RGBImgObsWrapper(ObservationWrapper):
     """
     Wrapper to use fully observable RGB image as observation.
     This can be used to have the agent solve the gridworld in pixel space.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import RGBImgObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> plt.imshow(obs['image'])
+        ![NoWrapper](../figures/lavacrossing_NoWrapper.png)
+        >>> env = RGBImgObsWrapper(env)
+        >>> obs, _ = env.reset()
+        >>> plt.imshow(obs['image'])
+        ![RGBImgObsWrapper](../figures/lavacrossing_RGBImgObsWrapper.png)
     """
 
     def __init__(self, env, tile_size=8):
@@ -190,6 +340,24 @@ class RGBImgPartialObsWrapper(ObservationWrapper):
     """
     Wrapper to use partially observable RGB image as observation.
     This can be used to have the agent solve the gridworld in pixel space.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import RGBImgObsWrapper, RGBImgPartialObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> plt.imshow(obs["image"])
+        ![NoWrapper](../figures/lavacrossing_NoWrapper.png)
+        >>> env_obs = RGBImgObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> plt.imshow(obs["image"])
+        ![RGBImgObsWrapper](../figures/lavacrossing_RGBImgObsWrapper.png)
+        >>> env_obs = RGBImgPartialObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> plt.imshow(obs["image"])
+        ![RGBImgPartialObsWrapper](../figures/lavacrossing_RGBImgPartialObsWrapper.png)
     """
 
     def __init__(self, env, tile_size=8):
@@ -218,7 +386,21 @@ class RGBImgPartialObsWrapper(ObservationWrapper):
 
 class FullyObsWrapper(ObservationWrapper):
     """
-    Fully observable gridworld using a compact grid encoding
+    Fully observable gridworld using a compact grid encoding instead of the agent view.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import FullyObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs['image'].shape
+        (7, 7, 3)
+        >>> env_obs = FullyObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> obs['image'].shape
+        (11, 11, 3)
     """
 
     def __init__(self, env):
@@ -251,6 +433,20 @@ class DictObservationSpaceWrapper(ObservationWrapper):
     where the textual instructions are replaced by arrays representing the indices of each word in a fixed vocabulary.
 
     This wrapper is not applicable to BabyAI environments, given that these have their own language component.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import DictObservationSpaceWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs['mission']
+        'avoid the lava and get to the green goal square'
+        >>> env_obs = DictObservationSpaceWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> obs['mission'][:10]
+        [19, 31, 17, 36, 20, 38, 31, 2, 15, 35]
     """
 
     def __init__(self, env, max_words_in_mission=50, word_dict=None):
@@ -371,6 +567,17 @@ class FlatObsWrapper(ObservationWrapper):
     and combine these with observed images into one flat array.
 
     This wrapper is not applicable to BabyAI environments, given that these have their own language component.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import FlatObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> env_obs = FlatObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> obs.shape
+        (2835,)
     """
 
     def __init__(self, env, maxStrLen=96):
@@ -432,6 +639,20 @@ class ViewSizeWrapper(Wrapper):
     """
     Wrapper to customize the agent field of view size.
     This cannot be used with fully observable wrappers.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import ViewSizeWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs['image'].shape
+        (7, 7, 3)
+        >>> env_obs = ViewSizeWrapper(env, agent_view_size=5)
+        >>> obs, _ = env_obs.reset()
+        >>> obs['image'].shape
+        (5, 5, 3)
     """
 
     def __init__(self, env, agent_view_size=7):

+ 4 - 1
tests/utils.py

@@ -7,7 +7,10 @@ import numpy as np
 all_testing_env_specs = [
     env_spec
     for env_spec in gym.envs.registry.values()
-    if env_spec.entry_point.startswith("minigrid.envs")
+    if (
+        isinstance(env_spec.entry_point, str)
+        and env_spec.entry_point.startswith("minigrid.envs")
+    )
 ]
 
 minigrid_testing_env_specs = [