浏览代码

Added Docstrings to the BabyAI envs (#302)

Bolun 2 年之前
父节点
当前提交
88ebc71e9c

+ 1 - 1
.pre-commit-config.yaml

@@ -22,7 +22,7 @@ repos:
           - --show-source
           - --show-source
           - --statistics
           - --statistics
   - repo: https://github.com/PyCQA/isort
   - repo: https://github.com/PyCQA/isort
-    rev: 5.10.1
+    rev: 5.12.0
     hooks:
     hooks:
       - id: isort
       - id: isort
         args: ["--profile", "black"]
         args: ["--profile", "black"]

+ 14 - 11
docs/scripts/gen_envs_display.py

@@ -56,19 +56,22 @@ if __name__ == "__main__":
     type_dict = {}
     type_dict = {}
 
 
     for env_spec in gymnasium.envs.registry.values():
     for env_spec in gymnasium.envs.registry.values():
-        # minigrid.envs:Env or minigrid.envs.babyai:Env
-        split = env_spec.entry_point.split(".")
-        # ignore minigrid.envs.env_type:Env
-        env_module = split[0]
-        env_name = split[-1].split(":")[-1]
-        env_type = env_module if len(split) == 2 else split[-1].split(":")[0]
+        if isinstance(env_spec.entry_point, str):
+            # minigrid.envs:Env or minigrid.envs.babyai:Env
+            split = env_spec.entry_point.split(".")
+            # ignore minigrid.envs.env_type:Env
+            env_module = split[0]
+            env_name = split[-1].split(":")[-1]
+            env_type = env_module if len(split) == 2 else split[-1].split(":")[0]
 
 
-        if env_module == "minigrid":
-            if env_type not in type_dict.keys():
-                type_dict[env_type] = []
+            if env_module == "minigrid":
+                if env_type not in type_dict.keys():
+                    type_dict[env_type] = []
 
 
-            if env_name not in type_dict[env_type]:
-                type_dict[env_type].append(env_name)
+                if env_name not in type_dict[env_type]:
+                    type_dict[env_type].append(env_name)
+        else:
+            continue
 
 
     for key, value in type_dict.items():
     for key, value in type_dict.items():
         env_type = key
         env_type = key

+ 14 - 13
docs/scripts/gen_mds.py

@@ -32,19 +32,20 @@ babyai_envs = {}
 
 
 # Obtain filtered list
 # Obtain filtered list
 for env_spec in tqdm(all_envs):
 for env_spec in tqdm(all_envs):
-    # minigrid.envs:Env
-    split = env_spec.entry_point.split(".")
-    # ignore gymnasium.envs.env_type:Env
-    env_module = split[0]
-
-    if len(split) > 2 and "babyai" in split[2]:
-        curr_babyai_env = split[2]
-        babyai_env_name = curr_babyai_env.split(":")[1]
-        babyai_envs[babyai_env_name] = env_spec
-    elif env_module == "minigrid":
-        env_name = split[1]
-        filtered_envs_by_type[env_name] = env_spec
-    # if env_module != "minigrid":
+    if isinstance(env_spec.entry_point, str):
+        # minigrid.envs:Env
+        split = env_spec.entry_point.split(".")
+        # ignore gymnasium.envs.env_type:Env
+        env_module = split[0]
+
+        if len(split) > 2 and "babyai" in split[2]:
+            curr_babyai_env = split[2]
+            babyai_env_name = curr_babyai_env.split(":")[1]
+            babyai_envs[babyai_env_name] = env_spec
+        elif env_module == "minigrid":
+            env_name = split[1]
+            filtered_envs_by_type[env_name] = env_spec
+        # if env_module != "minigrid":
     else:
     else:
         continue
         continue
 
 

二进制
figures/lavacrossing_NoWrapper.png


二进制
figures/lavacrossing_RGBImgObsWrapper.png


二进制
figures/lavacrossing_RGBImgPartialObsWrapper.png


+ 535 - 2
minigrid/envs/babyai/goto.py

@@ -11,9 +11,52 @@ from minigrid.envs.babyai.core.verifier import GoToInstr, ObjDesc
 
 
 class GoToRedBallGrey(RoomGridLevel):
 class GoToRedBallGrey(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Go to the red ball, single room, with distractors.
     Go to the red ball, single room, with distractors.
     The distractors are all grey to reduce perceptual complexity.
     The distractors are all grey to reduce perceptual complexity.
     This level has distractors but doesn't make use of language.
     This level has distractors but doesn't make use of language.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
+
     """
     """
 
 
     def __init__(self, room_size=8, num_dists=7, **kwargs):
     def __init__(self, room_size=8, num_dists=7, **kwargs):
@@ -36,8 +79,50 @@ class GoToRedBallGrey(RoomGridLevel):
 
 
 class GoToRedBall(RoomGridLevel):
 class GoToRedBall(RoomGridLevel):
     """
     """
+    ## Description
+
     Go to the red ball, single room, with distractors.
     Go to the red ball, single room, with distractors.
     This level has distractors but doesn't make use of language.
     This level has distractors but doesn't make use of language.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBall-v0`
+
     """
     """
 
 
     def __init__(self, room_size=8, num_dists=7, **kwargs):
     def __init__(self, room_size=8, num_dists=7, **kwargs):
@@ -57,7 +142,50 @@ class GoToRedBall(RoomGridLevel):
 
 
 class GoToRedBallNoDists(GoToRedBall):
 class GoToRedBallNoDists(GoToRedBall):
     """
     """
+
+    ## Description
+
     Go to the red ball. No distractors present.
     Go to the red ball. No distractors present.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallNoDists-v0`
+
     """
     """
 
 
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
@@ -66,7 +194,57 @@ class GoToRedBallNoDists(GoToRedBall):
 
 
 class GoToObj(RoomGridLevel):
 class GoToObj(RoomGridLevel):
     """
     """
-    Go to an object, inside a single room with no doors, no distractors
+    ## Description
+
+    Go to an object, inside a single room with no doors, no distractors. The
+    naming convention `GoToObjS{X}` represents a room of size `X`.
+
+    ## Mission Space
+
+    "go to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToObj-v0`
+    - `BabyAI-GoToObjS4-v0`
+    - `BabyAI-GoToObjS6-v0`
+
     """
     """
 
 
     def __init__(self, room_size=8, **kwargs):
     def __init__(self, room_size=8, **kwargs):
@@ -81,7 +259,68 @@ class GoToObj(RoomGridLevel):
 
 
 class GoToLocal(RoomGridLevel):
 class GoToLocal(RoomGridLevel):
     """
     """
-    Go to an object, inside a single room with no doors, no distractors
+
+    ## Description
+
+    Go to an object, inside a single room with no doors. Distractor objects
+    may be present: the naming convention `GoToLocalS{X}N{Y}` represents a
+    room of size `X` with `Y` distractors.
+
+    ## Mission Space
+
+    "go to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToLocal-v0`
+    - `BabyAI-GoToLocalS5N2-v0`
+    - `BabyAI-GoToLocalS6N2-v0`
+    - `BabyAI-GoToLocalS6N3-v0`
+    - `BabyAI-GoToLocalS6N4-v0`
+    - `BabyAI-GoToLocalS7N4-v0`
+    - `BabyAI-GoToLocalS7N5-v0`
+    - `BabyAI-GoToLocalS8N2-v0`
+    - `BabyAI-GoToLocalS8N3-v0`
+    - `BabyAI-GoToLocalS8N4-v0`
+    - `BabyAI-GoToLocalS8N5-v0`
+    - `BabyAI-GoToLocalS8N6-v0`
+    - `BabyAI-GoToLocalS8N7-v0`
     """
     """
 
 
     def __init__(self, room_size=8, num_dists=8, **kwargs):
     def __init__(self, room_size=8, num_dists=8, **kwargs):
@@ -98,7 +337,62 @@ class GoToLocal(RoomGridLevel):
 
 
 class GoTo(RoomGridLevel):
 class GoTo(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Go to an object, the object may be in another room. Many distractors.
     Go to an object, the object may be in another room. Many distractors.
+
+    ## Mission Space
+
+    "go to a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoTo-v0`
+    - `BabyAI-GoToOpen-v0`
+    - `BabyAI-GoToObjMaze-v0`
+    - `BabyAI-GoToObjMazeOpen-v0`
+    - `BabyAI-GoToObjMazeS4R2-v0`
+    - `BabyAI-GoToObjMazeS4-v0`
+    - `BabyAI-GoToObjMazeS5-v0`
+    - `BabyAI-GoToObjMazeS6-v0`
+    - `BabyAI-GoToObjMazeS7-v0`
     """
     """
 
 
     def __init__(
     def __init__(
@@ -131,9 +425,57 @@ class GoTo(RoomGridLevel):
 
 
 class GoToImpUnlock(RoomGridLevel):
 class GoToImpUnlock(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Go to an object, which may be in a locked room.
     Go to an object, which may be in a locked room.
     Competencies: Maze, GoTo, ImpUnlock
     Competencies: Maze, GoTo, ImpUnlock
     No unblocking.
     No unblocking.
+
+    ## Mission Space
+
+    "go to a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToImpUnlock-v0`
+
     """
     """
 
 
     def gen_mission(self):
     def gen_mission(self):
@@ -182,12 +524,64 @@ class GoToImpUnlock(RoomGridLevel):
 
 
 class GoToSeq(LevelGen):
 class GoToSeq(LevelGen):
     """
     """
+
+    ## Description
+
     Sequencing of go-to-object commands.
     Sequencing of go-to-object commands.
 
 
     Competencies: Maze, GoTo, Seq
     Competencies: Maze, GoTo, Seq
     No locked room.
     No locked room.
     No locations.
     No locations.
     No unblocking.
     No unblocking.
+
+    ## Mission Space
+
+    "go to a/the {color} {type}" +
+    "and go to a/the {color} {type}" +
+    ", then go to a/the {color} {type}" +
+    "and go to a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToSeq-v0`
+    - `BabyAI-GoToSeqS5R2-v0`
+
     """
     """
 
 
     def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
     def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
@@ -206,10 +600,55 @@ class GoToSeq(LevelGen):
 
 
 class GoToRedBlueBall(RoomGridLevel):
 class GoToRedBlueBall(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Go to the red ball or to the blue ball.
     Go to the red ball or to the blue ball.
     There is exactly one red or blue ball, and some distractors.
     There is exactly one red or blue ball, and some distractors.
     The distractors are guaranteed not to be red or blue balls.
     The distractors are guaranteed not to be red or blue balls.
     Language is not required to solve this level.
     Language is not required to solve this level.
+
+    ## Mission Space
+
+    "go to the {color} ball"
+
+    {color} is the color of the ball. Can be "red" or "blue".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBlueBall-v0`
+
     """
     """
 
 
     def __init__(self, room_size=8, num_dists=7, **kwargs):
     def __init__(self, room_size=8, num_dists=7, **kwargs):
@@ -237,9 +676,55 @@ class GoToRedBlueBall(RoomGridLevel):
 
 
 class GoToDoor(RoomGridLevel):
 class GoToDoor(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Go to a door
     Go to a door
     (of a given color, in the current room)
     (of a given color, in the current room)
     No distractors, no language variation
     No distractors, no language variation
+
+    ## Mission Space
+
+    "go to the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToDoor-v0`
+
     """
     """
 
 
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
@@ -258,8 +743,56 @@ class GoToDoor(RoomGridLevel):
 
 
 class GoToObjDoor(RoomGridLevel):
 class GoToObjDoor(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Go to an object or door
     Go to an object or door
     (of a given type and color, in the current room)
     (of a given type and color, in the current room)
+
+    ## Mission Space
+
+    "go to the {color} {type}"
+
+    {color} is the color of the object or door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box", "key" or "door".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object or door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToObjDoor-v0`
+
     """
     """
 
 
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):

+ 242 - 0
minigrid/envs/babyai/open.py

@@ -17,7 +17,53 @@ from minigrid.envs.babyai.core.verifier import (
 
 
 class Open(RoomGridLevel):
 class Open(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Open a door, which may be in another room
     Open a door, which may be in another room
+
+    ## Mission Space
+
+    "open a {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-Open-v0`
+
     """
     """
 
 
     def gen_mission(self):
     def gen_mission(self):
@@ -41,10 +87,53 @@ class Open(RoomGridLevel):
 
 
 class OpenRedDoor(RoomGridLevel):
 class OpenRedDoor(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Go to the red door
     Go to the red door
     (always unlocked, in the current room)
     (always unlocked, in the current room)
     Note: this level is intentionally meant for debugging and is
     Note: this level is intentionally meant for debugging and is
     intentionally kept very simple.
     intentionally kept very simple.
+
+    ## Mission Space
+
+    "open the red door"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-OpenRedDoor-v0`
+
     """
     """
 
 
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
@@ -58,9 +147,58 @@ class OpenRedDoor(RoomGridLevel):
 
 
 class OpenDoor(RoomGridLevel):
 class OpenDoor(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Go to the door
     Go to the door
     The door to open is given by its color or by its location.
     The door to open is given by its color or by its location.
     (always unlocked, in the current room)
     (always unlocked, in the current room)
+
+    ## Mission Space
+
+    "open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-OpenDoor-v0`
+    - `BabyAI-OpenDoorDebug-v0`
+    - `BabyAI-OpenDoorColor-v0`
+    - `BabyAI-OpenDoorLoc-v0`
+
     """
     """
 
 
     def __init__(self, debug=False, select_by=None, **kwargs):
     def __init__(self, debug=False, select_by=None, **kwargs):
@@ -92,10 +230,58 @@ class OpenDoor(RoomGridLevel):
 
 
 class OpenTwoDoors(RoomGridLevel):
 class OpenTwoDoors(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Open door X, then open door Y
     Open door X, then open door Y
     The two doors are facing opposite directions, so that the agent
     The two doors are facing opposite directions, so that the agent
     Can't see whether the door behind him is open.
     Can't see whether the door behind him is open.
     This task requires memory (recurrent policy) to be solved effectively.
     This task requires memory (recurrent policy) to be solved effectively.
+
+    ## Mission Space
+
+    "open the {color} door, then open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-OpenTwoDoors-v0`
+    - `BabyAI-OpenRedBlueDoors-v0`
+    - `BabyAI-OpenRedBlueDoorsDebug-v0`
+
     """
     """
 
 
     def __init__(
     def __init__(
@@ -139,7 +325,63 @@ class OpenTwoDoors(RoomGridLevel):
 
 
 class OpenDoorsOrder(RoomGridLevel):
 class OpenDoorsOrder(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Open one or two doors in the order specified.
     Open one or two doors in the order specified.
+
+    ## Mission Space
+
+    "open the {color} door, then open the {color} door"
+
+    or
+
+    "open the {color} door after you open the {color} door"
+
+    or
+
+    "open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-OpenDoorsOrderN2-v0`
+    - `BabyAI-OpenDoorsOrderN4-v0`
+    - `BabyAI-OpenDoorsOrderN2Debug-v0`
+    - `BabyAI-OpenDoorsOrderN4Debug-v0`
     """
     """
 
 
     def __init__(self, num_doors, debug=False, max_steps: int | None = None, **kwargs):
     def __init__(self, num_doors, debug=False, max_steps: int | None = None, **kwargs):

+ 248 - 2
minigrid/envs/babyai/other.py

@@ -17,10 +17,66 @@ from minigrid.envs.babyai.core.verifier import (
 
 
 class ActionObjDoor(RoomGridLevel):
 class ActionObjDoor(RoomGridLevel):
     """
     """
+
+    ## Description
+
     [pick up an object] or
     [pick up an object] or
     [go to an object or door] or
     [go to an object or door] or
     [open a door]
     [open a door]
     (in the current room)
     (in the current room)
+
+    ## Mission Space
+
+    "pick up the {color} {type}"
+
+    or
+
+    "go to the {color} {type}"
+
+    or
+
+    "open a {color} door"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box", "door" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent finishes the instruction.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-ActionObjDoor-v0`
+
     """
     """
 
 
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
@@ -51,9 +107,56 @@ class ActionObjDoor(RoomGridLevel):
 
 
 class FindObjS5(RoomGridLevel):
 class FindObjS5(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Pick up an object (in a random room)
     Pick up an object (in a random room)
     Rooms have a size of 5
     Rooms have a size of 5
     This level requires potentially exhaustive exploration
     This level requires potentially exhaustive exploration
+
+    ## Mission Space
+
+    "pick up the {type}"
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-FindObjS5-v0`
+    - `BabyAI-FindObjS6-v0`
+    - `BabyAI-FindObjS7-v0`
+
     """
     """
 
 
     def __init__(self, room_size=5, max_steps: int | None = None, **kwargs):
     def __init__(self, room_size=5, max_steps: int | None = None, **kwargs):
@@ -76,8 +179,57 @@ class FindObjS5(RoomGridLevel):
 
 
 class KeyCorridor(RoomGridLevel):
 class KeyCorridor(RoomGridLevel):
     """
     """
+
+    ## Description
+
     A ball is behind a locked door, the key is placed in a
     A ball is behind a locked door, the key is placed in a
     random room.
     random room.
+
+    ## Mission Space
+
+    "pick up the ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-KeyCorridor-v0`
+    - `BabyAI-KeyCorridorS3R1-v0`
+    - `BabyAI-KeyCorridorS3R2-v0`
+    - `BabyAI-KeyCorridorS3R3-v0`
+    - `BabyAI-KeyCorridorS4R3-v0`
+    - `BabyAI-KeyCorridorS5R3-v0`
+    - `BabyAI-KeyCorridorS6R3-v0`
+
     """
     """
 
 
     def __init__(
     def __init__(
@@ -122,8 +274,53 @@ class KeyCorridor(RoomGridLevel):
 
 
 class OneRoomS8(RoomGridLevel):
 class OneRoomS8(RoomGridLevel):
     """
     """
-    Pick up the ball
-    Rooms have a size of 8
+
+    ## Description
+
+    Pick up the ball. Rooms have a size of 8.
+
+    ## Mission Space
+
+    "pick up the ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-OneRoomS8-v0`
+    - `BabyAI-OneRoomS12-v0`
+    - `BabyAI-OneRoomS16-v0`
+    - `BabyAI-OneRoomS20-v0`
+
     """
     """
 
 
     def __init__(self, room_size=8, **kwargs):
     def __init__(self, room_size=8, **kwargs):
@@ -137,9 +334,58 @@ class OneRoomS8(RoomGridLevel):
 
 
 class MoveTwoAcross(RoomGridLevel):
 class MoveTwoAcross(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Task of the form: move the A next to the B and the C next to the D.
     Task of the form: move the A next to the B and the C next to the D.
     This task is structured to have a very large number of possible
     This task is structured to have a very large number of possible
     instructions.
     instructions.
+
+    ## Mission Space
+
+    "put the {color} {type} next to the {color} {type}, then put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent finishes the instruction.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-MoveTwoAcrossS5N2-v0`
+    - `BabyAI-MoveTwoAcrossS8N9-v0`
+
     """
     """
 
 
     def __init__(
     def __init__(

+ 241 - 0
minigrid/envs/babyai/pickup.py

@@ -11,7 +11,55 @@ from minigrid.envs.babyai.core.verifier import ObjDesc, PickupInstr
 
 
 class Pickup(RoomGridLevel):
 class Pickup(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Pick up an object, the object may be in another room.
     Pick up an object, the object may be in another room.
+
+    ## Mission Space
+
+    "pick up a {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-Pickup-v0`
+
     """
     """
 
 
     def gen_mission(self):
     def gen_mission(self):
@@ -25,8 +73,56 @@ class Pickup(RoomGridLevel):
 
 
 class UnblockPickup(RoomGridLevel):
 class UnblockPickup(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Pick up an object, the object may be in another room. The path may
     Pick up an object, the object may be in another room. The path may
     be blocked by one or more obstructors.
     be blocked by one or more obstructors.
+
+    ## Mission Space
+
+    "pick up a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-UnblockPickup-v0`
+
     """
     """
 
 
     def gen_mission(self):
     def gen_mission(self):
@@ -45,10 +141,58 @@ class UnblockPickup(RoomGridLevel):
 
 
 class PickupLoc(LevelGen):
 class PickupLoc(LevelGen):
     """
     """
+
+    ## Description
+
     Pick up an object which may be described using its location. This is a
     Pick up an object which may be described using its location. This is a
     single room environment.
     single room environment.
 
 
     Competencies: PickUp, Loc. No unblocking.
     Competencies: PickUp, Loc. No unblocking.
+
+    ## Mission Space
+
+    "pick up the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-PickupLoc-v0`
+
     """
     """
 
 
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
@@ -69,10 +213,59 @@ class PickupLoc(LevelGen):
 
 
 class PickupDist(RoomGridLevel):
 class PickupDist(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Pick up an object
     Pick up an object
     The object to pick up is given by its type only, or
     The object to pick up is given by its type only, or
     by its color, or by its type and color.
     by its color, or by its type and color.
     (in the current room, with distractors)
     (in the current room, with distractors)
+
+    ## Mission Space
+
+    "pick up a/the {color}/{type}/{color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-PickupDist-v0`
+    - `BabyAI-PickupDistDebug-v0`
+
     """
     """
 
 
     def __init__(self, debug=False, **kwargs):
     def __init__(self, debug=False, **kwargs):
@@ -98,8 +291,56 @@ class PickupDist(RoomGridLevel):
 
 
 class PickupAbove(RoomGridLevel):
 class PickupAbove(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Pick up an object (in the room above)
     Pick up an object (in the room above)
     This task requires to use the compass to be solved effectively.
     This task requires to use the compass to be solved effectively.
+
+    ## Mission Space
+
+    "pick up the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-PickupAbove-v0`
+
     """
     """
 
 
     def __init__(self, max_steps: int | None = None, **kwargs):
     def __init__(self, max_steps: int | None = None, **kwargs):

+ 105 - 0
minigrid/envs/babyai/putnext.py

@@ -10,8 +10,58 @@ from minigrid.envs.babyai.core.verifier import ObjDesc, PutNextInstr
 
 
 class PutNextLocal(RoomGridLevel):
 class PutNextLocal(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Put an object next to another object, inside a single room
     Put an object next to another object, inside a single room
     with no doors, no distractors
     with no doors, no distractors
+
+    ## Mission Space
+
+    "put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent finishes the instructed task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-PutNextLocal-v0`
+    - `BabyAI-PutNextLocalS5N3-v0`
+    - `BabyAI-PutNextLocalS6N4-v0`
+
     """
     """
 
 
     def __init__(self, room_size=8, num_objs=8, **kwargs):
     def __init__(self, room_size=8, num_objs=8, **kwargs):
@@ -31,9 +81,64 @@ class PutNextLocal(RoomGridLevel):
 
 
 class PutNext(RoomGridLevel):
 class PutNext(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Task of the form: move the A next to the B and the C next to the D.
     Task of the form: move the A next to the B and the C next to the D.
     This task is structured to have a very large number of possible
     This task is structured to have a very large number of possible
     instructions.
     instructions.
+
+    ## Mission Space
+
+    "put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Drop an object    |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent finishes the instructed task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-PutNextS4N1-v0`
+    - `BabyAI-PutNextS5N2-v0`
+    - `BabyAI-PutNextS5N1-v0`
+    - `BabyAI-PutNextS6N3-v0`
+    - `BabyAI-PutNextS7N4-v0`
+    - `BabyAI-PutNextS5N2Carrying-v0`
+    - `BabyAI-PutNextS6N3Carrying-v0`
+    - `BabyAI-PutNextS7N4Carrying-v0`
+
     """
     """
 
 
     def __init__(
     def __init__(

+ 487 - 8
minigrid/envs/babyai/synth.py

@@ -11,11 +11,73 @@ from minigrid.envs.babyai.core.levelgen import LevelGen
 
 
 class Synth(LevelGen):
 class Synth(LevelGen):
     """
     """
-    Union of all instructions from PutNext, Open, Goto and PickUp. The agent
-    may need to move objects around. The agent may have to unlock the door,
-    but only if it is explicitly referred by the instruction.
+
+    ## Description
+
+    Union of all instructions from PutNext, Open, Goto and PickUp.
+    The agent may need to move objects around. The agent may have
+    to unlock the door, but only if it is explicitly referred by
+    the instruction.
 
 
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open
+
+    ## Mission Space
+
+    "go to the {color} {type}"
+
+    or
+
+    "pick up a/the {color} {type}"
+
+    or
+
+    "open the {color} door"
+
+    or
+
+    "put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-Synth-v0`
+    - `BabyAI-SynthS5R2-v0`
+
     """
     """
 
 
     def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
     def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
@@ -34,17 +96,74 @@ class Synth(LevelGen):
         )
         )
 
 
 
 
-class SynthS5R2(Synth):
-    def __init__(self, **kwargs):
-        super().__init__(room_size=5, num_rows=2, num_cols=2, num_dists=7, **kwargs)
-
-
 class SynthLoc(LevelGen):
 class SynthLoc(LevelGen):
     """
     """
+
+    ## Description
+
     Like Synth, but a significant share of object descriptions involves
     Like Synth, but a significant share of object descriptions involves
     location language like in PickUpLoc. No implicit unlocking.
     location language like in PickUpLoc. No implicit unlocking.
 
 
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc
+
+    ## Mission Space
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-SynthLoc-v0`
     """
     """
 
 
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
@@ -61,10 +180,96 @@ class SynthLoc(LevelGen):
 
 
 class SynthSeq(LevelGen):
 class SynthSeq(LevelGen):
     """
     """
+
+    ## Description
+
     Like SynthLoc, but now with multiple commands, combined just like in GoToSeq.
     Like SynthLoc, but now with multiple commands, combined just like in GoToSeq.
     No implicit unlocking.
     No implicit unlocking.
 
 
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc, Seq
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc, Seq
+
+    ## Mission Space
+
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions, each of which can be an action mission or an "and" mission,
+    concatenated with ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-SynthSeq-v0`
+
     """
     """
 
 
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
@@ -76,6 +281,99 @@ class SynthSeq(LevelGen):
 
 
 
 
 class MiniBossLevel(LevelGen):
 class MiniBossLevel(LevelGen):
+    """
+
+    ## Description
+
+    Command can be any sentence drawn from the Baby Language grammar.
+    Union of all competencies. This level is a superset of all other levels.
+    Compared to BossLevel this has a smaller room and a lower probability of
+    locked rooms.
+
+    ## Mission Space
+
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object or door. Can be "red", "green", "blue",
+    "purple", "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions, each of which can be an action mission or an "and" mission,
+    concatenated with ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-MiniBossLevel-v0`
+
+    """
+
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
         super().__init__(
         super().__init__(
             num_cols=2,
             num_cols=2,
@@ -88,10 +386,191 @@ class MiniBossLevel(LevelGen):
 
 
 
 
 class BossLevel(LevelGen):
 class BossLevel(LevelGen):
+    """
+
+    ## Description
+
+    Command can be any sentence drawn from the Baby Language grammar.
+    Union of all competencies. This level is a superset of all other levels.
+
+    ## Mission Space
+
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object or door. Can be "red", "green", "blue",
+    "purple", "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions, each of which can be an action mission or an "and" mission,
+    concatenated with ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-BossLevel-v0`
+    """
+
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         super().__init__(**kwargs)
 
 
 
 
 class BossLevelNoUnlock(LevelGen):
 class BossLevelNoUnlock(LevelGen):
+    """
+
+    ## Description
+
+    Command can be any sentence drawn from the Baby Language grammar.
+    Union of all competencies. This level is a superset of all other levels.
+    No implicit unlocking.
+
+    ## Mission Space
+
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object or door. Can be "red", "green", "blue",
+    "purple", "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions, each of which can be an action mission or an "and" mission,
+    concatenated with ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent achieves the task.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-BossLevelNoUnlock-v0`
+    """
+
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
         super().__init__(locked_room_prob=0, implicit_unlock=False, **kwargs)
         super().__init__(locked_room_prob=0, implicit_unlock=False, **kwargs)

+ 266 - 0
minigrid/envs/babyai/unlock.py

@@ -12,9 +12,55 @@ from minigrid.envs.babyai.core.verifier import ObjDesc, OpenInstr, PickupInstr
 
 
 class Unlock(RoomGridLevel):
 class Unlock(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Unlock a door.
     Unlock a door.
 
 
     Competencies: Maze, Open, Unlock. No unblocking.
     Competencies: Maze, Open, Unlock. No unblocking.
+
+    ## Mission Space
+
+    "open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the correct door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-Unlock-v0`
+
     """
     """
 
 
     def gen_mission(self):
     def gen_mission(self):
@@ -66,8 +112,52 @@ class Unlock(RoomGridLevel):
 
 
 class UnlockLocal(RoomGridLevel):
 class UnlockLocal(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Fetch a key and unlock a door
     Fetch a key and unlock a door
     (in the current room)
     (in the current room)
+
+    ## Mission Space
+
+    "open the door"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-UnlockLocal-v0`
+    - `BabyAI-UnlockLocalDist-v0`
+
     """
     """
 
 
     def __init__(self, distractors=False, **kwargs):
     def __init__(self, distractors=False, **kwargs):
@@ -86,7 +176,50 @@ class UnlockLocal(RoomGridLevel):
 
 
 class KeyInBox(RoomGridLevel):
 class KeyInBox(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Unlock a door. Key is in a box (in the current room).
     Unlock a door. Key is in a box (in the current room).
+
+    ## Mission Space
+
+    "open the door"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-KeyInBox-v0`
+
     """
     """
 
 
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
@@ -107,7 +240,54 @@ class KeyInBox(RoomGridLevel):
 
 
 class UnlockPickup(RoomGridLevel):
 class UnlockPickup(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Unlock a door, then pick up a box in another room
     Unlock a door, then pick up a box in another room
+
+    ## Mission Space
+
+    "pick up the {color} box"
+
+    {color} is the color of the box. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the correct box.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-UnlockPickup-v0`
+    - `BabyAI-UnlockPickupDist-v0`
+
     """
     """
 
 
     def __init__(self, distractors=False, max_steps: int | None = None, **kwargs):
     def __init__(self, distractors=False, max_steps: int | None = None, **kwargs):
@@ -137,8 +317,51 @@ class UnlockPickup(RoomGridLevel):
 
 
 class BlockedUnlockPickup(RoomGridLevel):
 class BlockedUnlockPickup(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Unlock a door blocked by a ball, then pick up a box
     Unlock a door blocked by a ball, then pick up a box
     in another room
     in another room
+
+    ## Mission Space
+
+    "pick up the box"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the box.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-BlockedUnlockPickup-v0`
+
     """
     """
 
 
     def __init__(self, max_steps: int | None = None, **kwargs):
     def __init__(self, max_steps: int | None = None, **kwargs):
@@ -168,7 +391,50 @@ class BlockedUnlockPickup(RoomGridLevel):
 
 
 class UnlockToUnlock(RoomGridLevel):
 class UnlockToUnlock(RoomGridLevel):
     """
     """
+
+    ## Description
+
     Unlock a door A that requires to unlock a door B before
     Unlock a door A that requires to unlock a door B before
+
+    ## Mission Space
+
+    "pick up the ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-UnlockToUnlock-v0`
+
     """
     """
 
 
     def __init__(self, max_steps: int | None = None, **kwargs):
     def __init__(self, max_steps: int | None = None, **kwargs):

+ 222 - 1
minigrid/wrappers.py

@@ -18,19 +18,49 @@ class ReseedWrapper(Wrapper):
     Wrapper to always regenerate an environment with the same set of seeds.
     Wrapper to always regenerate an environment with the same set of seeds.
     This can be used to force an environment to always keep the same
     This can be used to force an environment to always keep the same
     configuration when reset.
     configuration when reset.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import ReseedWrapper
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [1, 9, 5, 8, 4, 3, 8, 8, 3, 1]
+        >>> env = ReseedWrapper(env, seeds=[0, 1], seed_idx=0)
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [8, 6, 5, 2, 3, 0, 0, 0, 1, 8]
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [4, 5, 7, 9, 0, 1, 8, 9, 2, 3]
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [8, 6, 5, 2, 3, 0, 0, 0, 1, 8]
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [4, 5, 7, 9, 0, 1, 8, 9, 2, 3]
     """
     """
 
 
     def __init__(self, env, seeds=[0], seed_idx=0):
     def __init__(self, env, seeds=[0], seed_idx=0):
+        """A wrapper that always regenerates an environment with the same set of seeds.
+
+        Args:
+            env: The environment to apply the wrapper
+            seeds: A list of seeds to be applied to the env
+            seed_idx: Index of the initial seed in seeds
+        """
         self.seeds = list(seeds)
         self.seeds = list(seeds)
         self.seed_idx = seed_idx
         self.seed_idx = seed_idx
         super().__init__(env)
         super().__init__(env)
 
 
     def reset(self, **kwargs):
     def reset(self, **kwargs):
+        """Resets the environment with `kwargs`."""
         seed = self.seeds[self.seed_idx]
         seed = self.seeds[self.seed_idx]
         self.seed_idx = (self.seed_idx + 1) % len(self.seeds)
         self.seed_idx = (self.seed_idx + 1) % len(self.seeds)
         return self.env.reset(seed=seed, **kwargs)
         return self.env.reset(seed=seed, **kwargs)
 
 
     def step(self, action):
     def step(self, action):
+        """Steps through the environment with `action`."""
         return self.env.step(action)
         return self.env.step(action)
 
 
 
 
@@ -39,13 +69,40 @@ class ActionBonus(gym.Wrapper):
     Wrapper which adds an exploration bonus.
     Wrapper which adds an exploration bonus.
     This is a reward to encourage exploration of less
     This is a reward to encourage exploration of less
     visited (state,action) pairs.
     visited (state,action) pairs.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import ActionBonus
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> _, _ = env.reset(seed=0)
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> env_bonus = ActionBonus(env)
+        >>> _, _ = env_bonus.reset(seed=0)
+        >>> _, reward, _, _, _ = env_bonus.step(1)
+        >>> print(reward)
+        1.0
+        >>> _, reward, _, _, _ = env_bonus.step(1)
+        >>> print(reward)
+        1.0
     """
     """
 
 
     def __init__(self, env):
     def __init__(self, env):
+        """A wrapper that adds an exploration bonus to less visited (state,action) pairs.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
         super().__init__(env)
         self.counts = {}
         self.counts = {}
 
 
     def step(self, action):
     def step(self, action):
+        """Steps through the environment with `action`."""
         obs, reward, terminated, truncated, info = self.env.step(action)
         obs, reward, terminated, truncated, info = self.env.step(action)
 
 
         env = self.unwrapped
         env = self.unwrapped
@@ -66,20 +123,49 @@ class ActionBonus(gym.Wrapper):
         return obs, reward, terminated, truncated, info
         return obs, reward, terminated, truncated, info
 
 
     def reset(self, **kwargs):
     def reset(self, **kwargs):
+        """Resets the environment with `kwargs`."""
         return self.env.reset(**kwargs)
         return self.env.reset(**kwargs)
 
 
 
 
+# Should be named PositionBonus
 class StateBonus(Wrapper):
 class StateBonus(Wrapper):
     """
     """
     Adds an exploration bonus based on which positions
     Adds an exploration bonus based on which positions
     are visited on the grid.
     are visited on the grid.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import StateBonus
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> _, _ = env.reset(seed=0)
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> env_bonus = StateBonus(env)
+        >>> obs, _ = env_bonus.reset(seed=0)
+        >>> obs, reward, terminated, truncated, info = env_bonus.step(1)
+        >>> print(reward)
+        1.0
+        >>> obs, reward, terminated, truncated, info = env_bonus.step(1)
+        >>> print(reward)
+        0.7071067811865475
     """
     """
 
 
     def __init__(self, env):
     def __init__(self, env):
+        """A wrapper that adds an exploration bonus to less visited positions.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
         super().__init__(env)
         self.counts = {}
         self.counts = {}
 
 
     def step(self, action):
     def step(self, action):
+        """Steps through the environment with `action`."""
         obs, reward, terminated, truncated, info = self.env.step(action)
         obs, reward, terminated, truncated, info = self.env.step(action)
 
 
         # Tuple based on which we index the counts
         # Tuple based on which we index the counts
@@ -102,15 +188,34 @@ class StateBonus(Wrapper):
         return obs, reward, terminated, truncated, info
         return obs, reward, terminated, truncated, info
 
 
     def reset(self, **kwargs):
     def reset(self, **kwargs):
+        """Resets the environment with `kwargs`."""
         return self.env.reset(**kwargs)
         return self.env.reset(**kwargs)
 
 
 
 
 class ImgObsWrapper(ObservationWrapper):
 class ImgObsWrapper(ObservationWrapper):
     """
     """
     Use the image as the only observation output, no language/mission.
     Use the image as the only observation output, no language/mission.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import ImgObsWrapper
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs.keys()
+        dict_keys(['image', 'direction', 'mission'])
+        >>> env = ImgObsWrapper(env)
+        >>> obs, _ = env.reset()
+        >>> obs.shape
+        (7, 7, 3)
     """
     """
 
 
     def __init__(self, env):
     def __init__(self, env):
+        """A wrapper that makes image the only observation.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
         super().__init__(env)
         self.observation_space = env.observation_space.spaces["image"]
         self.observation_space = env.observation_space.spaces["image"]
 
 
@@ -122,9 +227,40 @@ class OneHotPartialObsWrapper(ObservationWrapper):
     """
     """
     Wrapper to get a one-hot encoding of a partially observable
     Wrapper to get a one-hot encoding of a partially observable
     agent view as observation.
     agent view as observation.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import OneHotPartialObsWrapper
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs["image"][0, :, :]
+        array([[2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0]], dtype=uint8)
+        >>> env = OneHotPartialObsWrapper(env)
+        >>> obs, _ = env.reset()
+        >>> obs["image"][0, :, :]
+        array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0]],
+               dtype=uint8)
     """
     """
 
 
     def __init__(self, env, tile_size=8):
     def __init__(self, env, tile_size=8):
+        """A wrapper that makes the image observation a one-hot encoding of a partially observable agent view.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
         super().__init__(env)
 
 
         self.tile_size = tile_size
         self.tile_size = tile_size
@@ -162,6 +298,20 @@ class RGBImgObsWrapper(ObservationWrapper):
     """
     """
     Wrapper to use fully observable RGB image as observation,
     Wrapper to use fully observable RGB image as observation,
     This can be used to have the agent to solve the gridworld in pixel space.
     This can be used to have the agent to solve the gridworld in pixel space.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import RGBImgObsWrapper
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> obs, _ = env.reset()
+        >>> plt.imshow(obs['image'])
+        ![NoWrapper](../figures/lavacrossing_NoWrapper.png)
+        >>> env = RGBImgObsWrapper(env)
+        >>> obs, _ = env.reset()
+        >>> plt.imshow(obs['image'])
+        ![RGBImgObsWrapper](../figures/lavacrossing_RGBImgObsWrapper.png)
     """
     """
 
 
     def __init__(self, env, tile_size=8):
     def __init__(self, env, tile_size=8):
@@ -190,6 +340,24 @@ class RGBImgPartialObsWrapper(ObservationWrapper):
     """
     """
     Wrapper to use partially observable RGB image as observation.
     Wrapper to use partially observable RGB image as observation.
     This can be used to have the agent to solve the gridworld in pixel space.
     This can be used to have the agent to solve the gridworld in pixel space.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import RGBImgObsWrapper, RGBImgPartialObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> plt.imshow(obs["image"])
+        ![NoWrapper](../figures/lavacrossing_NoWrapper.png)
+        >>> env_obs = RGBImgObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> plt.imshow(obs["image"])
+        ![RGBImgObsWrapper](../figures/lavacrossing_RGBImgObsWrapper.png)
+        >>> env_obs = RGBImgPartialObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> plt.imshow(obs["image"])
+        ![RGBImgPartialObsWrapper](../figures/lavacrossing_RGBImgPartialObsWrapper.png)
     """
     """
 
 
     def __init__(self, env, tile_size=8):
     def __init__(self, env, tile_size=8):
@@ -218,7 +386,21 @@ class RGBImgPartialObsWrapper(ObservationWrapper):
 
 
 class FullyObsWrapper(ObservationWrapper):
 class FullyObsWrapper(ObservationWrapper):
     """
     """
-    Fully observable gridworld using a compact grid encoding
+    Fully observable gridworld using a compact grid encoding instead of the agent view.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import FullyObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs['image'].shape
+        (7, 7, 3)
+        >>> env_obs = FullyObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> obs['image'].shape
+        (11, 11, 3)
     """
     """
 
 
     def __init__(self, env):
     def __init__(self, env):
@@ -251,6 +433,20 @@ class DictObservationSpaceWrapper(ObservationWrapper):
     where the textual instructions are replaced by arrays representing the indices of each word in a fixed vocabulary.
     where the textual instructions are replaced by arrays representing the indices of each word in a fixed vocabulary.
 
 
     This wrapper is not applicable to BabyAI environments, given that these have their own language component.
     This wrapper is not applicable to BabyAI environments, given that these have their own language component.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import DictObservationSpaceWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs['mission']
+        'avoid the lava and get to the green goal square'
+        >>> env_obs = DictObservationSpaceWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> obs['mission'][:10]
+        [19, 31, 17, 36, 20, 38, 31, 2, 15, 35]
     """
     """
 
 
     def __init__(self, env, max_words_in_mission=50, word_dict=None):
     def __init__(self, env, max_words_in_mission=50, word_dict=None):
@@ -371,6 +567,17 @@ class FlatObsWrapper(ObservationWrapper):
     and combine these with observed images into one flat array.
     and combine these with observed images into one flat array.
 
 
     This wrapper is not applicable to BabyAI environments, given that these have their own language component.
     This wrapper is not applicable to BabyAI environments, given that these have their own language component.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import FlatObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> env_obs = FlatObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> obs.shape
+        (2835,)
     """
     """
 
 
     def __init__(self, env, maxStrLen=96):
     def __init__(self, env, maxStrLen=96):
@@ -432,6 +639,20 @@ class ViewSizeWrapper(Wrapper):
     """
     """
     Wrapper to customize the agent field of view size.
     Wrapper to customize the agent field of view size.
     This cannot be used with fully observable wrappers.
     This cannot be used with fully observable wrappers.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import ViewSizeWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs['image'].shape
+        (7, 7, 3)
+        >>> env_obs = ViewSizeWrapper(env, agent_view_size=5)
+        >>> obs, _ = env_obs.reset()
+        >>> obs['image'].shape
+        (5, 5, 3)
     """
     """
 
 
     def __init__(self, env, agent_view_size=7):
     def __init__(self, env, agent_view_size=7):

+ 4 - 1
tests/utils.py

@@ -7,7 +7,10 @@ import numpy as np
 all_testing_env_specs = [
 all_testing_env_specs = [
     env_spec
     env_spec
     for env_spec in gym.envs.registry.values()
     for env_spec in gym.envs.registry.values()
-    if env_spec.entry_point.startswith("minigrid.envs")
+    if (
+        isinstance(env_spec.entry_point, str)
+        and env_spec.entry_point.startswith("minigrid.envs")
+    )
 ]
 ]
 
 
 minigrid_testing_env_specs = [
 minigrid_testing_env_specs = [