
Docs: Fixes to edit button (#259)

Manuel Goulão, 2 years ago
commit e4e4f62280

+ 3 - 0
docs/.gitignore

@@ -4,3 +4,6 @@ _site
 __pycache__
 _build/
 .vscode/
+
+environments/
+!environments/index.md
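The `.gitignore` hunk above ignores the autogenerated pages under `environments/` while re-including the hand-written `environments/index.md`. The ordering matters because ignore rules are evaluated last-match-wins. A simplified model of that semantics (an illustrative sketch, not git's full matching algorithm; `is_ignored` and the rule list are hypothetical):

```python
from fnmatch import fnmatch

def is_ignored(path, rules):
    """Simplified last-match-wins ignore semantics: each rule is
    (pattern, negated); the final matching rule decides the outcome."""
    ignored = False
    for pattern, negated in rules:
        if fnmatch(path, pattern):
            ignored = not negated
    return ignored

# Rules mirroring the hunk above: ignore everything under environments/
# except the hand-written index page.
rules = [("environments/*", False), ("environments/index.md", True)]

print(is_ignored("environments/empty.md", rules))  # autogenerated page: ignored
print(is_ignored("environments/index.md", rules))  # re-included by the negation
```

Real git applies additional constraints (for instance around excluded parent directories), so this is only a model of why the negation line must come after the broad pattern.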

+ 2 - 2
docs/conf.py

@@ -84,9 +84,9 @@ html_context: Dict[str, Any] = {}
 html_context["conf_py_path"] = "/docs/"
 html_context["display_github"] = True
 html_context["github_user"] = "Farama-Foundation"
-html_context["github_repo"] = "Gymnasium"
+html_context["github_repo"] = "Minigrid"
 html_context["github_version"] = "master"
-html_context["slug"] = "gymnasium"
+html_context["slug"] = "minigrid"
 
 html_static_path = ["_static"]
 html_css_files = []
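The two changed values matter because Sphinx themes typically assemble each page's "Edit on GitHub" link from `html_context`, so pointing at the wrong repository and slug produced broken edit buttons. A minimal sketch of how such a link is composed (the exact URL scheme is theme-dependent; `edit_url` is a hypothetical helper, not part of Sphinx):

```python
from typing import Any, Dict

# Values mirroring the corrected conf.py above.
html_context: Dict[str, Any] = {
    "github_user": "Farama-Foundation",
    "github_repo": "Minigrid",
    "github_version": "master",
    "conf_py_path": "/docs/",
}

def edit_url(ctx: Dict[str, Any], pagename: str) -> str:
    """Compose a GitHub edit link for a documentation source page."""
    return (
        f"https://github.com/{ctx['github_user']}/{ctx['github_repo']}"
        f"/edit/{ctx['github_version']}{ctx['conf_py_path']}{pagename}.md"
    )

print(edit_url(html_context, "environments/empty"))
```

With the old `"Gymnasium"` value the link targeted the wrong repository, which is the bug this commit fixes.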

+ 0 - 59
docs/environments/blocked_unlock_pickup.md

@@ -1,59 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Blocked Unlock Pickup
----
-
-
-# Blocked Unlock Pickup
-
-### Description
-
-The agent has to pick up a box which is placed in another room, behind a
-locked door. The door is also blocked by a ball which the agent has to move
-before it can unlock the door. Hence, the agent has to learn to move the
-ball, pick up the key, open the door and pick up the object in the other
-room. This environment can be solved without relying on language.
-
-### Mission Space
-
-"pick up the {color} {type}"
-
-{color} is the color of the box. Can be "red", "green", "blue", "purple",
-"yellow" or "grey".
-
-{type} is the type of the object. Can be "box" or "key".
-
-### Action Space
-
-| Num | Name         | Action            |
-|-----|--------------|-------------------|
-| 0   | left         | Turn left         |
-| 1   | right        | Turn right        |
-| 2   | forward      | Move forward      |
-| 3   | pickup       | Pick up an object |
-| 4   | drop         | Unused            |
-| 5   | toggle       | Unused            |
-| 6   | done         | Unused            |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent picks up the correct box.
-2. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-- `MiniGrid-BlockedUnlockPickup-v0`

+ 0 - 39
docs/environments/design.md

@@ -1,39 +0,0 @@
----
-layout: "contents"
-title: Design
-firstpage:
----
-
-# General Structure
-
-Structure of the world:
-- The world is an NxM grid of tiles
-- Each tile in the grid world contains zero or one object
-  - Cells that do not contain an object have the value `None`
-- Each object has an associated discrete color (string)
-- Each object has an associated type (string)
-  - Provided object types are: wall, floor, lava, door, key, ball, box and goal
-- The agent can pick up and carry exactly one object (e.g. a ball or key)
-- To open a locked door, the agent has to be carrying a key matching the door's color
-
-Actions in the basic environment:
-- Turn left
-- Turn right
-- Move forward
-- Pick up an object
-- Drop the object being carried
-- Toggle (open doors, interact with objects)
-- Done (task completed, optional)
-
-Default tile/observation encoding:
-- Each tile is encoded as a 3 dimensional tuple: `(OBJECT_IDX, COLOR_IDX, STATE)` 
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-By default, sparse rewards are given for reaching a green goal tile. A
-reward of 1 is given for success, and zero for failure. There is also an
-environment-specific time step limit for completing the task.
-You can define your own reward function by creating a class derived
-from `MiniGridEnv`. Extending the environment with new object types or new actions
-should be very easy. If you wish to do this, you should take a look at the
-[minigrid/minigrid.py](minigrid/minigrid.py) source file.
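The `(OBJECT_IDX, COLOR_IDX, STATE)` tile encoding described in the deleted design page can be sketched as follows. The abbreviated `OBJECT_TO_IDX`/`COLOR_TO_IDX` tables below are illustrative assumptions for demonstration only; the authoritative mappings live in minigrid/minigrid.py:

```python
# Illustrative (assumed) index tables; see minigrid/minigrid.py for the
# real OBJECT_TO_IDX and COLOR_TO_IDX definitions.
OBJECT_TO_IDX = {"empty": 1, "wall": 2, "door": 4, "key": 5, "ball": 6, "goal": 8}
COLOR_TO_IDX = {"red": 0, "green": 1, "blue": 2, "purple": 3, "yellow": 4, "grey": 5}
STATE_NAMES = {0: "open", 1: "closed", 2: "locked"}  # STATE applies to doors

IDX_TO_OBJECT = {v: k for k, v in OBJECT_TO_IDX.items()}
IDX_TO_COLOR = {v: k for k, v in COLOR_TO_IDX.items()}

def describe_tile(tile):
    """Decode one (OBJECT_IDX, COLOR_IDX, STATE) tuple into readable text."""
    obj_idx, color_idx, state = tile
    desc = f"{IDX_TO_COLOR[color_idx]} {IDX_TO_OBJECT[obj_idx]}"
    if IDX_TO_OBJECT[obj_idx] == "door":
        desc += f" ({STATE_NAMES[state]})"
    return desc

print(describe_tile((4, 4, 2)))  # a locked yellow door
```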

+ 0 - 58
docs/environments/dist_shift2.md

@@ -1,58 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Dist Shift2
----
-
-
-# Dist Shift2
-
-### Description
-
-This environment is based on one of the DeepMind
-[AI safety gridworlds](https://github.com/deepmind/ai-safety-gridworlds).
-The agent starts in the top-left corner and must reach the goal, which is
-in the top-right corner,
-but has to avoid stepping into lava on its way. The aim of this environment
-is to test an agent's ability to generalize. There are two slightly
-different variants of the environment, so that the agent can be trained on
-one variant and tested on the other.
-
-### Mission Space
-
-"get to the green goal square"
-
-### Action Space
-
-| Num | Name         | Action       |
-|-----|--------------|--------------|
-| 0   | left         | Turn left    |
-| 1   | right        | Turn right   |
-| 2   | forward      | Move forward |
-| 3   | pickup       | Unused       |
-| 4   | drop         | Unused       |
-| 5   | toggle       | Unused       |
-| 6   | done         | Unused       |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent reaches the goal.
-2. The agent falls into lava.
-3. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-- `MiniGrid-DistShift1-v0`
-- `MiniGrid-DistShift2-v0`

+ 0 - 56
docs/environments/door_key.md

@@ -1,56 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Door Key
----
-
-
-# Door Key
-
-### Description
-
-This environment has a key that the agent must pick up in order to unlock
-a door and then get to the green goal square. Because of the sparse
-reward, this environment is difficult to solve using classical RL
-algorithms. It is useful to experiment with curiosity or curriculum learning.
-
-### Mission Space
-
-"use the key to open the door and then get to the goal"
-
-### Action Space
-
-| Num | Name         | Action                    |
-|-----|--------------|---------------------------|
-| 0   | left         | Turn left                 |
-| 1   | right        | Turn right                |
-| 2   | forward      | Move forward              |
-| 3   | pickup       | Pick up an object         |
-| 4   | drop         | Unused                    |
-| 5   | toggle       | Toggle/activate an object |
-| 6   | done         | Unused                    |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent reaches the goal.
-2. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-- `MiniGrid-DoorKey-5x5-v0`
-- `MiniGrid-DoorKey-6x6-v0`
-- `MiniGrid-DoorKey-8x8-v0`
-- `MiniGrid-DoorKey-16x16-v0`

+ 0 - 62
docs/environments/dynamic.md

@@ -1,62 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Dynamic
----
-
-
-# Dynamic
-
-### Description
-
-This environment is an empty room with moving obstacles.
-The goal of the agent is to reach the green goal square without colliding
-with any obstacle. A large penalty is subtracted if the agent collides with
-an obstacle, and the episode then finishes. This environment is useful to
-test dynamic obstacle avoidance for mobile robots with reinforcement
-learning under partial observability.
-
-### Mission Space
-
-"get to the green goal square"
-
-### Action Space
-
-| Num | Name         | Action       |
-|-----|--------------|--------------|
-| 0   | left         | Turn left    |
-| 1   | right        | Turn right   |
-| 2   | forward      | Move forward |
-| 3   | pickup       | Unused       |
-| 4   | drop         | Unused       |
-| 5   | toggle       | Unused       |
-| 6   | done         | Unused       |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure. A '-1' penalty is
-subtracted if the agent collides with an obstacle.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent reaches the goal.
-2. The agent collides with an obstacle.
-3. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-- `MiniGrid-Dynamic-Obstacles-5x5-v0`
-- `MiniGrid-Dynamic-Obstacles-Random-5x5-v0`
-- `MiniGrid-Dynamic-Obstacles-6x6-v0`
-- `MiniGrid-Dynamic-Obstacles-Random-6x6-v0`
-- `MiniGrid-Dynamic-Obstacles-8x8-v0`
-- `MiniGrid-Dynamic-Obstacles-16x16-v0`
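The Dynamic page's reward rule (+1 on success, −1 on collision, 0 otherwise) can be sketched as a simple function. This is a simplification based only on the text above; the actual environment may additionally discount the success reward by episode length:

```python
def reward(reached_goal: bool, collided: bool) -> float:
    """Reward rule as stated on the page: -1 for a collision (which also
    ends the episode), +1 for reaching the goal, 0 otherwise."""
    if collided:
        return -1.0
    return 1.0 if reached_goal else 0.0
```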

+ 0 - 62
docs/environments/empty.md

@@ -1,62 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Empty
----
-
-
-# Empty
-
-### Description
-
-This environment is an empty room, and the goal of the agent is to reach the
-green goal square, which provides a sparse reward. A small penalty is
-subtracted for the number of steps to reach the goal. This environment is
-useful, with small rooms, to validate that your RL algorithm works
-correctly, and with large rooms to experiment with sparse rewards and
-exploration. The random variants of the environment have the agent starting
-at a random position for each episode, while the regular variants have the
-agent always starting in the corner opposite to the goal.
-
-### Mission Space
-
-"get to the green goal square"
-
-### Action Space
-
-| Num | Name         | Action       |
-|-----|--------------|--------------|
-| 0   | left         | Turn left    |
-| 1   | right        | Turn right   |
-| 2   | forward      | Move forward |
-| 3   | pickup       | Unused       |
-| 4   | drop         | Unused       |
-| 5   | toggle       | Unused       |
-| 6   | done         | Unused       |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent reaches the goal.
-2. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-- `MiniGrid-Empty-5x5-v0`
-- `MiniGrid-Empty-Random-5x5-v0`
-- `MiniGrid-Empty-6x6-v0`
-- `MiniGrid-Empty-Random-6x6-v0`
-- `MiniGrid-Empty-8x8-v0`
-- `MiniGrid-Empty-16x16-v0`

+ 0 - 66
docs/environments/fetch.md

@@ -1,66 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Fetch
----
-
-
-# Fetch
-
-### Description
-
-This environment has multiple objects of assorted types and colors. The
-agent receives a textual string as part of its observation telling it which
-object to pick up. Picking up the wrong object terminates the episode with
-zero reward.
-
-### Mission Space
-
-"{syntax} {color} {type}"
-
-{syntax} is one of the following: "get a", "go get a", "fetch a",
-"go fetch a", "you must fetch a".
-
-{color} is the color of the box. Can be "red", "green", "blue", "purple",
-"yellow" or "grey".
-
-{type} is the type of the object. Can be "key" or "ball".
-
-### Action Space
-
-| Num | Name         | Action               |
-|-----|--------------|----------------------|
-| 0   | left         | Turn left            |
-| 1   | right        | Turn right           |
-| 2   | forward      | Move forward         |
-| 3   | pickup       | Pick up an object    |
-| 4   | drop         | Unused               |
-| 5   | toggle       | Unused               |
-| 6   | done         | Unused               |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent picks up the correct object.
-2. The agent picks up the wrong object.
-3. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-N: number of objects to be generated.
-
-- `MiniGrid-Fetch-5x5-N2-v0`
-- `MiniGrid-Fetch-6x6-N2-v0`
-- `MiniGrid-Fetch-8x8-N3-v0`
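Since the Fetch mission is templated as "{syntax} {color} {type}", the full mission space can be enumerated directly (values copied from the deleted page above; the list comprehension is just an illustration of the template, not Minigrid API):

```python
from itertools import product

# Template values from the Fetch mission space description.
syntaxes = ["get a", "go get a", "fetch a", "go fetch a", "you must fetch a"]
colors = ["red", "green", "blue", "purple", "yellow", "grey"]
types = ["key", "ball"]

# Every concrete mission string the template can produce.
missions = [f"{s} {c} {t}" for s, c, t in product(syntaxes, colors, types)]
print(len(missions))  # 5 * 6 * 2 = 60
print(missions[0])    # "get a red key"
```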

+ 0 - 53
docs/environments/four_rooms.md

@@ -1,53 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Four Rooms
----
-
-
-# Four Rooms
-
-### Description
-
-Classic four room reinforcement learning environment. The agent must
-navigate in a maze composed of four rooms interconnected by 4 gaps in the
-walls. To obtain a reward, the agent must reach the green goal square. Both
-the agent and the goal square are randomly placed in any of the four rooms.
-
-### Mission Space
-
-"reach the goal"
-
-### Action Space
-
-| Num | Name         | Action       |
-|-----|--------------|--------------|
-| 0   | left         | Turn left    |
-| 1   | right        | Turn right   |
-| 2   | forward      | Move forward |
-| 3   | pickup       | Unused       |
-| 4   | drop         | Unused       |
-| 5   | toggle       | Unused       |
-| 6   | done         | Unused       |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent reaches the goal.
-2. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-- `MiniGrid-FourRooms-v0`

+ 0 - 59
docs/environments/go_to_door.md

@@ -1,59 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Go To Door
----
-
-
-# Go To Door
-
-### Description
-
-This environment is a room with four doors, one on each wall. The agent
-receives a textual (mission) string as input, telling it which door to go
-to (e.g. "go to the red door"). It receives a positive reward for performing
-the `done` action next to the correct door, as indicated in the mission
-string.
-
-### Mission Space
-
-"go to the {color} door"
-
-{color} is the color of the door. Can be "red", "green", "blue", "purple",
-"yellow" or "grey".
-
-### Action Space
-
-| Num | Name         | Action               |
-|-----|--------------|----------------------|
-| 0   | left         | Turn left            |
-| 1   | right        | Turn right           |
-| 2   | forward      | Move forward         |
-| 3   | pickup       | Unused               |
-| 4   | drop         | Unused               |
-| 5   | toggle       | Unused               |
-| 6   | done         | Done completing task |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent stands next to the correct door and performs the `done` action.
-2. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-- `MiniGrid-GoToDoor-5x5-v0`
-- `MiniGrid-GoToDoor-6x6-v0`
-- `MiniGrid-GoToDoor-8x8-v0`

+ 0 - 10
docs/environments/go_to_object.md

@@ -1,10 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Go To Object
----
-
-
-# Go To Object
-
-Environment in which the agent is instructed to go to a given object
-named using an English text string.

+ 0 - 69
docs/environments/key_corridor_s6_r3.md

@@ -1,69 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Key Corridor S6 R3
----
-
-
-# Key Corridor S6 R3
-
-### Description
-
-This environment is similar to the locked room environment, but there are
-multiple registered environment configurations of increasing size,
-making it easier to use curriculum learning to train an agent to solve it.
-The agent has to pick up an object which is behind a locked door. The key is
-hidden in another room, and the agent has to explore the environment to find
-it. The mission string does not give the agent any clues as to where the
-key is placed. This environment can be solved without relying on language.
-
-### Mission Space
-
-"pick up the {color} {obj_type}"
-
-{color} is the color of the object. Can be "red", "green", "blue", "purple",
-"yellow" or "grey".
-
-{obj_type} is the type of the object. Can be "ball" or "key".
-
-### Action Space
-
-| Num | Name         | Action            |
-|-----|--------------|-------------------|
-| 0   | left         | Turn left         |
-| 1   | right        | Turn right        |
-| 2   | forward      | Move forward      |
-| 3   | pickup       | Pick up an object |
-| 4   | drop         | Unused            |
-| 5   | toggle       | Unused            |
-| 6   | done         | Unused            |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent picks up the correct object.
-2. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-S: room size.
-R: Number of rows.
-
-- `MiniGrid-KeyCorridorS3R1-v0`
-- `MiniGrid-KeyCorridorS3R2-v0`
-- `MiniGrid-KeyCorridorS3R3-v0`
-- `MiniGrid-KeyCorridorS4R3-v0`
-- `MiniGrid-KeyCorridorS5R3-v0`
-- `MiniGrid-KeyCorridorS6R3-v0`

+ 0 - 60
docs/environments/lava_gap_s7.md

@@ -1,60 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Lava Gap S7
----
-
-
-# Lava Gap S7
-
-### Description
-
-The agent has to reach the green goal square at the opposite corner of the
-room, and must pass through a narrow gap in a vertical strip of deadly lava.
-Touching the lava terminates the episode with a zero reward. This environment
-is useful for studying safety and safe exploration.
-
-### Mission Space
-
-Depending on the `obstacle_type` parameter:
-- `Lava`: "avoid the lava and get to the green goal square"
-- otherwise: "find the opening and get to the green goal square"
-
-### Action Space
-
-| Num | Name         | Action       |
-|-----|--------------|--------------|
-| 0   | left         | Turn left    |
-| 1   | right        | Turn right   |
-| 2   | forward      | Move forward |
-| 3   | pickup       | Unused       |
-| 4   | drop         | Unused       |
-| 5   | toggle       | Unused       |
-| 6   | done         | Unused       |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent reaches the goal.
-2. The agent falls into lava.
-3. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-S: size of map SxS.
-
-- `MiniGrid-LavaGapS5-v0`
-- `MiniGrid-LavaGapS6-v0`
-- `MiniGrid-LavaGapS7-v0`

+ 0 - 57
docs/environments/locked_room.md

@@ -1,57 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Locked Room
----
-
-
-# Locked Room
-
-### Description
-
-The environment has six rooms, one of which is locked. The agent receives
-a textual mission string as input, telling it which room to go to in order
-to get the key that opens the locked room. It then has to go into the locked
-room in order to reach the final goal. This environment is extremely
-difficult to solve with vanilla reinforcement learning alone.
-
-### Mission Space
-
-"get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal"
-
-{lockedroom_color}, {keyroom_color}, and {door_color} can be "red", "green",
-"blue", "purple", "yellow" or "grey".
-
-### Action Space
-
-| Num | Name         | Action                    |
-|-----|--------------|---------------------------|
-| 0   | left         | Turn left                 |
-| 1   | right        | Turn right                |
-| 2   | forward      | Move forward              |
-| 3   | pickup       | Pick up an object         |
-| 4   | drop         | Unused                    |
-| 5   | toggle       | Toggle/activate an object |
-| 6   | done         | Unused                    |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent reaches the goal.
-2. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-- `MiniGrid-LockedRoom-v0`

+ 0 - 60
docs/environments/memory_s7.md

@@ -1,60 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Memory S7
----
-
-
-# Memory S7
-
-### Description
-
-This environment is a memory test. The agent starts in a small room where it
-sees an object. It then has to go through a narrow hallway which ends in a
-split. At each end of the split there is an object, one of which is the same
-as the object in the starting room. The agent has to remember the initial
-object, and go to the matching object at the split.
-
-### Mission Space
-
-"go to the matching object at the end of the hallway"
-
-### Action Space
-
-| Num | Name         | Action                    |
-|-----|--------------|---------------------------|
-| 0   | left         | Turn left                 |
-| 1   | right        | Turn right                |
-| 2   | forward      | Move forward              |
-| 3   | pickup       | Pick up an object         |
-| 4   | drop         | Unused                    |
-| 5   | toggle       | Toggle/activate an object |
-| 6   | done         | Unused                    |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent reaches the correct matching object.
-2. The agent reaches the wrong matching object.
-3. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-S: size of map SxS.
-
-- `MiniGrid-MemoryS17Random-v0`
-- `MiniGrid-MemoryS13Random-v0`
-- `MiniGrid-MemoryS13-v0`
-- `MiniGrid-MemoryS11-v0`

+ 0 - 59
docs/environments/multi_room.md

@@ -1,59 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Multi Room
----
-
-
-# Multi Room
-
-### Description
-
-This environment has a series of connected rooms with doors that must be
-opened in order to get to the next room. The final room has the green goal
-square the agent must get to. This environment is extremely difficult to
-solve using RL alone. However, by gradually increasing the number of rooms
-and building a curriculum, the environment can be solved.
-
-### Mission Space
-
-"traverse the rooms to get to the goal"
-
-### Action Space
-
-| Num | Name         | Action                    |
-|-----|--------------|---------------------------|
-| 0   | left         | Turn left                 |
-| 1   | right        | Turn right                |
-| 2   | forward      | Move forward              |
-| 3   | pickup       | Unused                    |
-| 4   | drop         | Unused                    |
-| 5   | toggle       | Toggle/activate an object |
-| 6   | done         | Unused                    |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent reaches the goal.
-2. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-S: size of map SxS.
-N: number of rooms.
-
-- `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
-- `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
-- `MiniGrid-MultiRoom-N6-v0` (six rooms)

+ 0 - 11
docs/environments/obstructed_maze.md

@@ -1,11 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Obstructed Maze
----
-
-
-# Obstructed Maze
-
-A blue ball is hidden in one of the 4 corners of a 3x3 maze. Doors
-are locked, doors are obstructed by a ball and keys are hidden in
-boxes.

+ 0 - 10
docs/environments/playground.md

@@ -1,10 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Playground
----
-
-
-# Playground
-
-Environment with multiple rooms and random objects.
-This environment has no specific goals or rewards.

+ 0 - 62
docs/environments/put_near.md

@@ -1,62 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Put Near
----
-
-
-# Put Near
-
-### Description
-
-The agent is instructed through a textual string to pick up an object and
-place it next to another object. This environment is easy to solve with two
-objects, but difficult to solve with more, as it involves both textual
-understanding and spatial reasoning involving multiple objects.
-
-### Mission Space
-
-"put the {move_color} {move_type} near the {target_color} {target_type}"
-
-{move_color} and {target_color} can be "red", "green", "blue", "purple",
-"yellow" or "grey".
-
-{move_type} and {target_type} can be "box", "ball" or "key".
-
-### Action Space
-
-| Num | Name         | Action            |
-|-----|--------------|-------------------|
-| 0   | left         | Turn left         |
-| 1   | right        | Turn right        |
-| 2   | forward      | Move forward      |
-| 3   | pickup       | Pick up an object |
-| 4   | drop         | Drop an object    |
-| 5   | toggle       | Unused            |
-| 6   | done         | Unused            |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent picks up the wrong object.
-2. The agent drops the correct object near the target.
-3. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-N: number of objects.
-
-- `MiniGrid-PutNear-6x6-N2-v0`
-- `MiniGrid-PutNear-8x8-N3-v0`

+ 0 - 55
docs/environments/red_blue_doors.md

@@ -1,55 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Red Blue Doors
----
-
-
-# Red Blue Doors
-
-### Description
-
-The agent is randomly placed within a room with one red and one blue door
-facing opposite directions. The agent has to open the red door and then open
-the blue door, in that order. Note that, surprisingly, this environment is
-solvable without memory.
-
-### Mission Space
-
-"open the red door then the blue door"
-
-### Action Space
-
-| Num | Name         | Action                    |
-|-----|--------------|---------------------------|
-| 0   | left         | Turn left                 |
-| 1   | right        | Turn right                |
-| 2   | forward      | Move forward              |
-| 3   | pickup       | Unused                    |
-| 4   | drop         | Unused                    |
-| 5   | toggle       | Toggle/activate an object |
-| 6   | done         | Unused                    |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent opens the blue door having already opened the red door.
-2. The agent opens the blue door without having opened the red door yet.
-3. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-- `MiniGrid-RedBlueDoors-6x6-v0`
-- `MiniGrid-RedBlueDoors-8x8-v0`

+ 0 - 76
docs/environments/simple_crossing_s11_n5.md

@@ -1,76 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Simple Crossing S11 N5
----
-
-
-# Simple Crossing S11 N5
-
-### Description
-
-Depending on the `obstacle_type` parameter:
-- `Lava` - The agent has to reach the green goal square on the other corner
-    of the room while avoiding rivers of deadly lava which terminate the
-    episode in failure. Each lava stream runs across the room either
-    horizontally or vertically, and has a single crossing point which can be
-    safely used; luckily, a path to the goal is guaranteed to exist. This
-    environment is useful for studying safety and safe exploration.
-- otherwise - Similar to the `LavaCrossing` environment, the agent has to
-    reach the green goal square on the other corner of the room, however
-    lava is replaced by walls. This MDP is therefore much easier and may
-    be useful for quickly testing your algorithms.
-
-### Mission Space
-Depending on the `obstacle_type` parameter:
-- `Lava` - "avoid the lava and get to the green goal square"
-- otherwise - "find the opening and get to the green goal square"
-
-### Action Space
-
-| Num | Name         | Action       |
-|-----|--------------|--------------|
-| 0   | left         | Turn left    |
-| 1   | right        | Turn right   |
-| 2   | forward      | Move forward |
-| 3   | pickup       | Unused       |
-| 4   | drop         | Unused       |
-| 5   | toggle       | Unused       |
-| 6   | done         | Unused       |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
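A small decoder makes the tuple encoding concrete. This is a sketch only: the index values below are assumed to mirror minigrid's `OBJECT_TO_IDX` / `COLOR_TO_IDX` / state tables, and real code should import those mappings from minigrid rather than hard-coding them:

```python
# Sketch: decode a (OBJECT_IDX, COLOR_IDX, STATE) tile tuple.
# Index values are assumed to match minigrid's constant tables;
# prefer importing OBJECT_TO_IDX / COLOR_TO_IDX in real code.
IDX_TO_OBJECT = {0: "unseen", 1: "empty", 2: "wall", 3: "floor", 4: "door",
                 5: "key", 6: "ball", 7: "box", 8: "goal", 9: "lava"}
IDX_TO_COLOR = {0: "red", 1: "green", 2: "blue", 3: "purple", 4: "yellow", 5: "grey"}
IDX_TO_STATE = {0: "open", 1: "closed", 2: "locked"}

def describe_tile(obj_idx: int, color_idx: int, state: int) -> str:
    obj = IDX_TO_OBJECT.get(obj_idx, "?")
    color = IDX_TO_COLOR.get(color_idx, "?")
    if obj == "door":  # STATE is only meaningful for doors
        return f"{color} door ({IDX_TO_STATE.get(state, '?')})"
    return f"{color} {obj}"

print(describe_tile(4, 2, 2))  # blue door (locked)
```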
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent reaches the goal.
-2. The agent falls into lava.
-3. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-S: size of the map SxS.
-N: number of valid crossings across lava or walls from the starting position
-to the goal.
-
-- `Lava` :
-    - `MiniGrid-LavaCrossingS9N1-v0`
-    - `MiniGrid-LavaCrossingS9N2-v0`
-    - `MiniGrid-LavaCrossingS9N3-v0`
-    - `MiniGrid-LavaCrossingS11N5-v0`
-
-- otherwise :
-    - `MiniGrid-SimpleCrossingS9N1-v0`
-    - `MiniGrid-SimpleCrossingS9N2-v0`
-    - `MiniGrid-SimpleCrossingS9N3-v0`
-    - `MiniGrid-SimpleCrossingS11N5-v0`
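The S/N naming convention can be sketched with a throwaway helper (`crossing_env_id` is made up for illustration, not part of minigrid) that assembles a registered ID:

```python
def crossing_env_id(size: int, num_crossings: int, obstacle: str = "Lava") -> str:
    """Build a crossing-environment ID from the S/N convention above.

    Illustrative helper only; the IDs themselves are what minigrid registers.
    """
    family = "LavaCrossing" if obstacle == "Lava" else "SimpleCrossing"
    return f"MiniGrid-{family}S{size}N{num_crossings}-v0"

print(crossing_env_id(11, 5))         # MiniGrid-LavaCrossingS11N5-v0
print(crossing_env_id(9, 1, "wall"))  # MiniGrid-SimpleCrossingS9N1-v0
```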

+ 0 - 51
docs/environments/unlock.md

@@ -1,51 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Unlock
----
-
-
-# Unlock
-
-### Description
-
-The agent has to open a locked door. This environment can be solved without
-relying on language.
-
-### Mission Space
-
-"open the door"
-
-### Action Space
-
-| Num | Name         | Action                    |
-|-----|--------------|---------------------------|
-| 0   | left         | Turn left                 |
-| 1   | right        | Turn right                |
-| 2   | forward      | Move forward              |
-| 3   | pickup       | Unused                    |
-| 4   | drop         | Unused                    |
-| 5   | toggle       | Toggle/activate an object |
-| 6   | done         | Unused                    |
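The table above maps one-to-one onto an integer enum; recent versions of minigrid expose an equivalent enum themselves, and the sketch below simply restates the table:

```python
from enum import IntEnum

class Actions(IntEnum):
    # Restates the action table; minigrid provides an equivalent enum.
    left = 0
    right = 1
    forward = 2
    pickup = 3
    drop = 4
    toggle = 5
    done = 6

# Unlocking the door amounts to facing it while carrying the key and
# issuing `toggle`:
print(int(Actions.toggle))  # 5
```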
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent opens the door.
-2. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-- `MiniGrid-Unlock-v0`

+ 0 - 54
docs/environments/unlock_pickup.md

@@ -1,54 +0,0 @@
----
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
-title: Unlock Pickup
----
-
-
-# Unlock Pickup
-
-### Description
-
-The agent has to pick up a box which is placed in another room, behind a
-locked door. This environment can be solved without relying on language.
-
-### Mission Space
-
-"pick up the {color} box"
-
-{color} is the color of the box. Can be "red", "green", "blue", "purple",
-"yellow" or "grey".
-
-### Action Space
-
-| Num | Name         | Action                    |
-|-----|--------------|---------------------------|
-| 0   | left         | Turn left                 |
-| 1   | right        | Turn right                |
-| 2   | forward      | Move forward              |
-| 3   | pickup       | Pick up an object         |
-| 4   | drop         | Unused                    |
-| 5   | toggle       | Toggle/activate an object |
-| 6   | done         | Unused                    |
-
-### Observation Encoding
-
-- Each tile is encoded as a 3 dimensional tuple:
-    `(OBJECT_IDX, COLOR_IDX, STATE)`
-- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-    [minigrid/minigrid.py](minigrid/minigrid.py)
-- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-### Rewards
-
-A reward of '1' is given for success, and '0' for failure.
-
-### Termination
-
-The episode ends if any one of the following conditions is met:
-
-1. The agent picks up the correct box.
-2. Timeout (see `max_steps`).
-
-### Registered Configurations
-
-- `MiniGrid-UnlockPickup-v0`
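The mission template from this environment can be filled in mechanically; `sample_mission` below is a hypothetical helper written for illustration, not a minigrid API:

```python
COLORS = ["red", "green", "blue", "purple", "yellow", "grey"]

def sample_mission(color: str) -> str:
    # Hypothetical helper: instantiate the "pick up the {color} box" template.
    if color not in COLORS:
        raise ValueError(f"unknown color: {color}")
    return f"pick up the {color} box"

print(sample_mission("purple"))  # pick up the purple box
```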

+ 1 - 1
docs/scripts/gen_mds.py

@@ -58,7 +58,7 @@ for env_name, env_spec in filtered_envs.items():
     )
 
     front_matter = f"""---
-AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+autogenerated:
 title: {title_env_name}
 ---
 """