2 yıl önce · 72873a133c
--- a/.github/workflows/gh-pages.yml
+++ b/.github/workflows/gh-pages.yml
@@ -24,9 +24,12 @@ jobs:
 
				       - name: Build Envs Docs
			
 
				         run: python docs/scripts/gen_mds.py
			
 
				 
			
 
				+      - name: Build Envs Display
			
 
				+        run: python docs/scripts/gen_envs_display.py
			
 
				+
			
 
				       - name: Build
			
 
				         run: sphinx-build -b dirhtml -v docs _build
			
 
				-        
			
 
				+
			
 
				       - name: Move 404
			
 
				         run: mv _build/404/index.html _build/404.html
			
 
				 
			
@@ -40,5 +43,5 @@ jobs:
 
				         uses: JamesIves/github-pages-deploy-action@v4
			
 
				         with:
			
 
				           folder: _build
			
 
				-          clean-exclude: | 
			
 
				+          clean-exclude: |
			
 
				             *.*.*/
			
--- a/.gitignore
+++ b/.gitignore
@@ -15,9 +15,8 @@ _site
 
				 .jekyll-cache
			
 
				 __pycache__
			
 
				 .vscode/
			
 
				-/docs/environments/*.md
			
 
				-!docs/environments/index.md
			
 
				-!docs/environments/babyAI_index.md
			
 
				+docs/environments/**/*.*
			
 
				+!docs/environments/**/index.md
			
 
				 
			
 
				 # Virtual environments
			
 
				 .env
			
--- a/docs/_static/videos/babyai/ActionObjDoor.gif
+++ b/docs/_static/videos/babyai/ActionObjDoor.gif
--- a/docs/_static/videos/babyai/BlockedUnlockPickup.gif
+++ b/docs/_static/videos/babyai/BlockedUnlockPickup.gif
--- a/docs/_static/videos/babyai/BossLevel.gif
+++ b/docs/_static/videos/babyai/BossLevel.gif
--- a/docs/_static/videos/babyai/BossLevelNoUnlock.gif
+++ b/docs/_static/videos/babyai/BossLevelNoUnlock.gif
--- a/docs/_static/videos/babyai/FindObjS5.gif
+++ b/docs/_static/videos/babyai/FindObjS5.gif
--- a/docs/_static/videos/babyai/GoTo.gif
+++ b/docs/_static/videos/babyai/GoTo.gif
--- a/docs/_static/videos/babyai/GoToDoor.gif
+++ b/docs/_static/videos/babyai/GoToDoor.gif
--- a/docs/_static/videos/babyai/GoToImpUnlock.gif
+++ b/docs/_static/videos/babyai/GoToImpUnlock.gif
--- a/docs/_static/videos/babyai/GoToLocal.gif
+++ b/docs/_static/videos/babyai/GoToLocal.gif
--- a/docs/_static/videos/babyai/GoToObj.gif
+++ b/docs/_static/videos/babyai/GoToObj.gif
--- a/docs/_static/videos/babyai/GoToObjDoor.gif
+++ b/docs/_static/videos/babyai/GoToObjDoor.gif
--- a/docs/_static/videos/babyai/GoToRedBall.gif
+++ b/docs/_static/videos/babyai/GoToRedBall.gif
--- a/docs/_static/videos/babyai/GoToRedBallGrey.gif
+++ b/docs/_static/videos/babyai/GoToRedBallGrey.gif
--- a/docs/_static/videos/babyai/GoToRedBallNoDists.gif
+++ b/docs/_static/videos/babyai/GoToRedBallNoDists.gif
--- a/docs/_static/videos/babyai/GoToRedBlueBall.gif
+++ b/docs/_static/videos/babyai/GoToRedBlueBall.gif
--- a/docs/_static/videos/babyai/GoToSeq.gif
+++ b/docs/_static/videos/babyai/GoToSeq.gif
--- a/docs/_static/videos/babyai/KeyCorridor.gif
+++ b/docs/_static/videos/babyai/KeyCorridor.gif
--- a/docs/_static/videos/babyai/KeyInBox.gif
+++ b/docs/_static/videos/babyai/KeyInBox.gif
--- a/docs/_static/videos/babyai/MiniBossLevel.gif
+++ b/docs/_static/videos/babyai/MiniBossLevel.gif
--- a/docs/_static/videos/babyai/MoveTwoAcross.gif
+++ b/docs/_static/videos/babyai/MoveTwoAcross.gif
--- a/docs/_static/videos/babyai/OneRoomS8.gif
+++ b/docs/_static/videos/babyai/OneRoomS8.gif
--- a/docs/_static/videos/babyai/Open.gif
+++ b/docs/_static/videos/babyai/Open.gif
--- a/docs/_static/videos/babyai/OpenDoor.gif
+++ b/docs/_static/videos/babyai/OpenDoor.gif
--- a/docs/_static/videos/babyai/OpenDoorsOrder.gif
+++ b/docs/_static/videos/babyai/OpenDoorsOrder.gif
--- a/docs/_static/videos/babyai/OpenRedDoor.gif
+++ b/docs/_static/videos/babyai/OpenRedDoor.gif
--- a/docs/_static/videos/babyai/OpenTwoDoors.gif
+++ b/docs/_static/videos/babyai/OpenTwoDoors.gif
--- a/docs/_static/videos/babyai/Pickup.gif
+++ b/docs/_static/videos/babyai/Pickup.gif
--- a/docs/_static/videos/babyai/PickupAbove.gif
+++ b/docs/_static/videos/babyai/PickupAbove.gif
--- a/docs/_static/videos/babyai/PickupDist.gif
+++ b/docs/_static/videos/babyai/PickupDist.gif
--- a/docs/_static/videos/babyai/PickupLoc.gif
+++ b/docs/_static/videos/babyai/PickupLoc.gif
--- a/docs/_static/videos/babyai/PutNext.gif
+++ b/docs/_static/videos/babyai/PutNext.gif
--- a/docs/_static/videos/babyai/PutNextLocal.gif
+++ b/docs/_static/videos/babyai/PutNextLocal.gif
--- a/docs/_static/videos/babyai/Synth.gif
+++ b/docs/_static/videos/babyai/Synth.gif
--- a/docs/_static/videos/babyai/SynthLoc.gif
+++ b/docs/_static/videos/babyai/SynthLoc.gif
--- a/docs/_static/videos/babyai/SynthSeq.gif
+++ b/docs/_static/videos/babyai/SynthSeq.gif
--- a/docs/_static/videos/babyai/UnblockPickup.gif
+++ b/docs/_static/videos/babyai/UnblockPickup.gif
--- a/docs/_static/videos/babyai/Unlock.gif
+++ b/docs/_static/videos/babyai/Unlock.gif
--- a/docs/_static/videos/babyai/UnlockLocal.gif
+++ b/docs/_static/videos/babyai/UnlockLocal.gif
--- a/docs/_static/videos/babyai/UnlockPickup.gif
+++ b/docs/_static/videos/babyai/UnlockPickup.gif
--- a/docs/_static/videos/babyai/UnlockToUnlock.gif
+++ b/docs/_static/videos/babyai/UnlockToUnlock.gif
--- a/docs/_static/videos/minigrid/BlockedUnlockPickupEnv.gif
+++ b/docs/_static/videos/minigrid/BlockedUnlockPickupEnv.gif
--- a/docs/_static/videos/minigrid/CrossingEnv.gif
+++ b/docs/_static/videos/minigrid/CrossingEnv.gif
--- a/docs/_static/videos/minigrid/DistShiftEnv.gif
+++ b/docs/_static/videos/minigrid/DistShiftEnv.gif
--- a/docs/_static/videos/minigrid/DoorKeyEnv.gif
+++ b/docs/_static/videos/minigrid/DoorKeyEnv.gif
--- a/docs/_static/videos/minigrid/DynamicObstaclesEnv.gif
+++ b/docs/_static/videos/minigrid/DynamicObstaclesEnv.gif
--- a/docs/_static/videos/minigrid/EmptyEnv.gif
+++ b/docs/_static/videos/minigrid/EmptyEnv.gif
--- a/docs/_static/videos/minigrid/FetchEnv.gif
+++ b/docs/_static/videos/minigrid/FetchEnv.gif
--- a/docs/_static/videos/minigrid/FourRoomsEnv.gif
+++ b/docs/_static/videos/minigrid/FourRoomsEnv.gif
--- a/docs/_static/videos/minigrid/GoToDoorEnv.gif
+++ b/docs/_static/videos/minigrid/GoToDoorEnv.gif
--- a/docs/_static/videos/minigrid/GoToObjectEnv.gif
+++ b/docs/_static/videos/minigrid/GoToObjectEnv.gif
--- a/docs/_static/videos/minigrid/KeyCorridorEnv.gif
+++ b/docs/_static/videos/minigrid/KeyCorridorEnv.gif
--- a/docs/_static/videos/minigrid/LavaGapEnv.gif
+++ b/docs/_static/videos/minigrid/LavaGapEnv.gif
--- a/docs/_static/videos/minigrid/LockedRoomEnv.gif
+++ b/docs/_static/videos/minigrid/LockedRoomEnv.gif
--- a/docs/_static/videos/minigrid/MemoryEnv.gif
+++ b/docs/_static/videos/minigrid/MemoryEnv.gif
--- a/docs/_static/videos/minigrid/MultiRoomEnv.gif
+++ b/docs/_static/videos/minigrid/MultiRoomEnv.gif
--- a/docs/_static/videos/minigrid/ObstructedMaze_1Dlhb.gif
+++ b/docs/_static/videos/minigrid/ObstructedMaze_1Dlhb.gif
--- a/docs/_static/videos/minigrid/ObstructedMaze_Full.gif
+++ b/docs/_static/videos/minigrid/ObstructedMaze_Full.gif
--- a/docs/_static/videos/minigrid/PlaygroundEnv.gif
+++ b/docs/_static/videos/minigrid/PlaygroundEnv.gif
--- a/docs/_static/videos/minigrid/PutNearEnv.gif
+++ b/docs/_static/videos/minigrid/PutNearEnv.gif
--- a/docs/_static/videos/minigrid/RedBlueDoorEnv.gif
+++ b/docs/_static/videos/minigrid/RedBlueDoorEnv.gif
--- a/docs/_static/videos/minigrid/UnlockEnv.gif
+++ b/docs/_static/videos/minigrid/UnlockEnv.gif
--- a/docs/_static/videos/minigrid/UnlockPickupEnv.gif
+++ b/docs/_static/videos/minigrid/UnlockPickupEnv.gif
--- a/docs/environments/babyAI_index.md
+++ b/docs/environments/babyAI_index.md
@@ -1,51 +0,0 @@
 
				----
			
 
				-firstpage:
			
 
				-lastpage:
			
 
				----
			
 
				-
			
 
				-## BabyAI Environments
			
 
				-
			
 
				-```{toctree}
			
 
				-:hidden:
			
 
				-:caption: BabyAI Environments
			
 
				-babyai_action_obj_door
			
 
				-babyai_blocked_unlock_pickup
			
 
				-babyai_boss_level_no_unlock
			
 
				-babyai_boss_level
			
 
				-babyai_find_obj
			
 
				-babyai_go_to_door
			
 
				-babyai_go_to_imp_unlock
			
 
				-babyai_go_to_local
			
 
				-babyai_go_to_obj_door
			
 
				-babyai_go_to_obj_maze
			
 
				-babyai_go_to_obj
			
 
				-babyai_go_to_red_ball_grey
			
 
				-babyai_go_to_red_ball_no_dists
			
 
				-babyai_go_to_red_ball
			
 
				-babyai_go_to_red_blue_ball
			
 
				-babyai_go_to_seq
			
 
				-babyai_key_corridor
			
 
				-babyai_key_in_box
			
 
				-babyai_mini_boss_level
			
 
				-babyai_move_two_across
			
 
				-babyai_one_room
			
 
				-babyai_open_door_loc
			
 
				-babyai_open_doors_order
			
 
				-babyai_open_red_blue_doors_debug
			
 
				-babyai_open
			
 
				-babyai_pickup_above
			
 
				-babyai_pickup_dist_debug
			
 
				-babyai_pickup_loc
			
 
				-babyai_pickup
			
 
				-babyai_put_next_local
			
 
				-babyai_put_next
			
 
				-babyai_synth_loc
			
 
				-babyai_synth_seq
			
 
				-babyai_synth
			
 
				-babyai_unblock_pickup
			
 
				-babyai_unlock_local_dist
			
 
				-babyai_unlock_pickup_dist
			
 
				-babyai_unlock_to_unlock
			
 
				-babyai_unlock
			
 
				-
			
 
				-```
			
--- a/docs/environments/babyai/index.md
+++ b/docs/environments/babyai/index.md
@@ -0,0 +1,57 @@
 
				+---
			
 
				+firstpage:
			
 
				+lastpage:
			
 
				+---
			
 
				+
			
 
				+## BabyAI Environments
			
 
				+
			
 
				+```{raw} html
			
 
				+   :file: list.html
			
 
				+```
			
 
				+
			
 
				+```{toctree}
			
 
				+:hidden:
			
 
				+:caption: BabyAI Environments
			
 
				+
			
 
				+GoToRedBallGrey
			
 
				+GoToRedBall
			
 
				+GoToRedBallNoDists
			
 
				+GoToObj
			
 
				+GoToLocal
			
 
				+GoTo
			
 
				+GoToImpUnlock
			
 
				+GoToSeq
			
 
				+GoToRedBlueBall
			
 
				+GoToDoor
			
 
				+GoToObjDoor
			
 
				+Open
			
 
				+OpenRedDoor
			
 
				+OpenDoor
			
 
				+OpenTwoDoors
			
 
				+OpenDoorsOrder
			
 
				+Pickup
			
 
				+UnblockPickup
			
 
				+PickupLoc
			
 
				+PickupDist
			
 
				+PickupAbove
			
 
				+PutNextLocal
			
 
				+PutNext
			
 
				+Unlock
			
 
				+UnlockLocal
			
 
				+KeyInBox
			
 
				+UnlockPickup
			
 
				+BlockedUnlockPickup
			
 
				+UnlockToUnlock
			
 
				+ActionObjDoor
			
 
				+FindObjS5
			
 
				+KeyCorridor
			
 
				+OneRoomS8
			
 
				+MoveTwoAcross
			
 
				+Synth
			
 
				+SynthLoc
			
 
				+SynthSeq
			
 
				+MiniBossLevel
			
 
				+BossLevel
			
 
				+BossLevelNoUnlock
			
 
				+
			
 
				+```
			
--- a/docs/environments/index.md
+++ b/docs/environments/index.md
@@ -10,29 +10,35 @@ Each environment provides one or more configurations registered with OpenAI gym.
 
				 is also programmatically tunable in terms of size/complexity, which is useful for curriculum learning
			
 
				 or to fine-tune difficulty.
			
 
				 
			
 
				+```{raw} html
			
 
				+   :file: list.html
			
 
				+```
			
 
				+
			
 
				 ```{toctree}
			
 
				 :hidden:
			
 
				 :caption: Minigrid Environments
			
 
				-blocked_unlock_pickup
			
 
				-dist_shift
			
 
				-door_key
			
 
				-dynamic
			
 
				-empty
			
 
				-fetch
			
 
				-four_rooms
			
 
				-go_to_door
			
 
				-go_to_object
			
 
				-key_corridor
			
 
				-lava_gap
			
 
				-locked_room
			
 
				-memory
			
 
				-multi_room
			
 
				-obstructed_maze
			
 
				-playground
			
 
				-put_near
			
 
				-red_blue_doors
			
 
				-simple_crossing
			
 
				-unlock_pickup
			
 
				-unlock
			
 
				+
			
 
				+BlockedUnlockPickupEnv
			
 
				+CrossingEnv
			
 
				+DistShiftEnv
			
 
				+DoorKeyEnv
			
 
				+DynamicObstaclesEnv
			
 
				+EmptyEnv
			
 
				+FetchEnv
			
 
				+FourRoomsEnv
			
 
				+GoToDoorEnv
			
 
				+GoToObjectEnv
			
 
				+KeyCorridorEnv
			
 
				+LavaGapEnv
			
 
				+LockedRoomEnv
			
 
				+MemoryEnv
			
 
				+MultiRoomEnv
			
 
				+ObstructedMaze_1Dlhb
			
 
				+ObstructedMaze_Full
			
 
				+PlaygroundEnv
			
 
				+PutNearEnv
			
 
				+RedBlueDoorEnv
			
 
				+UnlockEnv
			
 
				+UnlockPickupEnv
			
 
				 
			
 
				 ```
			
--- a/docs/index.md
+++ b/docs/index.md
@@ -5,7 +5,7 @@ lastpage:
 
				 ---
			
 
				 ## Minigrid contains simple and easily configurable grid world environments to conduct Reinforcement Learning research.
			
 
				 
			
 
				-[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/) 
			
 
				+[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/)
			
 
				 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
			
 
				 
			
 
				 This library contains a collection of 2D grid-world environments with goal-oriented tasks. The agent in these environments is a triangle-like agent with a discrete action space. The tasks involve solving different maze maps and interacting with different objects such as doors, keys, or boxes.  The design of the library is meant to be simple, fast, and easily customizable.
			
@@ -48,9 +48,8 @@ api/wrapper
 
				 :hidden:
			
 
				 :caption: Environments
			
 
				 
			
 
				-environments/design
			
 
				-environments/index
			
 
				-environments/babyAI_index
			
 
				+environments/minigrid/index
			
 
				+environments/babyai/index
			
 
				 ```
			
 
				 
			
 
				 ```{toctree}
			
--- a/docs/scripts/gen_envs_display.py
+++ b/docs/scripts/gen_envs_display.py
@@ -0,0 +1,83 @@
 
				+import os
			
 
				+
			
 
				+import gymnasium
			
 
				+
			
 
				+
			
 
				+def create_grid_cell(type_id, env_id, base_path):
			
 
				+    return f"""
			
 
				+            <a href="{base_path}{env_id}">
			
 
				+                <div class="env-grid__cell">
			
 
				+                    <div class="cell__image-container">
			
 
				+                        <img src="/_static/videos/{type_id}/{env_id}.gif">
			
 
				+                    </div>
			
 
				+                    <div class="cell__title">
			
 
				+                        <span>{' '.join(env_id.split('_')).title()}</span>
			
 
				+                    </div>
			
 
				+                </div>
			
 
				+            </a>
			
 
				+    """
			
 
				+
			
 
				+
			
 
				+def generate_page(env, limit=-1, base_path=""):
			
 
				+    env_type_id = env["id"]
			
 
				+    env_list = env["list"]
			
 
				+    cells = [create_grid_cell(env_type_id, env_id, base_path) for env_id in env_list]
			
 
				+    non_limited_page = limit == -1 or limit >= len(cells)
			
 
				+    if non_limited_page:
			
 
				+        cells = "\n".join(cells)
			
 
				+    else:
			
 
				+        cells = "\n".join(cells[:limit])
			
 
				+
			
 
				+    more_btn = (
			
 
				+        """
			
 
				+<a href="./complete_list">
			
 
				+    <button class="more-btn">
			
 
				+        See More Environments
			
 
				+    </button>
			
 
				+</a>
			
 
				+"""
			
 
				+        if not non_limited_page
			
 
				+        else ""
			
 
				+    )
			
 
				+    return f"""
			
 
				+<div class="env-grid">
			
 
				+    {cells}
			
 
				+</div>
			
 
				+{more_btn}
			
 
				+    """
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    """
			
 
				+    python gen_envs_display
			
 
				+    """
			
 
				+    type_dict = {}
			
 
				+
			
 
				+    for env_spec in gymnasium.envs.registry.values():
			
 
				+        # minigrid.envs:Env or minigrid.envs.babyai:Env
			
 
				+        split = env_spec.entry_point.split(".")
			
 
				+        # ignore minigrid.envs.env_type:Env
			
 
				+        env_module = split[0]
			
 
				+        env_name = split[-1].split(":")[-1]
			
 
				+        env_type = env_module if len(split) == 2 else split[-1].split(":")[0]
			
 
				+
			
 
				+        if env_module == "minigrid":
			
 
				+            if env_type not in type_dict.keys():
			
 
				+                type_dict[env_type] = []
			
 
				+
			
 
				+            if env_name not in type_dict[env_type]:
			
 
				+                type_dict[env_type].append(env_name)
			
 
				+
			
 
				+    for key, value in type_dict.items():
			
 
				+        env_type = key
			
 
				+
			
 
				+        page = generate_page({"id": key, "list": value})
			
 
				+        fp = open(
			
 
				+            os.path.join(
			
 
				+                os.path.dirname(__file__), "..", "environments", env_type, "list.html"
			
 
				+            ),
			
 
				+            "w",
			
 
				+            encoding="utf-8",
			
 
				+        )
			
 
				+        fp.write(page)
			
 
				+        fp.close()
			
--- a/docs/scripts/gen_gifs.py
+++ b/docs/scripts/gen_gifs.py
@@ -0,0 +1,78 @@
 
				+import os
			
 
				+import re
			
 
				+
			
 
				+import gymnasium
			
 
				+from PIL import Image
			
 
				+from tqdm import tqdm
			
 
				+
			
 
				+# camel to snake case: https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case # noqa: E501
			
 
				+pattern = re.compile(r"(?<!^)(?=[A-Z])")
			
 
				+
			
 
				+# how many steps to record an env for
			
 
				+LENGTH = 300
			
 
				+
			
 
				+output_dir = os.path.join(os.path.dirname(__file__), "..", "_static", "videos")
			
 
				+os.makedirs(output_dir, exist_ok=True)
			
 
				+
			
 
				+# Some environments have multiple versions
			
 
				+# For example, KeyCorridorEnv -> KeyCorridorS3R1, KeyCorridorS3R2, KeyCorridorS3R3, etc
			
 
				+# We only want one as an example
			
 
				+envs_completed = []
			
 
				+
			
 
				+# iterate through all envspecs
			
 
				+for env_spec in tqdm(gymnasium.envs.registry.values()):
			
 
				+    # minigrid.envs:Env or minigrid.envs.babyai:Env
			
 
				+    split = env_spec.entry_point.split(".")
			
 
				+    # ignore minigrid.envs.env_type:Env
			
 
				+    env_module = split[0]
			
 
				+    env_name = split[-1].split(":")[-1]
			
 
				+    env_type = env_module if len(split) == 2 else split[-1].split(":")[0]
			
 
				+
			
 
				+    if env_module == "minigrid" and env_name not in envs_completed:
			
 
				+        os.makedirs(os.path.join(output_dir, env_type), exist_ok=True)
			
 
				+        path = os.path.join(output_dir, env_type, env_name + ".gif")
			
 
				+        envs_completed.append(env_name)
			
 
				+
			
 
				+        # try/except in case some optional dependencies are not installed
			
 
				+        try:
			
 
				+            env = gymnasium.make(env_spec.id, render_mode="rgb_array")
			
 
				+            # the environment needs to support "rgb_array" rendering
			
 
				+            if not ("rgb_array" in env.metadata["render_modes"]):
			
 
				+                continue
			
 
				+
			
 
				+            # obtain and save LENGTH frames worth of steps
			
 
				+            frames = []
			
 
				+            t = 0
			
 
				+            while True:
			
 
				+                state, info = env.reset()
			
 
				+                terminated, truncated = False, False
			
 
				+                while not (terminated or truncated) and len(frames) <= LENGTH:
			
 
				+
			
 
				+                    frame = env.render()
			
 
				+                    frames.append(Image.fromarray(frame))
			
 
				+                    action = env.action_space.sample()
			
 
				+
			
 
				+                    # Avoid too much movement: only step the env on every 10th frame
			
 
				+                    if t % 10 == 0:
			
 
				+                        state_next, reward, terminated, truncated, info = env.step(
			
 
				+                            action
			
 
				+                        )
			
 
				+                    t += 1
			
 
				+
			
 
				+                if len(frames) > LENGTH:
			
 
				+                    break
			
 
				+
			
 
				+            env.close()
			
 
				+
			
 
				+            frames[0].save(
			
 
				+                path,
			
 
				+                save_all=True,
			
 
				+                append_images=frames[1:],
			
 
				+                duration=50,
			
 
				+                loop=0,
			
 
				+            )
			
 
				+            print("Saved: " + env_name)
			
 
				+
			
 
				+        except BaseException as e:
			
 
				+            print("ERROR", e)
			
 
				+            continue
			
--- a/docs/scripts/gen_mds.py
+++ b/docs/scripts/gen_mds.py
@@ -13,6 +13,8 @@ from tqdm import tqdm
 
				 from utils import trim
			
 
				 from itertools import chain
			
 
				 
			
 
				+from utils import env_name_format
			
 
				+
			
 
				 readme_path = os.path.join(
			
 
				     os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
			
 
				     "README.md",
			
@@ -68,36 +70,44 @@ for env_name, env_spec in chain(filtered_envs.items(), filtered_babyai_envs.item
 
				 
			
 
				     docstring = trim(made.unwrapped.__doc__)
			
 
				 
			
 
				-    print(env_name)
			
 
				-    pascal_env_name = env_spec.id.split("-")[1]
			
 
				-    print(pascal_env_name)
			
 
				-    # remove suffix
			
 
				-    p = re.compile(r"([A-Z][a-z]+)*")
			
 
				-    name = p.search(pascal_env_name).group()
			
 
				-
			
 
				-    snake_env_name = pattern.sub("_", name).lower()
			
 
				-    env_names.append(snake_env_name)
			
 
				-    title_env_name = snake_env_name.replace("_", " ").title()
			
 
				+    # minigrid.envs:Env or minigrid.envs.babyai:Env
			
 
				+    split = env_spec.entry_point.split(".")
			
 
				+    # ignore minigrid.envs.env_type:Env
			
 
				+    env_module = split[0]
			
 
				+    env_name = split[-1].split(":")[-1]
			
 
				+    env_type = env_module if len(split) == 2 else split[-1].split(":")[0]
			
 
				 
			
 
				     path_name = ""
			
 
				-
			
 
				-    if env_name.startswith("envs:"):
			
 
				-        path_name = snake_env_name + ".md"
			
 
				-    else:
			
 
				-        path_name = "babyai_" + snake_env_name + ".md"
			
 
				+    os.makedirs(
			
 
				+        os.path.join(
			
 
				+            os.path.dirname(os.path.dirname(__file__)), "environments", env_type
			
 
				+        ),
			
 
				+        exist_ok=True,
			
 
				+    )
			
 
				 
			
 
				     v_path = os.path.join(
			
 
				         os.path.dirname(os.path.dirname(__file__)),
			
 
				         "environments",
			
 
				-        path_name,
			
 
				+        env_type,
			
 
				+        f"{env_name}.md",
			
 
				     )
			
 
				 
			
 
				+    formatted_env_name = env_name_format(env_name)
			
 
				+
			
 
				     front_matter = f"""---
			
 
				 autogenerated:
			
 
				-title: {title_env_name}
			
 
				+title: {formatted_env_name}
			
 
				 ---
			
 
				 """
			
 
				-    title = f"# {title_env_name}"
			
 
				+    title = f"# {formatted_env_name}"
			
 
				+    gif = (
			
 
				+        "```{figure} "
			
 
				+        + f"""/_static/videos/{env_type}/{env_name}.gif
			
 
				+:alt: {formatted_env_name}
			
 
				+:width: 200px
			
 
				+```
			
 
				+"""
			
 
				+    )
			
 
				 
			
 
				     if docstring is None:
			
 
				         docstring = "No information provided"
			
@@ -105,6 +115,8 @@ title: {title_env_name}
 
				 
			
 
				 {title}
			
 
				 
			
 
				+{gif}
			
 
				+
			
 
				 {docstring}
			
 
				 """
			
 
				     file = open(v_path, "w+", encoding="utf-8")
			
--- a/docs/scripts/utils.py
+++ b/docs/scripts/utils.py
@@ -1,3 +1,6 @@
 
				+import re
			
 
				+
			
 
				+
			
 
				 # stolen from python docs
			
 
				 def trim(docstring):
			
 
				     if not docstring:
			
@@ -25,20 +28,9 @@ def trim(docstring):
 
				     return "\n".join(trimmed)
			
 
				 
			
 
				 
			
 
				-# dont want envs which contain these
			
 
				-kill_strs = [
			
 
				-    "eterministic",
			
 
				-    "ALE",
			
 
				-    "-ram",
			
 
				-    "Frameskip",
			
 
				-    "Hard",
			
 
				-    "LanderContinu",
			
 
				-    "8x8",
			
 
				-    "uessing",
			
 
				-    "otter",
			
 
				-    "oinflip",
			
 
				-    "hain",
			
 
				-    "oulette",
			
 
				-    "DomainRandom",
			
 
				-    "RacingDiscrete",
			
 
				-]
			
 
				+def env_name_format(str):
			
 
				+    # KeyCorridorEnv
			
 
				+    split = re.findall(r"[A-Z](?:[a-z]+|[A-Z]*(?=[A-Z]|$))", str)
			
 
				+    # ['Key', 'Corridor', 'Env']
			
 
				+    split = filter(lambda x: x.upper() != "ENV", split)
			
 
				+    return " ".join(split)
			
--- a/minigrid/envs/blockedunlockpickup.py
+++ b/minigrid/envs/blockedunlockpickup.py
@@ -9,11 +9,8 @@ from minigrid.core.world_object import Ball
 
				 class BlockedUnlockPickupEnv(RoomGrid):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/BlockedUnlockPickup.png" alt="BlockedUnlockPickup" width="200px"/>
			
 
				-    </p>
			
 
				 
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     The agent has to pick up a box which is placed in another room, behind a
			
 
				     locked door. The door is also blocked by a ball which the agent has to move
			
@@ -21,7 +18,7 @@ class BlockedUnlockPickupEnv(RoomGrid):
 
				     ball, pick up the key, open the door and pick up the object in the other
			
 
				     room. This environment can be solved without relying on language.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "pick up the {color} {type}"
			
 
				 
			
@@ -30,7 +27,7 @@ class BlockedUnlockPickupEnv(RoomGrid):
 
				 
			
 
				     {type} is the type of the object. Can be "box" or "key".
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action            |
			
 
				     |-----|--------------|-------------------|
			
@@ -42,7 +39,7 @@ class BlockedUnlockPickupEnv(RoomGrid):
 
				     | 5   | toggle       | Unused            |
			
 
				     | 6   | done         | Unused            |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -50,18 +47,18 @@ class BlockedUnlockPickupEnv(RoomGrid):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
 
				     1. The agent picks up the correct box.
			
 
				     2. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     - `MiniGrid-BlockedUnlockPickup-v0`
			
 
				 
			
--- a/minigrid/envs/crossing.py
+++ b/minigrid/envs/crossing.py
@@ -12,21 +12,7 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 class CrossingEnv(MiniGridEnv):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/LavaCrossingS9N1.png" alt="LavaCrossingS9N1" width="200px"/>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/LavaCrossingS9N2.png" alt="LavaCrossingS9N2" width="200px"/>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/LavaCrossingS9N3.png" alt="LavaCrossingS9N3" width="200px"/>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/LavaCrossingS11N5.png" alt="LavaCrossingS11N5" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    <p>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/SimpleCrossingS9N1.png" alt="SimpleCrossingS9N1" width="200px"/>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/SimpleCrossingS9N2.png" alt="SimpleCrossingS9N2" width="200px"/>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/SimpleCrossingS9N3.png" alt="SimpleCrossingS9N3" width="200px"/>
			
 
				-    <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/SimpleCrossingS11N5.png" alt="SimpleCrossingS11N5" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     Depending on the `obstacle_type` parameter:
			
 
				     - `Lava` - The agent has to reach the green goal square on the other corner
			
@@ -40,12 +26,12 @@ class CrossingEnv(MiniGridEnv):
 
				         lava is replaced by walls. This MDP is therefore much easier and maybe
			
 
				         useful for quickly testing your algorithms.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				     Depending on the `obstacle_type` parameter:
			
 
				     - `Lava` - "avoid the lava and get to the green goal square"
			
 
				     - otherwise - "find the opening and get to the green goal square"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action       |
			
 
				     |-----|--------------|--------------|
			
@@ -57,7 +43,7 @@ class CrossingEnv(MiniGridEnv):
 
				     | 5   | toggle       | Unused       |
			
 
				     | 6   | done         | Unused       |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -65,11 +51,11 @@ class CrossingEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
@@ -77,7 +63,7 @@ class CrossingEnv(MiniGridEnv):
 
				     2. The agent falls into lava.
			
 
				     3. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     S: size of the map SxS.
			
 
				     N: number of valid crossings across lava or walls from the starting position
			
--- a/minigrid/envs/distshift.py
+++ b/minigrid/envs/distshift.py
@@ -9,12 +9,7 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 class DistShiftEnv(MiniGridEnv):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/DistShift1.png" alt="DistShift1" width="200px"/>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/DistShift2.png" alt="DistShift2" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     This environment is based on one of the DeepMind [AI safety gridworlds](https://github.com/deepmind/ai-safety-gridworlds).
			
 
				     The agent starts in the
			
@@ -24,11 +19,11 @@ class DistShiftEnv(MiniGridEnv):
 
				     different variants of the environment, so that the agent can be trained on
			
 
				     one variant and tested on the other.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "get to the green goal square"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action       |
			
 
				     |-----|--------------|--------------|
			
@@ -40,7 +35,7 @@ class DistShiftEnv(MiniGridEnv):
 
				     | 5   | toggle       | Unused       |
			
 
				     | 6   | done         | Unused       |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -48,11 +43,11 @@ class DistShiftEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
@@ -60,7 +55,7 @@ class DistShiftEnv(MiniGridEnv):
 
				     2. The agent falls into lava.
			
 
				     3. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     - `MiniGrid-DistShift1-v0`
			
 
				     - `MiniGrid-DistShift2-v0`
			
--- a/minigrid/envs/doorkey.py
+++ b/minigrid/envs/doorkey.py
@@ -9,23 +9,18 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 class DoorKeyEnv(MiniGridEnv):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/door-key-curriculum.gif" alt="door-key-curriculum" width="200px"/>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/door-key-env.png" alt="door-key-env" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     This environment has a key that the agent must pick up in order to unlock a
			
 
				     goal and then get to the green goal square. This environment is difficult,
			
 
				     because of the sparse reward, to solve using classical RL algorithms. It is
			
 
				     useful to experiment with curiosity or curriculum learning.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "use the key to open the door and then get to the goal"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action                    |
			
 
				     |-----|--------------|---------------------------|
			
@@ -37,7 +32,7 @@ class DoorKeyEnv(MiniGridEnv):
 
				     | 5   | toggle       | Toggle/activate an object |
			
 
				     | 6   | done         | Unused                    |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -45,18 +40,18 @@ class DoorKeyEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
 
				     1. The agent reaches the goal.
			
 
				     2. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     - `MiniGrid-DoorKey-5x5-v0`
			
 
				     - `MiniGrid-DoorKey-6x6-v0`
			
--- a/minigrid/envs/dynamicobstacles.py
+++ b/minigrid/envs/dynamicobstacles.py
@@ -11,11 +11,7 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 
			
 
				 class DynamicObstaclesEnv(MiniGridEnv):
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/dynamic_obstacles.gif" alt="dynamic_obstacles" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     This environment is an empty room with moving obstacles.
			
 
				     The goal of the agent is to reach the green goal square without colliding
			
@@ -24,11 +20,11 @@ class DynamicObstaclesEnv(MiniGridEnv):
 
				     Dynamic Obstacle Avoidance for mobile robots with Reinforcement Learning in
			
 
				     Partial Observability.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "get to the green goal square"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action       |
			
 
				     |-----|--------------|--------------|
			
@@ -40,7 +36,7 @@ class DynamicObstaclesEnv(MiniGridEnv):
 
				     | 5   | toggle       | Unused       |
			
 
				     | 6   | done         | Unused       |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -48,12 +44,12 @@ class DynamicObstaclesEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure. A '-1' penalty is
			
 
				     subtracted if the agent collides with an obstacle.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
@@ -61,7 +57,7 @@ class DynamicObstaclesEnv(MiniGridEnv):
 
				     2. The agent collides with an obstacle.
			
 
				     3. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     - `MiniGrid-Dynamic-Obstacles-5x5-v0`
			
 
				     - `MiniGrid-Dynamic-Obstacles-Random-5x5-v0`
			
--- a/minigrid/envs/empty.py
+++ b/minigrid/envs/empty.py
@@ -8,11 +8,7 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 
			
 
				 class EmptyEnv(MiniGridEnv):
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/empty-env.png" alt="dempty-env" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     This environment is an empty room, and the goal of the agent is to reach the
			
 
				     green goal square, which provides a sparse reward. A small penalty is
			
@@ -23,11 +19,11 @@ class EmptyEnv(MiniGridEnv):
 
				     at a random position for each episode, while the regular variants have the
			
 
				     agent always starting in the corner opposite to the goal.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "get to the green goal square"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action       |
			
 
				     |-----|--------------|--------------|
			
@@ -39,7 +35,7 @@ class EmptyEnv(MiniGridEnv):
 
				     | 5   | toggle       | Unused       |
			
 
				     | 6   | done         | Unused       |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -47,18 +43,18 @@ class EmptyEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
 
				     1. The agent reaches the goal.
			
 
				     2. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     - `MiniGrid-Empty-5x5-v0`
			
 
				     - `MiniGrid-Empty-Random-5x5-v0`
			
--- a/minigrid/envs/fetch.py
+++ b/minigrid/envs/fetch.py
@@ -10,18 +10,14 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 class FetchEnv(MiniGridEnv):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/fetch-env.png" alt="fetch-env" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     This environment has multiple objects of assorted types and colors. The
			
 
				     agent receives a textual string as part of its observation telling it which
			
 
				     object to pick up. Picking up the wrong object terminates the episode with
			
 
				     zero reward.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "{syntax} {color} {type}"
			
 
				 
			
@@ -33,7 +29,7 @@ class FetchEnv(MiniGridEnv):
 
				 
			
 
				     {type} is the type of the object. Can be "key" or "ball".
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action               |
			
 
				     |-----|--------------|----------------------|
			
@@ -45,7 +41,7 @@ class FetchEnv(MiniGridEnv):
 
				     | 5   | toggle       | Unused               |
			
 
				     | 6   | done         | Unused               |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -53,11 +49,11 @@ class FetchEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
@@ -65,7 +61,7 @@ class FetchEnv(MiniGridEnv):
 
				     2. The agent picks up the wrong object.
			
 
				     3. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     N: number of objects to be generated.
			
 
				 
			
--- a/minigrid/envs/fourrooms.py
+++ b/minigrid/envs/fourrooms.py
@@ -7,22 +7,18 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 class FourRoomsEnv(MiniGridEnv):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/four-rooms-env.png" alt="four-rooms-env" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     Classic four room reinforcement learning environment. The agent must
			
 
				     navigate in a maze composed of four rooms interconnected by 4 gaps in the
			
 
				     walls. To obtain a reward, the agent must reach the green goal square. Both
			
 
				     the agent and the goal square are randomly placed in any of the four rooms.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "reach the goal"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action       |
			
 
				     |-----|--------------|--------------|
			
@@ -34,7 +30,7 @@ class FourRoomsEnv(MiniGridEnv):
 
				     | 5   | toggle       | Unused       |
			
 
				     | 6   | done         | Unused       |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -42,18 +38,18 @@ class FourRoomsEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
 
				     1. The agent reaches the goal.
			
 
				     2. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     - `MiniGrid-FourRooms-v0`
			
 
				 
			
--- a/minigrid/envs/gotodoor.py
+++ b/minigrid/envs/gotodoor.py
@@ -9,14 +9,7 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 
			
 
				 class GoToDoorEnv(MiniGridEnv):
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/gotodoor-6x6.png" alt="gotodoor-6x6" width="200px"/>
			
 
				-        <video width="200px">
			
 
				-            <source src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/gotodoor-6x6.mp4" type="video/mp4" />
			
 
				-        </video>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     This environment is a room with four doors, one on each wall. The agent
			
 
				     receives a textual (mission) string as input, telling it which door to go
			
@@ -24,14 +17,14 @@ class GoToDoorEnv(MiniGridEnv):
 
				     the `done` action next to the correct door, as indicated in the mission
			
 
				     string.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "go to the {color} door"
			
 
				 
			
 
				     {color} is the color of the door. Can be "red", "green", "blue", "purple",
			
 
				     "yellow" or "grey".
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action               |
			
 
				     |-----|--------------|----------------------|
			
@@ -43,7 +36,7 @@ class GoToDoorEnv(MiniGridEnv):
 
				     | 5   | toggle       | Unused               |
			
 
				     | 6   | done         | Done completing task |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -51,18 +44,18 @@ class GoToDoorEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
 
				     1. The agent stands next to the correct door performing the `done` action.
			
 
				     2. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     - `MiniGrid-GoToDoor-5x5-v0`
			
 
				     - `MiniGrid-GoToDoor-6x6-v0`
			
--- a/minigrid/envs/keycorridor.py
+++ b/minigrid/envs/keycorridor.py
@@ -8,19 +8,7 @@ from minigrid.core.roomgrid import RoomGrid
 
				 class KeyCorridorEnv(RoomGrid):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-    <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/KeyCorridorS3R1.png" alt="KeyCorridorS3R1" width="200px"/>
			
 
				-    <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/KeyCorridorS3R2.png" alt="KeyCorridorS3R2" width="200px"/>
			
 
				-    <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/KeyCorridorS3R3.png" alt="KeyCorridorS3R3" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    <p>
			
 
				-    <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/KeyCorridorS4R3.png" alt="KeyCorridorS4R3" width="200px"/>
			
 
				-    <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/KeyCorridorS5R3.png" alt="KeyCorridorS5R3" width="200px"/>
			
 
				-    <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/KeyCorridorS6R3.png" alt="KeyCorridorS6R3" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     This environment is similar to the locked room environment, but there are
			
 
				     multiple registered environment configurations of increasing size,
			
@@ -30,7 +18,7 @@ class KeyCorridorEnv(RoomGrid):
 
				     it. The mission string does not give the agent any clues as to where the
			
 
				     key is placed. This environment can be solved without relying on language.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "pick up the {color} {obj_type}"
			
 
				 
			
@@ -39,7 +27,7 @@ class KeyCorridorEnv(RoomGrid):
 
				 
			
 
				     {obj_type} is the type of the object. Can be "ball" or "key".
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action            |
			
 
				     |-----|--------------|-------------------|
			
@@ -51,7 +39,7 @@ class KeyCorridorEnv(RoomGrid):
 
				     | 5   | toggle       | Unused            |
			
 
				     | 6   | done         | Unused            |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -59,18 +47,18 @@ class KeyCorridorEnv(RoomGrid):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
 
				     1. The agent picks up the correct object.
			
 
				     2. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     S: room size.
			
 
				     R: Number of rows.
			
--- a/minigrid/envs/lavagap.py
+++ b/minigrid/envs/lavagap.py
@@ -11,24 +11,20 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 class LavaGapEnv(MiniGridEnv):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/LavaGapS6.png" alt="LavaGapS6" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     The agent has to reach the green goal square at the opposite corner of the
			
 
				     room, and must pass through a narrow gap in a vertical strip of deadly lava.
			
 
				     Touching the lava terminates the episode with a zero reward. This environment
			
 
				     is useful for studying safety and safe exploration.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     Depending on the `obstacle_type` parameter:
			
 
				     - `Lava`: "avoid the lava and get to the green goal square"
			
 
				     - otherwise: "find the opening and get to the green goal square"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action       |
			
 
				     |-----|--------------|--------------|
			
@@ -40,7 +36,7 @@ class LavaGapEnv(MiniGridEnv):
 
				     | 5   | toggle       | Unused       |
			
 
				     | 6   | done         | Unused       |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -48,11 +44,11 @@ class LavaGapEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
@@ -60,7 +56,7 @@ class LavaGapEnv(MiniGridEnv):
 
				     2. The agent falls into lava.
			
 
				     3. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     S: size of map SxS.
			
 
				 
			
--- a/minigrid/envs/lockedroom.py
+++ b/minigrid/envs/lockedroom.py
@@ -24,7 +24,7 @@ class LockedRoom:
 
				 class LockedRoomEnv(MiniGridEnv):
			
 
				 
			
 
				     """
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     The environment has six rooms, one of which is locked. The agent receives
			
 
				     a textual mission string as input, telling it which room to go to in order
			
@@ -32,14 +32,14 @@ class LockedRoomEnv(MiniGridEnv):
 
				     room in order to reach the final goal. This environment is extremely
			
 
				     difficult to solve with vanilla reinforcement learning alone.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal"
			
 
				 
			
 
				     {lockedroom_color}, {keyroom_color}, and {door_color} can be "red", "green",
			
 
				     "blue", "purple", "yellow" or "grey".
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action                    |
			
 
				     |-----|--------------|---------------------------|
			
@@ -51,7 +51,7 @@ class LockedRoomEnv(MiniGridEnv):
 
				     | 5   | toggle       | Toggle/activate an object |
			
 
				     | 6   | done         | Unused                    |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -59,18 +59,18 @@ class LockedRoomEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
 
				     1. The agent reaches the goal.
			
 
				     2. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     - `MiniGrid-LockedRoom-v0`
			
 
				 
			
--- a/minigrid/envs/memory.py
+++ b/minigrid/envs/memory.py
@@ -12,7 +12,7 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 class MemoryEnv(MiniGridEnv):
			
 
				 
			
 
				     """
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     This environment is a memory test. The agent starts in a small room where it
			
 
				     sees an object. It then has to go through a narrow hallway which ends in a
			
@@ -20,11 +20,11 @@ class MemoryEnv(MiniGridEnv):
 
				     as the object in the starting room. The agent has to remember the initial
			
 
				     object, and go to the matching object at split.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "go to the matching object at the end of the hallway"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action                    |
			
 
				     |-----|--------------|---------------------------|
			
@@ -36,7 +36,7 @@ class MemoryEnv(MiniGridEnv):
 
				     | 5   | toggle       | Toggle/activate an object |
			
 
				     | 6   | done         | Unused                    |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -44,11 +44,11 @@ class MemoryEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
@@ -56,7 +56,7 @@ class MemoryEnv(MiniGridEnv):
 
				     2. The agent reaches the wrong matching object.
			
 
				     3. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     S: size of map SxS.
			
 
				 
			
--- a/minigrid/envs/multiroom.py
+++ b/minigrid/envs/multiroom.py
@@ -18,11 +18,7 @@ class MultiRoom:
 
				 class MultiRoomEnv(MiniGridEnv):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/multi-room.gif" alt="multi-room" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     This environment has a series of connected rooms with doors that must be
			
 
				     opened in order to get to the next room. The final room has the green goal
			
@@ -30,11 +26,11 @@ class MultiRoomEnv(MiniGridEnv):
 
				     solve using RL alone. However, by gradually increasing the number of rooms
			
 
				     and building a curriculum, the environment can be solved.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "traverse the rooms to get to the goal"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action                    |
			
 
				     |-----|--------------|---------------------------|
			
@@ -46,7 +42,7 @@ class MultiRoomEnv(MiniGridEnv):
 
				     | 5   | toggle       | Toggle/activate an object |
			
 
				     | 6   | done         | Unused                    |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -54,18 +50,18 @@ class MultiRoomEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
 
				     1. The agent reaches the goal.
			
 
				     2. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     S: size of map SxS.
			
 
				     N: number of rooms.
			
--- a/minigrid/envs/obstructedmaze.py
+++ b/minigrid/envs/obstructedmaze.py
@@ -9,35 +9,17 @@ from minigrid.core.world_object import Ball, Box, Key
 
				 class ObstructedMazeEnv(RoomGrid):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-1Dl.png" alt="ObstructedMaze-1Dl" width="200px"/>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-1Dlh.png" alt="ObstructedMaze-1Dlh" width="200px"/>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-1Dlhb.png" alt="ObstructedMaze-1Dlhb" width="200px"/>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-1Q.png" alt="ObstructedMaze-1Q" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    <p>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-2Dl.png" alt="ObstructedMaze-2Dl" width="200px"/>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-2Dlh.png" alt="ObstructedMaze-2Dlh" width="200px"/>
			
 
				-    <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-2Dlhb.png" alt="ObstructedMaze-2Dlhb" width="200px"/>
			
 
				-    <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-2Q.png" alt="ObstructedMaze-2Q" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    <p>
			
 
				-    <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-4Q.png" alt="ObstructedMaze-4Q" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     The agent has to pick up a box which is placed in a corner of a 3x3 maze.
			
 
				     The doors are locked, the keys are hidden in boxes and doors are obstructed
			
 
				     by balls. This environment can be solved without relying on language.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "pick up the {COLOR_NAMES[0]} ball"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action                    |
			
 
				     |-----|--------------|---------------------------|
			
@@ -49,7 +31,7 @@ class ObstructedMazeEnv(RoomGrid):
 
				     | 5   | toggle       | Toggle/activate an object |
			
 
				     | 6   | done         | Unused                    |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -57,18 +39,18 @@ class ObstructedMazeEnv(RoomGrid):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
 
				     1. The agent picks up the blue ball.
			
 
				     2. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     "NDl": N is the number of locked doors.
			
 
				     "h" if the key is hidden in a box.
			
--- a/minigrid/envs/putnear.py
+++ b/minigrid/envs/putnear.py
@@ -10,14 +10,14 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 class PutNearEnv(MiniGridEnv):
			
 
				 
			
 
				     """
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     The agent is instructed through a textual string to pick up an object and
			
 
				     place it next to another object. This environment is easy to solve with two
			
 
				     objects, but difficult to solve with more, as it involves both textual
			
 
				     understanding and spatial reasoning involving multiple objects.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "put the {move_color} {move_type} near the {target_color} {target_type}"
			
 
				 
			
@@ -26,7 +26,7 @@ class PutNearEnv(MiniGridEnv):
 
				 
			
 
				     {move_type} and {target_type} can be "box", "ball" or "key".
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action            |
			
 
				     |-----|--------------|-------------------|
			
@@ -38,7 +38,7 @@ class PutNearEnv(MiniGridEnv):
 
				     | 5   | toggle       | Unused            |
			
 
				     | 6   | done         | Unused            |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -46,11 +46,11 @@ class PutNearEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
@@ -58,7 +58,7 @@ class PutNearEnv(MiniGridEnv):
 
				     2. The agent drops the correct object near the target.
			
 
				     3. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     N: number of objects.
			
 
				 
			
--- a/minigrid/envs/redbluedoors.py
+++ b/minigrid/envs/redbluedoors.py
@@ -9,18 +9,18 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 class RedBlueDoorEnv(MiniGridEnv):
			
 
				 
			
 
				     """
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     The agent is randomly placed within a room with one red and one blue door
			
 
				     facing opposite directions. The agent has to open the red door and then open
			
 
				     the blue door, in that order. Note that, surprisingly, this environment is
			
 
				     solvable without memory.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "open the red door then the blue door"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action                    |
			
 
				     |-----|--------------|---------------------------|
			
@@ -32,7 +32,7 @@ class RedBlueDoorEnv(MiniGridEnv):
 
				     | 5   | toggle       | Toggle/activate an object |
			
 
				     | 6   | done         | Unused                    |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -40,11 +40,11 @@ class RedBlueDoorEnv(MiniGridEnv):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
@@ -52,7 +52,7 @@ class RedBlueDoorEnv(MiniGridEnv):
 
				     2. The agent opens the blue door without having opened the red door yet.
			
 
				     3. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     - `MiniGrid-RedBlueDoors-6x6-v0`
			
 
				     - `MiniGrid-RedBlueDoors-8x8-v0`
			
--- a/minigrid/envs/unlock.py
+++ b/minigrid/envs/unlock.py
@@ -7,20 +7,16 @@ from minigrid.core.roomgrid import RoomGrid
 
				 class UnlockEnv(RoomGrid):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/Unlock.png" alt="Unlock" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     The agent has to open a locked door. This environment can be solved without
			
 
				     relying on language.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "open the door"
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action                    |
			
 
				     |-----|--------------|---------------------------|
			
@@ -32,7 +28,7 @@ class UnlockEnv(RoomGrid):
 
				     | 5   | toggle       | Toggle/activate an object |
			
 
				     | 6   | done         | Unused                    |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -40,18 +36,18 @@ class UnlockEnv(RoomGrid):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
 
				     1. The agent opens the door.
			
 
				     2. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     - `MiniGrid-Unlock-v0`
			
 
				 
			
--- a/minigrid/envs/unlockpickup.py
+++ b/minigrid/envs/unlockpickup.py
@@ -8,23 +8,19 @@ from minigrid.core.roomgrid import RoomGrid
 
				 class UnlockPickupEnv(RoomGrid):
			
 
				 
			
 
				     """
			
 
				-    <p>
			
 
				-        <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/UnlockPickup.png" alt="UnlockPickup" width="200px"/>
			
 
				-    </p>
			
 
				-
			
 
				-    ### Description
			
 
				+    ## Description
			
 
				 
			
 
				     The agent has to pick up a box which is placed in another room, behind a
			
 
				     locked door. This environment can be solved without relying on language.
			
 
				 
			
 
				-    ### Mission Space
			
 
				+    ## Mission Space
			
 
				 
			
 
				     "pick up the {color} box"
			
 
				 
			
 
				     {color} is the color of the box. Can be "red", "green", "blue", "purple",
			
 
				     "yellow" or "grey".
			
 
				 
			
 
				-    ### Action Space
			
 
				+    ## Action Space
			
 
				 
			
 
				     | Num | Name         | Action                    |
			
 
				     |-----|--------------|---------------------------|
			
@@ -36,7 +32,7 @@ class UnlockPickupEnv(RoomGrid):
 
				     | 5   | toggle       | Toggle/activate an object |
			
 
				     | 6   | done         | Unused                    |
			
 
				 
			
 
				-    ### Observation Encoding
			
 
				+    ## Observation Encoding
			
 
				 
			
 
				     - Each tile is encoded as a 3 dimensional tuple:
			
 
				         `(OBJECT_IDX, COLOR_IDX, STATE)`
			
@@ -44,18 +40,18 @@ class UnlockPickupEnv(RoomGrid):
 
				         [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				     - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				 
			
 
				-    ### Rewards
			
 
				+    ## Rewards
			
 
				 
			
 
				     A reward of '1' is given for success, and '0' for failure.
			
 
				 
			
 
				-    ### Termination
			
 
				+    ## Termination
			
 
				 
			
 
				     The episode ends if any one of the following conditions is met:
			
 
				 
			
 
				     1. The agent picks up the correct box.
			
 
				     2. Timeout (see `max_steps`).
			
 
				 
			
 
				-    ### Registered Configurations
			
 
				+    ## Registered Configurations
			
 
				 
			
 
				     - `MiniGrid-UnlockPickup-v0`