
Documentation (#242)

Feng 2 years ago
parent
commit
f2a30bd1b2
92 changed files with 2893 additions and 0 deletions
  1. 41 0
      .github/workflows/gh-pages.yml
  2. 8 0
      .gitignore
  3. 1 0
      CNAME
  4. 6 0
      docs/.gitignore
  5. 8 0
      docs/404.md
  6. 21 0
      docs/LICENSE
  7. 20 0
      docs/Makefile
  8. 40 0
      docs/README.md
  9. 53 0
      docs/_static/css/custom.css
  10. 35 0
      docs/_static/css/env_pages.css
  11. 147 0
      docs/_static/img/favicon.svg
  12. BIN
      docs/_static/img/figures/BlockedUnlockPickup.png
  13. BIN
      docs/_static/img/figures/DistShift1.png
  14. BIN
      docs/_static/img/figures/DistShift2.png
  15. BIN
      docs/_static/img/figures/KeyCorridorS3R1.png
  16. BIN
      docs/_static/img/figures/KeyCorridorS3R2.png
  17. BIN
      docs/_static/img/figures/KeyCorridorS3R3.png
  18. BIN
      docs/_static/img/figures/KeyCorridorS4R3.png
  19. BIN
      docs/_static/img/figures/KeyCorridorS5R3.png
  20. BIN
      docs/_static/img/figures/KeyCorridorS6R3.png
  21. BIN
      docs/_static/img/figures/LavaCrossingS11N5.png
  22. BIN
      docs/_static/img/figures/LavaCrossingS9N1.png
  23. BIN
      docs/_static/img/figures/LavaCrossingS9N2.png
  24. BIN
      docs/_static/img/figures/LavaCrossingS9N3.png
  25. BIN
      docs/_static/img/figures/LavaGapS6.png
  26. BIN
      docs/_static/img/figures/ObstructedMaze-1Dl.png
  27. BIN
      docs/_static/img/figures/ObstructedMaze-1Dlh.png
  28. BIN
      docs/_static/img/figures/ObstructedMaze-1Dlhb.png
  29. BIN
      docs/_static/img/figures/ObstructedMaze-1Q.png
  30. BIN
      docs/_static/img/figures/ObstructedMaze-2Dl.png
  31. BIN
      docs/_static/img/figures/ObstructedMaze-2Dlh.png
  32. BIN
      docs/_static/img/figures/ObstructedMaze-2Dlhb.png
  33. BIN
      docs/_static/img/figures/ObstructedMaze-2Q.png
  34. BIN
      docs/_static/img/figures/ObstructedMaze-4Q.png
  35. BIN
      docs/_static/img/figures/SimpleCrossingS11N5.png
  36. BIN
      docs/_static/img/figures/SimpleCrossingS9N1.png
  37. BIN
      docs/_static/img/figures/SimpleCrossingS9N2.png
  38. BIN
      docs/_static/img/figures/SimpleCrossingS9N3.png
  39. BIN
      docs/_static/img/figures/Unlock.png
  40. BIN
      docs/_static/img/figures/UnlockPickup.png
  41. BIN
      docs/_static/img/figures/door-key-curriculum.gif
  42. BIN
      docs/_static/img/figures/door-key-env.png
  43. BIN
      docs/_static/img/figures/dynamic_obstacles.gif
  44. BIN
      docs/_static/img/figures/empty-env.png
  45. BIN
      docs/_static/img/figures/fetch-env.png
  46. BIN
      docs/_static/img/figures/four-rooms-env.png
  47. BIN
      docs/_static/img/figures/gotodoor-6x6.mp4
  48. BIN
      docs/_static/img/figures/gotodoor-6x6.png
  49. BIN
      docs/_static/img/figures/multi-room.gif
  50. 1 0
      docs/_static/img/github_icon.svg
  51. 130 0
      docs/_static/img/minigrid-github.svg
  52. 185 0
      docs/_static/img/minigrid-text.svg
  53. BIN
      docs/_static/img/minigrid-white.png
  54. 151 0
      docs/_static/img/minigrid-white.svg
  55. BIN
      docs/_static/img/minigrid.png
  56. 147 0
      docs/_static/img/minigrid.svg
  57. 42 0
      docs/_templates/base.html
  58. 207 0
      docs/_templates/page.html
  59. 33 0
      docs/api/wrappers.md
  60. 86 0
      docs/conf.py
  61. 32 0
      docs/content/basic_usage.md
  62. 21 0
      docs/content/installation.md
  63. 50 0
      docs/content/pubs.md
  64. 59 0
      docs/environments/blocked_unlock_pickup.md
  65. 39 0
      docs/environments/design.md
  66. 58 0
      docs/environments/dist_shift2.md
  67. 56 0
      docs/environments/door_key.md
  68. 62 0
      docs/environments/dynamic.md
  69. 62 0
      docs/environments/empty.md
  70. 66 0
      docs/environments/fetch.md
  71. 53 0
      docs/environments/four_rooms.md
  72. 59 0
      docs/environments/go_to_door.md
  73. 10 0
      docs/environments/go_to_object.md
  74. 37 0
      docs/environments/index.md
  75. 69 0
      docs/environments/key_corridor_s6_r3.md
  76. 60 0
      docs/environments/lava_gap_s7.md
  77. 57 0
      docs/environments/locked_room.md
  78. 60 0
      docs/environments/memory_s7.md
  79. 59 0
      docs/environments/multi_room.md
  80. 11 0
      docs/environments/obstructed_maze.md
  81. 10 0
      docs/environments/playground.md
  82. 62 0
      docs/environments/put_near.md
  83. 55 0
      docs/environments/red_blue_doors.md
  84. 76 0
      docs/environments/simple_crossing_s11_n5.md
  85. 51 0
      docs/environments/unlock.md
  86. 54 0
      docs/environments/unlock_pickup.md
  87. 65 0
      docs/index.md
  88. 35 0
      docs/make.bat
  89. 7 0
      docs/requirements.txt
  90. 78 0
      docs/scripts/gen_mds.py
  91. 14 0
      docs/scripts/move404.py
  92. 44 0
      docs/scripts/utils.py

+ 41 - 0
.github/workflows/gh-pages.yml

@@ -0,0 +1,41 @@
+name: Deploy Docs
+on:
+  push:
+    branches: [master]
+
+permissions:
+  contents: write
+
+jobs:
+  docs:
+    name: Generate Website
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+
+      - name: Install dependencies
+        run: pip install -r docs/requirements.txt && git clone https://github.com/Farama-Foundation/MiniGrid.git && pip install ./MiniGrid
+
+      - name: Build Envs Docs
+        run: python docs/scripts/gen_mds.py
+
+      - name: Build
+        run: sphinx-build -b dirhtml -v docs _build
+
+      - name: Move 404
+        run: mv _build/404/index.html _build/404.html
+
+      - name: Update 404 links
+        run: python docs/scripts/move404.py _build/404.html
+
+      - name: Remove .doctrees
+        run: rm -r _build/.doctrees
+
+      - name: Upload to GitHub Pages
+        uses: JamesIves/github-pages-deploy-action@v4
+        with:
+          folder: _build
+          clean-exclude: | 
+            *.*.*/

+ 8 - 0
.gitignore

@@ -7,3 +7,11 @@ trained_models
 build/*
 dist/*
 .idea/
+
+#docs
+_build/*
+.DS_Store
+_site
+.jekyll-cache
+__pycache__
+.vscode/

+ 1 - 0
CNAME

@@ -0,0 +1 @@
+fenggu.me

+ 6 - 0
docs/.gitignore

@@ -0,0 +1,6 @@
+.DS_Store
+_site
+.jekyll-cache
+__pycache__
+_build/
+.vscode/

+ 8 - 0
docs/404.md

@@ -0,0 +1,8 @@
+---
+hide-toc: true
+orphan: true
+---
+
+# 404
+
+## Page Not Found

+ 21 - 0
docs/LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Farama Foundation
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 20 - 0
docs/Makefile

@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

+ 40 - 0
docs/README.md

@@ -0,0 +1,40 @@
+# MiniGrid-docs
+
+
+This repo contains the [new website]() for [MiniGrid](https://github.com/Farama-Foundation/MiniGrid). The site is currently in beta, and we are in the process of adding and editing information.
+
+
+The documentation uses Sphinx. However, the pages are written in regular Markdown (md), NOT reStructuredText (rst).
+
+If you are modifying a non-environment page, please open a pull request against this repo. Otherwise, follow the steps below:
+
+## Instructions for modifying environment pages
+
+### Editing an environment page
+
+The environment pages are autogenerated, so do not edit their md files directly. Instead, fork MiniGrid and edit the docstring in the environment's Python file. Then, pip install your MiniGrid fork and run `docs/scripts/gen_mds.py` in this repo. This will automatically generate a md documentation file for the environment.
+
+## Build the Documentation
+
+Install the required packages and MiniGrid (or your fork):
+
+```
+pip install -r requirements.txt
+pip install minigrid
+```
+
+To build the documentation once:
+
+```
+cd docs
+make dirhtml
+```
+
+To rebuild the documentation automatically every time a change is made:
+
+```
+cd docs
+sphinx-autobuild -b dirhtml . _build/html
+```

+ 53 - 0
docs/_static/css/custom.css

@@ -0,0 +1,53 @@
+h1 {
+    font-size: 2.25rem;
+}
+
+h2 {
+    font-size: 1.75rem;
+}
+
+h3 {
+    font-size: 1.45rem;
+}
+
+.cookie_alert {
+    position: fixed;
+    display: flex;
+    width: 100%;
+    min-height: 70px;
+    background-color: var(--color-background-secondary);
+    color: var(--color-foreground-primary);
+    border-top: 1px solid var(--color-background-border);
+    bottom: 0;
+    left: 0;
+    z-index: 99999;
+}
+
+.cookie_container {
+    display: flex;
+    align-items: center;
+    width: 700px;
+    max-width: calc(100% - 28px);
+    margin: auto;
+}
+
+.cookie_alert p {
+    flex: 1;
+}
+
+.cookie_alert button {
+    background-color: transparent;
+    border: none;
+    width: 26px;
+    height: 26px;
+    padding: 0;
+    cursor: pointer;
+}
+
+.cookie_alert button svg .cls-1 {
+     stroke: var(--color-foreground-primary);
+}
+
+.cookie_alert button svg {
+    width: 100%;
+}

+ 35 - 0
docs/_static/css/env_pages.css

@@ -0,0 +1,35 @@
+.env-grid {
+    display: flex;
+    flex-wrap: wrap;
+    justify-content: center;
+    width: 100%;
+    box-sizing: border-box;
+}
+.env-grid__cell {
+    display: flex;
+    flex-direction: column;
+    width: 180px;
+    height: 180px;
+    padding: 10px;
+}
+.cell__image-container {
+    display: flex;
+    height: 148px;
+    justify-content: center;
+}
+.cell__image-container img {
+    max-height: 100%;
+}
+.cell__title {
+    display: flex;
+    justify-content: center;
+    text-align: center;
+    align-items: flex-end;
+    height: 32px;
+    line-height: 16px;
+}
+.more-btn {
+    width: 240px;
+    margin: 12px auto;
+    display: block;
+}

File diff suppressed because it is too large
+ 147 - 0
docs/_static/img/favicon.svg


BIN
docs/_static/img/figures/BlockedUnlockPickup.png


BIN
docs/_static/img/figures/DistShift1.png


BIN
docs/_static/img/figures/DistShift2.png


BIN
docs/_static/img/figures/KeyCorridorS3R1.png


BIN
docs/_static/img/figures/KeyCorridorS3R2.png


BIN
docs/_static/img/figures/KeyCorridorS3R3.png


BIN
docs/_static/img/figures/KeyCorridorS4R3.png


BIN
docs/_static/img/figures/KeyCorridorS5R3.png


BIN
docs/_static/img/figures/KeyCorridorS6R3.png


BIN
docs/_static/img/figures/LavaCrossingS11N5.png


BIN
docs/_static/img/figures/LavaCrossingS9N1.png


BIN
docs/_static/img/figures/LavaCrossingS9N2.png


BIN
docs/_static/img/figures/LavaCrossingS9N3.png


BIN
docs/_static/img/figures/LavaGapS6.png


BIN
docs/_static/img/figures/ObstructedMaze-1Dl.png


BIN
docs/_static/img/figures/ObstructedMaze-1Dlh.png


BIN
docs/_static/img/figures/ObstructedMaze-1Dlhb.png


BIN
docs/_static/img/figures/ObstructedMaze-1Q.png


BIN
docs/_static/img/figures/ObstructedMaze-2Dl.png


BIN
docs/_static/img/figures/ObstructedMaze-2Dlh.png


BIN
docs/_static/img/figures/ObstructedMaze-2Dlhb.png


BIN
docs/_static/img/figures/ObstructedMaze-2Q.png


BIN
docs/_static/img/figures/ObstructedMaze-4Q.png


BIN
docs/_static/img/figures/SimpleCrossingS11N5.png


BIN
docs/_static/img/figures/SimpleCrossingS9N1.png


BIN
docs/_static/img/figures/SimpleCrossingS9N2.png


BIN
docs/_static/img/figures/SimpleCrossingS9N3.png


BIN
docs/_static/img/figures/Unlock.png


BIN
docs/_static/img/figures/UnlockPickup.png


BIN
docs/_static/img/figures/door-key-curriculum.gif


BIN
docs/_static/img/figures/door-key-env.png


BIN
docs/_static/img/figures/dynamic_obstacles.gif


BIN
docs/_static/img/figures/empty-env.png


BIN
docs/_static/img/figures/fetch-env.png


BIN
docs/_static/img/figures/four-rooms-env.png


BIN
docs/_static/img/figures/gotodoor-6x6.mp4


BIN
docs/_static/img/figures/gotodoor-6x6.png


BIN
docs/_static/img/figures/multi-room.gif


File diff suppressed because it is too large
+ 1 - 0
docs/_static/img/github_icon.svg


File diff suppressed because it is too large
+ 130 - 0
docs/_static/img/minigrid-github.svg


File diff suppressed because it is too large
+ 185 - 0
docs/_static/img/minigrid-text.svg


BIN
docs/_static/img/minigrid-white.png


File diff suppressed because it is too large
+ 151 - 0
docs/_static/img/minigrid-white.svg


BIN
docs/_static/img/minigrid.png


File diff suppressed because it is too large
+ 147 - 0
docs/_static/img/minigrid.svg


+ 42 - 0
docs/_templates/base.html

@@ -0,0 +1,42 @@
+{% extends "furo/base.html" %}
+
+{%- block regular_scripts -%}
+{{ super() }}
+
+<script>
+    (() => {
+        if (!localStorage.getItem("shownCookieAlert")) {
+            const boxElem = document.createElement("div");
+            boxElem.classList.add("cookie_alert");
+            const containerElem = document.createElement("div");
+            containerElem.classList.add("cookie_container");
+            const textElem = document.createElement("p");
+            textElem.innerHTML = `This page uses <a href="https://analytics.google.com/">
+                                Google Analytics</a> to collect statistics. You can disable it by blocking
+                                the JavaScript coming from www.google-analytics.com.`;
+            containerElem.appendChild(textElem);
+            const closeBtn = document.createElement("button");
+            closeBtn.innerHTML = `<?xml version="1.0" ?><svg viewBox="0 0 32 32" xmlns="http://www.w3.org/2000/svg"><defs><style>.cls-1{fill:none;stroke:#000;stroke-linecap:round;stroke-linejoin:round;stroke-width:2px;}</style></defs><title/><g id="cross"><line class="cls-1" x1="7" x2="25" y1="7" y2="25"/><line class="cls-1" x1="7" x2="25" y1="25" y2="7"/></g></svg>`
+            closeBtn.onclick = () => {
+                localStorage.setItem("shownCookieAlert", "true");
+                boxElem.style.display = "none";
+            }
+            containerElem.appendChild(closeBtn);
+            boxElem.appendChild(containerElem);
+            document.body.appendChild(boxElem);
+        }
+    })()
+
+</script>
+
+<!-- Google tag (gtag.js) -->
+<script async src="https://www.googletagmanager.com/gtag/js?id=G-JGXSLW7N06"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments);}
+  gtag('js', new Date());
+
+  gtag('config', 'G-JGXSLW7N06');
+</script>
+
+{%- endblock regular_scripts -%}

File diff suppressed because it is too large
+ 207 - 0
docs/_templates/page.html


+ 33 - 0
docs/api/wrappers.md

@@ -0,0 +1,33 @@
+---
+title: Wrappers
+lastpage:
+---
+
+# Wrappers
+
+MiniGrid is built to support tasks involving natural language and sparse rewards.
+The observations are dictionaries, with an 'image' field containing a partially
+observable view of the environment, a 'mission' field which is a textual string
+describing the objective the agent should reach to get a reward, and a 'direction'
+field which can be used as an optional compass. Using dictionaries makes it
+easy for you to add additional information to observations
+if you need to, without having to encode everything into a single tensor.
+
+A variety of wrappers for changing the observation format are available in [minigrid/wrappers.py](../../../../minigrid/wrappers.py).
+If your RL code expects one single tensor for observations, take a look at `FlatObsWrapper`. 
+There is also an `ImgObsWrapper` that gets rid of the 'mission' field in observations, leaving only the image field tensor.
+
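+For instance, a minimal sketch of using `FlatObsWrapper` (the environment name
+is just an example, and the gym 0.26 `reset` API matches the snippet further
+down this page):
+
+```python
+import gym
+from minigrid.wrappers import FlatObsWrapper
+
+env = gym.make('MiniGrid-Empty-8x8-v0')
+env = FlatObsWrapper(env)  # Flatten the image and encode the mission string
+obs, _ = env.reset()       # obs is now a single 1D array
+print(obs.shape)
+```
+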
+Please note that the default observation format is a partially observable view of the environment using a
+compact and efficient encoding, with 3 input values per visible grid cell, 7x7x3 values total.
+These values are **not pixels**. If you want to obtain an array of RGB pixels as observations instead,
+use the `RGBImgPartialObsWrapper`. You can use it as follows:
+
+```python
+import gym
+from minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper
+
+env = gym.make('MiniGrid-Empty-8x8-v0')
+env = RGBImgPartialObsWrapper(env) # Get pixel observations
+env = ImgObsWrapper(env) # Get rid of the 'mission' field
+obs, _ = env.reset() # This now produces an RGB tensor only
+```

+ 86 - 0
docs/conf.py

@@ -0,0 +1,86 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+# -- Project information -----------------------------------------------------
+# TODO: change to minigrid version
+# from TODO import __version__ as minigrid_version
+
+import os
+import sys
+
+project = "MiniGrid"
+copyright = "2022"
+author = "Farama Foundation"
+
+# The full version, including alpha/beta/rc tags
+# TODO: change to minigrid version
+release = "1.2.1"
+
+sys.path.insert(0, os.path.abspath("../.."))
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    "sphinx.ext.napoleon",
+    "sphinx.ext.doctest",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.githubpages",
+    "myst_parser",
+    "notfound.extension",
+]
+
+source_suffix = {
+    ".rst": "restructuredtext",
+    ".md": "markdown",
+}
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+# Napoleon settings
+napoleon_use_ivar = True
+napoleon_use_admonition_for_references = True
+# See https://github.com/sphinx-doc/sphinx/issues/9119
+napoleon_custom_sections = [("Returns", "params_style")]
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = "furo"
+html_title = "MiniGrid Documentation"
+html_baseurl = "https://fenggu.me/"
+html_copy_source = False
+html_favicon = "_static/img/minigrid-white.png"
+html_theme_options = {
+    "light_logo": "img/minigrid.png",
+    "dark_logo": "img/minigrid-white.png",
+}
+html_static_path = ["_static"]
+html_css_files = [
+    "css/custom.css",
+]
+
+notfound_urls_prefix = "/MiniGrid/"

+ 32 - 0
docs/content/basic_usage.md

@@ -0,0 +1,32 @@
+---
+layout: "contents"
+title: Basic Usage
+firstpage:
+---
+
+# Basic Usage
+
+There is a UI application which allows you to manually control the agent with the arrow keys:
+```
+./minigrid/manual_control.py
+```
+
+The environment to run can be selected with the `--env` option, e.g.:
+```
+./minigrid/manual_control.py --env MiniGrid-Empty-8x8-v0
+```
+
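+The environments can also be driven programmatically. Below is a minimal
+random-agent loop, as a sketch: it assumes the gym 0.26 `reset`/`step` API and
+that importing `minigrid` registers the environments; adjust to your installed
+version.
+
+```python
+import gym
+import minigrid  # noqa: F401 -- importing registers the MiniGrid environments
+
+env = gym.make("MiniGrid-Empty-8x8-v0")
+obs, _ = env.reset()
+for _ in range(100):
+    action = env.action_space.sample()  # Replace with your policy
+    obs, reward, terminated, truncated, info = env.step(action)
+    if terminated or truncated:
+        obs, _ = env.reset()
+env.close()
+```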
+
+## Reinforcement Learning
+
+If you want to train an agent with reinforcement learning, I recommend using the code found in the [torch-rl](https://github.com/lcswillems/torch-rl) repository. 
+This code has been tested and is known to work with this environment. The default hyper-parameters are also known to converge.
+
+A sample training command is:
+
+```
+cd torch-rl
+python3 -m scripts.train --env MiniGrid-Empty-8x8-v0 --algo ppo
+```

+ 21 - 0
docs/content/installation.md

@@ -0,0 +1,21 @@
+---
+layout: "contents"
+title: Installation
+firstpage:
+---
+
+# Installation
+
+There is now a [pip package](https://pypi.org/project/minigrid/) available, which is updated periodically:
+
+```
+pip install minigrid
+```
+
+Alternatively, to get the latest version of MiniGrid, you can clone this repository and install the dependencies with `pip3`:
+
+```
+git clone https://github.com/Farama-Foundation/MiniGrid
+cd MiniGrid
+pip install -e .
+```

+ 50 - 0
docs/content/pubs.md

@@ -0,0 +1,50 @@
+---
+layout: "contents"
+title: Publications
+firstpage:
+---
+
+# List of publications
+Publications and submissions using MiniGrid or BabyAI are listed below (please open a pull request to add missing entries):
+- [History Compression via Language Models in Reinforcement Learning.](https://proceedings.mlr.press/v162/paischer22a.html) (Johannes Kepler University Linz, PMLR 2022)
+- [Leveraging Approximate Symbolic Models for Reinforcement Learning via Skill Diversity](https://arxiv.org/abs/2202.02886) (Arizona State University, ICML 2022)
+- [How to Stay Curious while avoiding Noisy TVs using Aleatoric Uncertainty Estimation](https://proceedings.mlr.press/v162/mavor-parker22a.html) (University College London, Boston University, ICML 2022)
+- [In a Nutshell, the Human Asked for This: Latent Goals for Following Temporal Specifications](https://openreview.net/pdf?id=rUwm9wCjURV) (Imperial College London, ICLR 2022)
+- [Interesting Object, Curious Agent: Learning Task-Agnostic Exploration](https://arxiv.org/abs/2111.13119) (Meta AI Research, NeurIPS 2021)
+- [Safe Policy Optimization with Local Generalized Linear Function Approximations](https://arxiv.org/abs/2111.04894) (IBM Research, Tsinghua University, NeurIPS 2021)
+- [A Consciousness-Inspired Planning Agent for Model-Based Reinforcement Learning](https://arxiv.org/abs/2106.02097) (Mila, McGill University, NeurIPS 2021)
+- [SPOTTER: Extending Symbolic Planning Operators through Targeted Reinforcement Learning](http://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1118.pdf) (Tufts University, SIFT, AAMAS 2021)
+- [Grid-to-Graph: Flexible Spatial Relational Inductive Biases for Reinforcement Learning](https://arxiv.org/abs/2102.04220) (UCL, AAMAS 2021)
+- [Rank the Episodes: A Simple Approach for Exploration in Procedurally-Generated Environments](https://openreview.net/forum?id=MtEE0CktZht) (Texas A&M University, Kuai Inc., ICLR 2021)
+- [Adversarially Guided Actor-Critic](https://openreview.net/forum?id=_mQp5cr_iNy) (INRIA, Google Brain, ICLR 2021)
+- [Information-theoretic Task Selection for Meta-Reinforcement Learning](https://papers.nips.cc/paper/2020/file/ec3183a7f107d1b8dbb90cb3c01ea7d5-Paper.pdf) (University of Leeds, NeurIPS 2020)
+- [BeBold: Exploration Beyond the Boundary of Explored Regions](https://arxiv.org/pdf/2012.08621.pdf) (UCB, December 2020)
+- [Approximate Information State for Approximate Planning and Reinforcement Learning in Partially Observed Systems](https://arxiv.org/abs/2010.08843) (McGill, October 2020)
+- [Prioritized Level Replay](https://arxiv.org/pdf/2010.03934.pdf) (FAIR, October 2020)
+- [AllenAct: A Framework for Embodied AI Research](https://arxiv.org/pdf/2008.12760.pdf) (Allen Institute for AI, August 2020)
+- [Learning with AMIGO: Adversarially Motivated Intrinsic Goals](https://arxiv.org/pdf/2006.12122.pdf) (MIT, FAIR, ICLR 2021)
+- [RIDE: Rewarding Impact-Driven Exploration for Procedurally-Generated Environments](https://openreview.net/forum?id=rkg-TJBFPB) (FAIR, ICLR 2020)
+- [Learning to Request Guidance in Emergent Communication](https://arxiv.org/pdf/1912.05525.pdf) (University of Amsterdam, Dec 2019)
+- [Working Memory Graphs](https://arxiv.org/abs/1911.07141) (MSR, Nov 2019)
+- [Fast Task-Adaptation for Tasks Labeled Using Natural Language in Reinforcement Learning](https://arxiv.org/pdf/1910.04040.pdf) (Oct 2019, University of Antwerp)
+- [Generalization in Reinforcement Learning with Selective Noise Injection and Information Bottleneck](https://arxiv.org/abs/1910.12911) (MSR, NeurIPS, Oct 2019)
+- [Recurrent Independent Mechanisms](https://arxiv.org/pdf/1909.10893.pdf) (Mila, Sept 2019) 
+- [Learning Effective Subgoals with Multi-Task Hierarchical Reinforcement Learning](http://surl.tirl.info/proceedings/SURL-2019_paper_10.pdf) (Tsinghua University, August 2019)
+- [Mastering emergent language: learning to guide in simulated navigation](https://arxiv.org/abs/1908.05135) (University of Amsterdam, Aug 2019)
+- [Transfer Learning by Modeling a Distribution over Policies](https://arxiv.org/abs/1906.03574) (Mila, June 2019)
+- [Reinforcement Learning with Competitive Ensembles of Information-Constrained Primitives](https://arxiv.org/abs/1906.10667) (Mila, June 2019)
+- [Learning distant cause and effect using only local and immediate credit assignment](https://arxiv.org/abs/1905.11589) (Incubator 491, May 2019)
+- [Practical Open-Loop Optimistic Planning](https://arxiv.org/abs/1904.04700) (INRIA, April 2019)
+- [Learning World Graphs to Accelerate Hierarchical Reinforcement Learning](https://arxiv.org/abs/1907.00664) (Salesforce Research, 2019)
+- [Variational State Encoding as Intrinsic Motivation in Reinforcement Learning](https://mila.quebec/wp-content/uploads/2019/05/WebPage.pdf) (Mila, TARL 2019)
+- [Unsupervised Discovery of Decision States Through Intrinsic Control](https://tarl2019.github.io/assets/papers/modhe2019unsupervised.pdf) (Georgia Tech, TARL 2019)
+- [Modeling the Long Term Future in Model-Based Reinforcement Learning](https://openreview.net/forum?id=SkgQBn0cF7) (Mila, ICLR 2019)
+- [Unifying Ensemble Methods for Q-learning via Social Choice Theory](https://arxiv.org/pdf/1902.10646.pdf) (Max Planck Institute, Feb 2019)
+- [Planning Beyond The Sensing Horizon Using a Learned Context](https://personalrobotics.cs.washington.edu/workshops/mlmp2018/assets/docs/18_CameraReadySubmission.pdf) (MLMP@IROS, 2018)
+- [Guiding Policies with Language via Meta-Learning](https://arxiv.org/abs/1811.07882) (UC Berkeley, Nov 2018)
+- [On the Complexity of Exploration in Goal-Driven Navigation](https://arxiv.org/abs/1811.06889) (CMU, NeurIPS, Nov 2018)
+- [Transfer and Exploration via the Information Bottleneck](https://openreview.net/forum?id=rJg8yhAqKm) (Mila, Nov 2018)
+- [Creating safer reward functions for reinforcement learning agents in the gridworld](https://gupea.ub.gu.se/bitstream/2077/62445/1/gupea_2077_62445_1.pdf) (University of Gothenburg, 2018)
+- [BabyAI: First Steps Towards Grounded Language Learning With a Human In the Loop](https://arxiv.org/abs/1810.08272) (Mila, ICLR, Oct 2018)
+
+This environment has been built as part of work done at [Mila](https://mila.quebec). The Dynamic obstacles environment has been added as part of work done at [IAS in TU Darmstadt](https://www.ias.informatik.tu-darmstadt.de/) and the University of Genoa for mobile robot navigation with dynamic obstacles.

+ 59 - 0
docs/environments/blocked_unlock_pickup.md

@@ -0,0 +1,59 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Blocked Unlock Pickup
+---
+
+
+# Blocked Unlock Pickup
+
+### Description
+
+The agent has to pick up a box which is placed in another room, behind a
+locked door. The door is also blocked by a ball which the agent has to move
+before it can unlock the door. Hence, the agent has to learn to move the
+ball, pick up the key, open the door and pick up the object in the other
+room. This environment can be solved without relying on language.
+
+### Mission Space
+
+"pick up the {color} {type}"
+
+{color} is the color of the box. Can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+{type} is the type of the object. Can be "box" or "key".
+
+### Action Space
+
+| Num | Name         | Action            |
+|-----|--------------|-------------------|
+| 0   | left         | Turn left         |
+| 1   | right        | Turn right        |
+| 2   | forward      | Move forward      |
+| 3   | pickup       | Pick up an object |
+| 4   | drop         | Unused            |
+| 5   | toggle       | Unused            |
+| 6   | done         | Unused            |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent picks up the correct box.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-BlockedUnlockPickup-v0`

+ 39 - 0
docs/environments/design.md

@@ -0,0 +1,39 @@
+---
+layout: "contents"
+title: Design
+firstpage:
+---
+
+# General Structure
+
+Structure of the world:
+- The world is an NxM grid of tiles
+- Each tile in the grid world contains zero or one object
+  - Cells that do not contain an object have the value `None`
+- Each object has an associated discrete color (string)
+- Each object has an associated type (string)
+  - Provided object types are: wall, floor, lava, door, key, ball, box and goal
+- The agent can pick up and carry exactly one object (eg: ball or key)
+- To open a locked door, the agent has to be carrying a key matching the door's color
+
+Actions in the basic environment:
+- Turn left
+- Turn right
+- Move forward
+- Pick up an object
+- Drop the object being carried
+- Toggle (open doors, interact with objects)
+- Done (task completed, optional)
+
+Default tile/observation encoding:
+- Each tile is encoded as a 3 dimensional tuple: `(OBJECT_IDX, COLOR_IDX, STATE)` 
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
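+As a quick sketch of inspecting this encoding (the import path and environment
+name are assumptions and may differ between versions; the agent sits at the
+bottom-center of its own 7x7 view):
+
+```python
+import gym
+from minigrid.minigrid import IDX_TO_OBJECT  # Import path may vary by version
+
+env = gym.make("MiniGrid-Empty-8x8-v0")
+obs, _ = env.reset()
+print(obs["image"].shape)  # (7, 7, 3): view_size x view_size x 3
+obj_idx, color_idx, state = obs["image"][3, 5]  # Tile just in front of the agent
+print(IDX_TO_OBJECT[obj_idx])  # e.g. 'empty', 'wall', 'goal', ...
+```
+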
+By default, sparse rewards are given for reaching a green goal tile. A
+reward of 1 is given for success, and zero for failure. There is also an
+environment-specific time step limit for completing the task.
+You can define your own reward function by creating a class derived
+from `MiniGridEnv`. Extending the environment with new object types or new actions
+should be very easy. If you wish to do this, you should take a look at the
+[minigrid/minigrid.py](minigrid/minigrid.py) source file.
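+
+As a minimal sketch of such a derived class (the import path and the
+`SimpleEnv` name are illustrative assumptions; the exact locations of `Grid`,
+`Goal`, `MissionSpace` and `MiniGridEnv` may differ between versions):
+
+```python
+from minigrid.minigrid import Grid, Goal, MiniGridEnv, MissionSpace
+
+
+class SimpleEnv(MiniGridEnv):
+    """Empty room with a goal in the corner and a custom (halved) reward."""
+
+    def __init__(self, size=8, **kwargs):
+        mission_space = MissionSpace(
+            mission_func=lambda: "get to the green goal square"
+        )
+        super().__init__(
+            mission_space=mission_space,
+            grid_size=size,
+            max_steps=4 * size * size,
+            **kwargs,
+        )
+
+    def _gen_grid(self, width, height):
+        # Surround the room with walls and place the goal in the far corner
+        self.grid = Grid(width, height)
+        self.grid.wall_rect(0, 0, width, height)
+        self.put_obj(Goal(), width - 2, height - 2)
+        self.agent_pos = (1, 1)
+        self.agent_dir = 0
+        self.mission = "get to the green goal square"
+
+    def _reward(self):
+        # Override the default sparse reward shaping, e.g. halve it
+        return 0.5 * super()._reward()
+```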

+ 58 - 0
docs/environments/dist_shift2.md

@@ -0,0 +1,58 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Dist Shift2
+---
+
+
+# Dist Shift2
+
+### Description
+
+This environment is based on one of the DeepMind [AI safety gridworlds](https://github.com/deepmind/ai-safety-gridworlds).
+The agent starts in the
+top-left corner and must reach the goal which is in the top-right corner,
+but has to avoid stepping into lava on its way. The aim of this environment
+is to test an agent's ability to generalize. There are two slightly
+different variants of the environment, so that the agent can be trained on
+one variant and tested on the other.
+
+### Mission Space
+
+"get to the green goal square"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. The agent falls into lava.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-DistShift1-v0`
+- `MiniGrid-DistShift2-v0`

+ 56 - 0
docs/environments/door_key.md

@@ -0,0 +1,56 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Door Key
+---
+
+
+# Door Key
+
+### Description
+
+This environment has a key that the agent must pick up in order to unlock a
+door and then get to the green goal square. Because of the sparse reward, this
+environment is difficult to solve using classical RL algorithms. It is
+useful for experimenting with curiosity or curriculum learning.
+
+### Mission Space
+
+"use the key to open the door and then get to the goal"
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Pick up an object         |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-DoorKey-5x5-v0`
+- `MiniGrid-DoorKey-6x6-v0`
+- `MiniGrid-DoorKey-8x8-v0`
+- `MiniGrid-DoorKey-16x16-v0`

+ 62 - 0
docs/environments/dynamic.md

@@ -0,0 +1,62 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Dynamic
+---
+
+
+# Dynamic
+
+### Description
+
+This environment is an empty room with moving obstacles.
+The goal of the agent is to reach the green goal square without colliding
+with any obstacle. A large penalty is subtracted if the agent collides with
+an obstacle, and the episode finishes. This environment is useful for testing
+dynamic obstacle avoidance for mobile robots with reinforcement learning under
+partial observability.
+
+### Mission Space
+
+"get to the green goal square"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure. A penalty of '-1'
+is given if the agent collides with an obstacle.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. The agent collides with an obstacle.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-Dynamic-Obstacles-5x5-v0`
+- `MiniGrid-Dynamic-Obstacles-Random-5x5-v0`
+- `MiniGrid-Dynamic-Obstacles-6x6-v0`
+- `MiniGrid-Dynamic-Obstacles-Random-6x6-v0`
+- `MiniGrid-Dynamic-Obstacles-8x8-v0`
+- `MiniGrid-Dynamic-Obstacles-16x16-v0`

+ 62 - 0
docs/environments/empty.md

@@ -0,0 +1,62 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Empty
+---
+
+
+# Empty
+
+### Description
+
+This environment is an empty room, and the goal of the agent is to reach the
+green goal square, which provides a sparse reward. A small penalty is
+subtracted for the number of steps to reach the goal. This environment is
+useful, with small rooms, to validate that your RL algorithm works
+correctly, and with large rooms to experiment with sparse rewards and
+exploration. The random variants of the environment have the agent starting
+at a random position for each episode, while the regular variants have the
+agent always starting in the corner opposite to the goal.
+
+### Mission Space
+
+"get to the green goal square"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-Empty-5x5-v0`
+- `MiniGrid-Empty-Random-5x5-v0`
+- `MiniGrid-Empty-6x6-v0`
+- `MiniGrid-Empty-Random-6x6-v0`
+- `MiniGrid-Empty-8x8-v0`
+- `MiniGrid-Empty-16x16-v0`

+ 66 - 0
docs/environments/fetch.md

@@ -0,0 +1,66 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Fetch
+---
+
+
+# Fetch
+
+### Description
+
+This environment has multiple objects of assorted types and colors. The
+agent receives a textual string as part of its observation telling it which
+object to pick up. Picking up the wrong object terminates the episode with
+zero reward.
+
+### Mission Space
+
+"{syntax} {color} {type}"
+
+{syntax} is one of the following: "get a", "go get a", "fetch a",
+"go fetch a", "you must fetch a".
+
+{color} is the color of the object. Can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+{type} is the type of the object. Can be "key" or "ball".
+
+### Action Space
+
+| Num | Name         | Action               |
+|-----|--------------|----------------------|
+| 0   | left         | Turn left            |
+| 1   | right        | Turn right           |
+| 2   | forward      | Move forward         |
+| 3   | pickup       | Pick up an object    |
+| 4   | drop         | Unused               |
+| 5   | toggle       | Unused               |
+| 6   | done         | Unused               |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent picks up the correct object.
+2. The agent picks up the wrong object.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+N: number of objects to be generated.
+
+- `MiniGrid-Fetch-5x5-N2-v0`
+- `MiniGrid-Fetch-6x6-N2-v0`
+- `MiniGrid-Fetch-8x8-N3-v0`

+ 53 - 0
docs/environments/four_rooms.md

@@ -0,0 +1,53 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Four Rooms
+---
+
+
+# Four Rooms
+
+### Description
+
+Classic four room reinforcement learning environment. The agent must
+navigate a maze composed of four rooms interconnected by four gaps in the
+walls. To obtain a reward, the agent must reach the green goal square. Both
+the agent and the goal square are randomly placed in any of the four rooms.
+
+### Mission Space
+
+"reach the goal"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-FourRooms-v0`

+ 59 - 0
docs/environments/go_to_door.md

@@ -0,0 +1,59 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Go To Door
+---
+
+
+# Go To Door
+
+### Description
+
+This environment is a room with four doors, one on each wall. The agent
+receives a textual (mission) string as input, telling it which door to go
+to (e.g. "go to the red door"). It receives a positive reward for performing
+the `done` action next to the correct door, as indicated in the mission
+string.
+
+### Mission Space
+
+"go to the {color} door"
+
+{color} is the color of the door. Can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+### Action Space
+
+| Num | Name         | Action               |
+|-----|--------------|----------------------|
+| 0   | left         | Turn left            |
+| 1   | right        | Turn right           |
+| 2   | forward      | Move forward         |
+| 3   | pickup       | Unused               |
+| 4   | drop         | Unused               |
+| 5   | toggle       | Unused               |
+| 6   | done         | Done completing task |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent stands next to the correct door and performs the `done` action.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-GoToDoor-5x5-v0`
+- `MiniGrid-GoToDoor-6x6-v0`
+- `MiniGrid-GoToDoor-8x8-v0`

+ 10 - 0
docs/environments/go_to_object.md

@@ -0,0 +1,10 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Go To Object
+---
+
+
+# Go To Object
+
+Environment in which the agent is instructed to go to a given object
+named using an English text string.

+ 37 - 0
docs/environments/index.md

@@ -0,0 +1,37 @@
+---
+firstpage:
+lastpage:
+---
+
+# Included environments
+
+The environments listed below are implemented in the [minigrid/envs](../../../../minigrid/envs) directory.
+Each environment provides one or more configurations registered with OpenAI Gym. Each environment
+is also programmatically tunable in terms of size/complexity, which is useful for curriculum learning
+or for fine-tuning difficulty.
+
+```{toctree}
+:hidden:
+empty
+four_rooms
+door_key
+multi_room
+fetch
+go_to_door
+put_near
+red_blue_doors
+memory_s7
+locked_room
+key_corridor_s6_r3
+unlock
+unlock_pickup
+blocked_unlock_pickup
+obstructed_maze
+dist_shift2
+lava_gap_s7
+simple_crossing_s11_n5
+dynamic
+go_to_object
+playground
+
+```

+ 69 - 0
docs/environments/key_corridor_s6_r3.md

@@ -0,0 +1,69 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Key Corridor S6 R3
+---
+
+
+# Key Corridor S6 R3
+
+### Description
+
+This environment is similar to the locked room environment, but there are
+multiple registered environment configurations of increasing size,
+making it easier to use curriculum learning to train an agent to solve it.
+The agent has to pick up an object which is behind a locked door. The key is
+hidden in another room, and the agent has to explore the environment to find
+it. The mission string does not give the agent any clues as to where the
+key is placed. This environment can be solved without relying on language.
+
+### Mission Space
+
+"pick up the {color} {obj_type}"
+
+{color} is the color of the object. Can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+{obj_type} is the type of the object. Can be "ball" or "key".
+
+### Action Space
+
+| Num | Name         | Action            |
+|-----|--------------|-------------------|
+| 0   | left         | Turn left         |
+| 1   | right        | Turn right        |
+| 2   | forward      | Move forward      |
+| 3   | pickup       | Pick up an object |
+| 4   | drop         | Unused            |
+| 5   | toggle       | Unused            |
+| 6   | done         | Unused            |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent picks up the correct object.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+S: room size.
+R: Number of rows.
+
+- `MiniGrid-KeyCorridorS3R1-v0`
+- `MiniGrid-KeyCorridorS3R2-v0`
+- `MiniGrid-KeyCorridorS3R3-v0`
+- `MiniGrid-KeyCorridorS4R3-v0`
+- `MiniGrid-KeyCorridorS5R3-v0`
+- `MiniGrid-KeyCorridorS6R3-v0`

+ 60 - 0
docs/environments/lava_gap_s7.md

@@ -0,0 +1,60 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Lava Gap S7
+---
+
+
+# Lava Gap S7
+
+### Description
+
+The agent has to reach the green goal square at the opposite corner of the
+room, and must pass through a narrow gap in a vertical strip of deadly lava.
+Touching the lava terminates the episode with a zero reward. This environment
+is useful for studying safety and safe exploration.
+
+### Mission Space
+
+Depending on the `obstacle_type` parameter:
+- `Lava`: "avoid the lava and get to the green goal square"
+- otherwise: "find the opening and get to the green goal square"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. The agent falls into lava.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+S: size of map SxS.
+
+- `MiniGrid-LavaGapS5-v0`
+- `MiniGrid-LavaGapS6-v0`
+- `MiniGrid-LavaGapS7-v0`

+ 57 - 0
docs/environments/locked_room.md

@@ -0,0 +1,57 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Locked Room
+---
+
+
+# Locked Room
+
+### Description
+
+The environment has six rooms, one of which is locked. The agent receives
+a textual mission string as input, telling it which room to go to in order
+to get the key that opens the locked room. It then has to go into the locked
+room in order to reach the final goal. This environment is extremely
+difficult to solve with vanilla reinforcement learning alone.
+
+### Mission Space
+
+"get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal"
+
+{lockedroom_color}, {keyroom_color}, and {door_color} can be "red", "green",
+"blue", "purple", "yellow" or "grey".
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Pick up an object         |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-LockedRoom-v0`

+ 60 - 0
docs/environments/memory_s7.md

@@ -0,0 +1,60 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Memory S7
+---
+
+
+# Memory S7
+
+### Description
+
+This environment is a memory test. The agent starts in a small room where it
+sees an object. It then has to go through a narrow hallway which ends in a
+split. At each end of the split there is an object, one of which is the same
+as the object in the starting room. The agent has to remember the initial
+object, and go to the matching object at the split.
+
+### Mission Space
+
+"go to the matching object at the end of the hallway"
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Pick up an object         |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the correct matching object.
+2. The agent reaches the wrong matching object.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+S: size of map SxS.
+
+- `MiniGrid-MemoryS17Random-v0`
+- `MiniGrid-MemoryS13Random-v0`
+- `MiniGrid-MemoryS13-v0`
+- `MiniGrid-MemoryS11-v0`

+ 59 - 0
docs/environments/multi_room.md

@@ -0,0 +1,59 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Multi Room
+---
+
+
+# Multi Room
+
+### Description
+
+This environment has a series of connected rooms with doors that must be
+opened in order to get to the next room. The final room has the green goal
+square that the agent must get to. This environment is extremely difficult to
+solve using RL alone. However, by gradually increasing the number of rooms
+and building a curriculum, the environment can be solved.
+
+### Mission Space
+
+"traverse the rooms to get to the goal"
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Unused                    |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+S: size of map SxS.
+N: number of rooms.
+
+- `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
+- `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
+- `MiniGrid-MultiRoom-N6-v0` (six rooms)

+ 11 - 0
docs/environments/obstructed_maze.md

@@ -0,0 +1,11 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Obstructed Maze
+---
+
+
+# Obstructed Maze
+
+A blue ball is hidden in one of the 4 corners of a 3x3 maze. The doors
+are locked and obstructed by balls, and the keys are hidden in boxes.

+ 10 - 0
docs/environments/playground.md

@@ -0,0 +1,10 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Playground
+---
+
+
+# Playground
+
+Environment with multiple rooms and random objects.
+This environment has no specific goals or rewards.

+ 62 - 0
docs/environments/put_near.md

@@ -0,0 +1,62 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Put Near
+---
+
+
+# Put Near
+
+### Description
+
+The agent is instructed through a textual string to pick up an object and
+place it next to another object. This environment is easy to solve with two
+objects, but difficult to solve with more, as it involves both textual
+understanding and spatial reasoning involving multiple objects.
+
+### Mission Space
+
+"put the {move_color} {move_type} near the {target_color} {target_type}"
+
+{move_color} and {target_color} can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+{move_type} and {target_type} can be "box", "ball" or "key".
+
+### Action Space
+
+| Num | Name         | Action            |
+|-----|--------------|-------------------|
+| 0   | left         | Turn left         |
+| 1   | right        | Turn right        |
+| 2   | forward      | Move forward      |
+| 3   | pickup       | Pick up an object |
+| 4   | drop         | Drop an object    |
+| 5   | toggle       | Unused            |
+| 6   | done         | Unused            |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent picks up the wrong object.
+2. The agent drops the correct object near the target.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+N: number of objects.
+
+- `MiniGrid-PutNear-6x6-N2-v0`
+- `MiniGrid-PutNear-8x8-N3-v0`

+ 55 - 0
docs/environments/red_blue_doors.md

@@ -0,0 +1,55 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Red Blue Doors
+---
+
+
+# Red Blue Doors
+
+### Description
+
+The agent is randomly placed within a room with one red and one blue door
+facing opposite directions. The agent has to open the red door and then the
+blue door, in that order. Note that, surprisingly, this environment is
+solvable without memory.
+
+### Mission Space
+
+"open the red door then the blue door"
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Unused                    |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3-dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings can be found in
+    [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+- `STATE` refers to the door state, with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent opens the blue door having already opened the red door.
+2. The agent opens the blue door without having opened the red door yet.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-RedBlueDoors-6x6-v0`
+- `MiniGrid-RedBlueDoors-8x8-v0`

+ 76 - 0
docs/environments/simple_crossing_s11_n5.md

@@ -0,0 +1,76 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Simple Crossing S11 N5
+---
+
+
+# Simple Crossing S11 N5
+
+### Description
+
+Depending on the `obstacle_type` parameter:
+- `Lava` - The agent has to reach the green goal square in the opposite corner
+    of the room while avoiding rivers of deadly lava, which terminate the
+    episode in failure. Each lava stream runs across the room either
+    horizontally or vertically, and has a single crossing point which can be
+    safely used; luckily, a path to the goal is guaranteed to exist. This
+    environment is useful for studying safety and safe exploration.
+- otherwise - Similar to the `LavaCrossing` environment, the agent has to
+    reach the green goal square in the opposite corner of the room, but here
+    lava is replaced by walls. This MDP is therefore much easier and may be
+    useful for quickly testing your algorithms.
+
+### Mission Space
+
+Depending on the `obstacle_type` parameter:
+- `Lava` - "avoid the lava and get to the green goal square"
+- otherwise - "find the opening and get to the green goal square"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3-dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings can be found in
+    [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+- `STATE` refers to the door state, with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. The agent falls into lava.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+S: size of the map SxS.
+N: number of valid crossings across lava or walls from the starting position
+to the goal.
+
+- `Lava`:
+    - `MiniGrid-LavaCrossingS9N1-v0`
+    - `MiniGrid-LavaCrossingS9N2-v0`
+    - `MiniGrid-LavaCrossingS9N3-v0`
+    - `MiniGrid-LavaCrossingS11N5-v0`
+
+- otherwise:
+    - `MiniGrid-SimpleCrossingS9N1-v0`
+    - `MiniGrid-SimpleCrossingS9N2-v0`
+    - `MiniGrid-SimpleCrossingS9N3-v0`
+    - `MiniGrid-SimpleCrossingS11N5-v0`
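+
+As a rough sketch (the class and parameter names below are taken from the
+`gym_minigrid` source and may differ between versions), the same environments
+can also be constructed directly instead of via `gym.make`:
+
+```python
+# Assumption: CrossingEnv exposes size / num_crossings / obstacle_type.
+from gym_minigrid.envs import CrossingEnv
+from gym_minigrid.minigrid import Wall
+
+# Roughly equivalent to MiniGrid-SimpleCrossingS11N5-v0: an 11x11 grid
+# with 5 wall crossings between the start position and the goal.
+env = CrossingEnv(size=11, num_crossings=5, obstacle_type=Wall)
+```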

+ 51 - 0
docs/environments/unlock.md

@@ -0,0 +1,51 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Unlock
+---
+
+
+# Unlock
+
+### Description
+
+The agent has to open a locked door. This environment can be solved without
+relying on language.
+
+### Mission Space
+
+"open the door"
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Unused                    |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3-dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings can be found in
+    [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+- `STATE` refers to the door state, with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent opens the door.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-Unlock-v0`

+ 54 - 0
docs/environments/unlock_pickup.md

@@ -0,0 +1,54 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Unlock Pickup
+---
+
+
+# Unlock Pickup
+
+### Description
+
+The agent has to pick up a box which is placed in another room, behind a
+locked door. This environment can be solved without relying on language.
+
+### Mission Space
+
+"pick up the {color} box"
+
+{color} is the color of the box. Can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Pick up an object         |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3-dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings can be found in
+    [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+- `STATE` refers to the door state, with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent picks up the correct box.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-UnlockPickup-v0`

+ 65 - 0
docs/index.md

@@ -0,0 +1,65 @@
+---
+hide-toc: true
+firstpage:
+lastpage:
+---
+
+# MiniGrid is a simple and easily configurable grid world environment for reinforcement learning
+
+
+[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/) 
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+There are other gridworld Gym environments out there, but this one is
+designed to be particularly simple, lightweight and fast. The code has very few
+dependencies, making it less likely to break or fail to install. It loads no
+external sprites/textures, and it can run at up to 5000 FPS on a Core i7
+laptop, which means you can run your experiments faster. A known-working RL
+implementation can be found [in this repository](https://github.com/lcswillems/torch-rl).
+
+Requirements:
+- Python 3.7 to 3.10
+- OpenAI Gym v0.26
+- NumPy 1.18+
+- Matplotlib 3.0+ (optional, only needed for display)
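+
+A minimal sketch of the control loop (assuming gym v0.26 step/reset
+semantics; the env id below is one of the registered MiniGrid environments):
+
+```python
+import gym
+import gym_minigrid  # noqa: F401  # importing registers the MiniGrid-* envs
+
+env = gym.make("MiniGrid-Empty-5x5-v0")
+obs, info = env.reset(seed=42)
+terminated = truncated = False
+while not (terminated or truncated):
+    action = env.action_space.sample()  # replace with your agent's policy
+    obs, reward, terminated, truncated, info = env.step(action)
+env.close()
+```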
+
+Please use this BibTeX entry if you want to cite this repository in your publications:
+
+```
+@misc{minigrid,
+  author = {Chevalier-Boisvert, Maxime and Willems, Lucas and Pal, Suman},
+  title = {Minimalistic Gridworld Environment for Gymnasium},
+  year = {2018},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  howpublished = {\url{https://github.com/Farama-Foundation/MiniGrid}},
+}
+```
+
+```{toctree}
+:hidden:
+:caption: Introduction
+
+content/installation
+content/basic_usage
+api/wrappers
+content/pubs
+```
+
+
+```{toctree}
+:hidden:
+:caption: Environments
+
+environments/design
+environments/index
+```
+
+```{toctree}
+:hidden:
+:caption: Development
+
+Github <https://github.com/Farama-Foundation/MiniGrid>
+Donate <https://farama.org/donations>
+Contribute to the Docs <404>
+```

+ 35 - 0
docs/make.bat

@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd

+ 7 - 0
docs/requirements.txt

@@ -0,0 +1,7 @@
+sphinx==4.2.0
+myst-parser
+furo==2022.6.21
+moviepy
+pygame
+pygments==2.11.2
+sphinx-notfound-page

+ 78 - 0
docs/scripts/gen_mds.py

@@ -0,0 +1,78 @@
+__author__ = "Feng Gu"
+__email__ = "contact@fenggu.me"
+
+"""
+   isort:skip_file
+"""
+
+import os
+import re
+
+from gym.envs.registration import registry
+from tqdm import tqdm
+
+from utils import trim
+
+LAYOUT = "env"
+
+pattern = re.compile(r"(?<!^)(?=[A-Z])")
+
+all_envs = list(registry.values())
+
+filtered_envs_by_type = {}
+
+# Obtain filtered list
+for env_spec in tqdm(all_envs):
+    # entry_point looks like "gym_minigrid.envs:Env"
+    split = env_spec.entry_point.split(".")
+    # skip anything not defined by gym_minigrid (e.g. gym.envs.*)
+    env_module = split[0]
+    if env_module != "gym_minigrid":
+        continue
+
+    env_name = split[1]
+    filtered_envs_by_type[env_name] = env_spec
+
+
+filtered_envs = {
+    k.split(":")[1]: v
+    for k, v in sorted(
+        filtered_envs_by_type.items(),
+        key=lambda item: item[1].entry_point.split(".")[1],
+    )
+}
+
+for env_name, env_spec in filtered_envs.items():
+    made = env_spec.make()
+
+    docstring = trim(made.unwrapped.__doc__)
+
+    pascal_env_name = env_spec.id.split("-")[1]
+    snake_env_name = pattern.sub("_", pascal_env_name).lower()
+    title_env_name = snake_env_name.replace("_", " ").title()
+
+    v_path = os.path.join(
+        os.path.dirname(os.path.dirname(__file__)),
+        "source",
+        "environments",
+        snake_env_name + ".md",
+    )
+
+    front_matter = f"""---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: {title_env_name}
+---
+"""
+    title = f"# {title_env_name}"
+
+    # trim() returns "" (never None) for a missing docstring
+    if not docstring:
+        docstring = "No information provided"
+    all_text = f"""{front_matter}
+
+{title}
+
+{docstring}
+"""
+    with open(v_path, "w+", encoding="utf-8") as file:
+        file.write(all_text)
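+
+# Illustrative usage (assumption -- run so that utils.py is importable):
+#     python docs/scripts/gen_mds.py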

+ 14 - 0
docs/scripts/move404.py

@@ -0,0 +1,14 @@
+import sys
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Provide a path")
+        sys.exit(1)
+    filePath = sys.argv[1]
+
+    with open(filePath, "r+") as fp:
+        content = fp.read()
+        content = content.replace('href="../', 'href="/').replace('src="../', 'src="/')
+        fp.seek(0)
+        fp.truncate()
+
+        fp.write(content)

+ 44 - 0
docs/scripts/utils.py

@@ -0,0 +1,44 @@
+import sys
+
+
+# adapted from the Python docs (PEP 257 docstring trimming)
+def trim(docstring):
+    if not docstring:
+        return ""
+    # Convert tabs to spaces (following the normal Python rules)
+    # and split into a list of lines:
+    lines = docstring.expandtabs().splitlines()
+    # Determine minimum indentation (first line doesn't count):
+    indent = sys.maxsize
+    for line in lines[1:]:
+        stripped = line.lstrip()
+        if stripped:
+            indent = min(indent, len(line) - len(stripped))
+    # Remove indentation (first line is special):
+    trimmed = [lines[0].strip()]
+    if indent < sys.maxsize:
+        for line in lines[1:]:
+            trimmed.append(line[indent:].rstrip())
+    # Strip off trailing and leading blank lines:
+    while trimmed and not trimmed[-1]:
+        trimmed.pop()
+    while trimmed and not trimmed[0]:
+        trimmed.pop(0)
+    # Return a single string:
+    return "\n".join(trimmed)
+
+
+# don't want envs whose names contain any of these substrings
+kill_strs = [
+    "eterministic",
+    "ALE",
+    "-ram",
+    "Frameskip",
+    "Hard",
+    "LanderContinu",
+    "8x8",
+    "uessing",
+    "otter",
+    "oinflip",
+    "hain",
+    "oulette",
+    "DomainRandom",
+    "RacingDiscrete",
+]