
Documentation (#242)

Feng 2 years ago
parent
commit
f2a30bd1b2
92 changed files with 2893 additions and 0 deletions
  1. 41 0
      .github/workflows/gh-pages.yml
  2. 8 0
      .gitignore
  3. 1 0
      CNAME
  4. 6 0
      docs/.gitignore
  5. 8 0
      docs/404.md
  6. 21 0
      docs/LICENSE
  7. 20 0
      docs/Makefile
  8. 40 0
      docs/README.md
  9. 53 0
      docs/_static/css/custom.css
  10. 35 0
      docs/_static/css/env_pages.css
  11. 147 0
      docs/_static/img/favicon.svg
  12. BIN
      docs/_static/img/figures/BlockedUnlockPickup.png
  13. BIN
      docs/_static/img/figures/DistShift1.png
  14. BIN
      docs/_static/img/figures/DistShift2.png
  15. BIN
      docs/_static/img/figures/KeyCorridorS3R1.png
  16. BIN
      docs/_static/img/figures/KeyCorridorS3R2.png
  17. BIN
      docs/_static/img/figures/KeyCorridorS3R3.png
  18. BIN
      docs/_static/img/figures/KeyCorridorS4R3.png
  19. BIN
      docs/_static/img/figures/KeyCorridorS5R3.png
  20. BIN
      docs/_static/img/figures/KeyCorridorS6R3.png
  21. BIN
      docs/_static/img/figures/LavaCrossingS11N5.png
  22. BIN
      docs/_static/img/figures/LavaCrossingS9N1.png
  23. BIN
      docs/_static/img/figures/LavaCrossingS9N2.png
  24. BIN
      docs/_static/img/figures/LavaCrossingS9N3.png
  25. BIN
      docs/_static/img/figures/LavaGapS6.png
  26. BIN
      docs/_static/img/figures/ObstructedMaze-1Dl.png
  27. BIN
      docs/_static/img/figures/ObstructedMaze-1Dlh.png
  28. BIN
      docs/_static/img/figures/ObstructedMaze-1Dlhb.png
  29. BIN
      docs/_static/img/figures/ObstructedMaze-1Q.png
  30. BIN
      docs/_static/img/figures/ObstructedMaze-2Dl.png
  31. BIN
      docs/_static/img/figures/ObstructedMaze-2Dlh.png
  32. BIN
      docs/_static/img/figures/ObstructedMaze-2Dlhb.png
  33. BIN
      docs/_static/img/figures/ObstructedMaze-2Q.png
  34. BIN
      docs/_static/img/figures/ObstructedMaze-4Q.png
  35. BIN
      docs/_static/img/figures/SimpleCrossingS11N5.png
  36. BIN
      docs/_static/img/figures/SimpleCrossingS9N1.png
  37. BIN
      docs/_static/img/figures/SimpleCrossingS9N2.png
  38. BIN
      docs/_static/img/figures/SimpleCrossingS9N3.png
  39. BIN
      docs/_static/img/figures/Unlock.png
  40. BIN
      docs/_static/img/figures/UnlockPickup.png
  41. BIN
      docs/_static/img/figures/door-key-curriculum.gif
  42. BIN
      docs/_static/img/figures/door-key-env.png
  43. BIN
      docs/_static/img/figures/dynamic_obstacles.gif
  44. BIN
      docs/_static/img/figures/empty-env.png
  45. BIN
      docs/_static/img/figures/fetch-env.png
  46. BIN
      docs/_static/img/figures/four-rooms-env.png
  47. BIN
      docs/_static/img/figures/gotodoor-6x6.mp4
  48. BIN
      docs/_static/img/figures/gotodoor-6x6.png
  49. BIN
      docs/_static/img/figures/multi-room.gif
  50. 1 0
      docs/_static/img/github_icon.svg
  51. 130 0
      docs/_static/img/minigrid-github.svg
  52. 185 0
      docs/_static/img/minigrid-text.svg
  53. BIN
      docs/_static/img/minigrid-white.png
  54. 151 0
      docs/_static/img/minigrid-white.svg
  55. BIN
      docs/_static/img/minigrid.png
  56. 147 0
      docs/_static/img/minigrid.svg
  57. 42 0
      docs/_templates/base.html
  58. 207 0
      docs/_templates/page.html
  59. 33 0
      docs/api/wrappers.md
  60. 86 0
      docs/conf.py
  61. 32 0
      docs/content/basic_usage.md
  62. 21 0
      docs/content/installation.md
  63. 50 0
      docs/content/pubs.md
  64. 59 0
      docs/environments/blocked_unlock_pickup.md
  65. 39 0
      docs/environments/design.md
  66. 58 0
      docs/environments/dist_shift2.md
  67. 56 0
      docs/environments/door_key.md
  68. 62 0
      docs/environments/dynamic.md
  69. 62 0
      docs/environments/empty.md
  70. 66 0
      docs/environments/fetch.md
  71. 53 0
      docs/environments/four_rooms.md
  72. 59 0
      docs/environments/go_to_door.md
  73. 10 0
      docs/environments/go_to_object.md
  74. 37 0
      docs/environments/index.md
  75. 69 0
      docs/environments/key_corridor_s6_r3.md
  76. 60 0
      docs/environments/lava_gap_s7.md
  77. 57 0
      docs/environments/locked_room.md
  78. 60 0
      docs/environments/memory_s7.md
  79. 59 0
      docs/environments/multi_room.md
  80. 11 0
      docs/environments/obstructed_maze.md
  81. 10 0
      docs/environments/playground.md
  82. 62 0
      docs/environments/put_near.md
  83. 55 0
      docs/environments/red_blue_doors.md
  84. 76 0
      docs/environments/simple_crossing_s11_n5.md
  85. 51 0
      docs/environments/unlock.md
  86. 54 0
      docs/environments/unlock_pickup.md
  87. 65 0
      docs/index.md
  88. 35 0
      docs/make.bat
  89. 7 0
      docs/requirements.txt
  90. 78 0
      docs/scripts/gen_mds.py
  91. 14 0
      docs/scripts/move404.py
  92. 44 0
      docs/scripts/utils.py

+ 41 - 0
.github/workflows/gh-pages.yml

@@ -0,0 +1,41 @@
+name: Deploy Docs
+on:
+  push:
+    branches: [master]
+
+permissions:
+  contents: write
+
+jobs:
+  docs:
+    name: Generate Website
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+
+      - name: Install dependencies
+        run: pip install -r docs/requirements.txt && git clone https://github.com/Farama-Foundation/MiniGrid.git && pip install ./MiniGrid
+
+      - name: Build Envs Docs
+        run: python docs/scripts/gen_mds.py
+
+      - name: Build
+        run: sphinx-build -b dirhtml -v docs _build
+
+      - name: Move 404
+        run: mv _build/404/index.html _build/404.html
+
+      - name: Update 404 links
+        run: python docs/scripts/move404.py _build/404.html
+
+      - name: Remove .doctrees
+        run: rm -r _build/.doctrees
+
+      - name: Upload to GitHub Pages
+        uses: JamesIves/github-pages-deploy-action@v4
+        with:
+          folder: _build
+          clean-exclude: | 
+            *.*.*/

+ 8 - 0
.gitignore

@@ -7,3 +7,11 @@ trained_models
 build/*
 dist/*
 .idea/
+
+#docs
+_build/*
+.DS_Store
+_site
+.jekyll-cache
+__pycache__
+.vscode/

+ 1 - 0
CNAME

@@ -0,0 +1 @@
+fenggu.me

+ 6 - 0
docs/.gitignore

@@ -0,0 +1,6 @@
+.DS_Store
+_site
+.jekyll-cache
+__pycache__
+_build/
+.vscode/

+ 8 - 0
docs/404.md

@@ -0,0 +1,8 @@
+---
+hide-toc: true
+orphan: true
+---
+
+# 404
+
+## Page Not Found

+ 21 - 0
docs/LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Farama Foundation
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 20 - 0
docs/Makefile

@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

+ 40 - 0
docs/README.md

@@ -0,0 +1,40 @@
+# MiniGrid-docs
+
+
+This repo contains the [new website]() for [MiniGrid](https://github.com/Farama-Foundation/MiniGrid). The site is currently in beta, and we are in the process of adding and editing information.
+
+
+The documentation uses Sphinx. However, the pages are written in regular Markdown (md), NOT reStructuredText (rst).
+
+If you are modifying a non-environment page, please open a pull request against this repo. Otherwise, follow the steps below:
+
+## Instructions for modifying environment pages
+
+### Editing an environment page
+
+The environment pages are autogenerated, so do not edit their md files directly. Instead, fork MiniGrid and edit the docstring in the environment's Python file. Then, pip install your MiniGrid fork and run `docs/scripts/gen_mds.py` in this repo. This will automatically generate a md documentation file for the environment.
+
+## Build the Documentation
+
+Install the required packages and MiniGrid (or your fork):
+
+```
+pip install -r requirements.txt
+pip install minigrid
+```
+
+To build the documentation once:
+
+```
+cd docs
+make dirhtml
+```
+
+To rebuild the documentation automatically every time a change is made:
+
+```
+cd docs
+sphinx-autobuild -b dirhtml . _build/html
+```

+ 53 - 0
docs/_static/css/custom.css

@@ -0,0 +1,53 @@
+h1 {
+    font-size: 2.25rem;
+}
+
+h2 {
+    font-size: 1.75rem;
+}
+
+h3 {
+    font-size: 1.45rem;
+}
+
+.cookie_alert {
+    position: fixed;
+    display: flex;
+    width: 100%;
+    min-height: 70px;
+    background-color: var(--color-background-secondary);
+    color: var(--color-foreground-primary);
+    border-top: 1px solid var(--color-background-border);
+    bottom: 0;
+    left: 0;
+    z-index: 99999;
+}
+
+.cookie_container {
+    display: flex;
+    align-items: center;
+    width: 700px;
+    max-width: calc(100% - 28px);
+    margin: auto;
+}
+
+.cookie_alert p {
+    flex: 1;
+}
+
+.cookie_alert button {
+    background-color: transparent;
+    border: none;
+    width: 26px;
+    height: 26px;
+    padding: 0;
+    cursor: pointer;
+}
+
+.cookie_alert button svg .cls-1 {
+     stroke: var(--color-foreground-primary);
+}
+
+.cookie_alert button svg {
+    width: 100%;
+}

+ 35 - 0
docs/_static/css/env_pages.css

@@ -0,0 +1,35 @@
+.env-grid {
+    display: flex;
+    flex-wrap: wrap;
+    justify-content: center;
+    width: 100%;
+    box-sizing: border-box;
+}
+.env-grid__cell {
+    display: flex;
+    flex-direction: column;
+    width: 180px;
+    height: 180px;
+    padding: 10px;
+}
+.cell__image-container {
+    display: flex;
+    height: 148px;
+    justify-content: center;
+}
+.cell__image-container img {
+    max-height: 100%;
+}
+.cell__title {
+    display: flex;
+    justify-content: center;
+    text-align: center;
+    align-items: flex-end;
+    height: 32px;
+    line-height: 16px;
+}
+.more-btn {
+    width: 240px;
+    margin: 12px auto;
+    display: block;
+}

File diff suppressed because it is too large
+ 147 - 0
docs/_static/img/favicon.svg


BIN
docs/_static/img/figures/BlockedUnlockPickup.png


BIN
docs/_static/img/figures/DistShift1.png


BIN
docs/_static/img/figures/DistShift2.png


BIN
docs/_static/img/figures/KeyCorridorS3R1.png


BIN
docs/_static/img/figures/KeyCorridorS3R2.png


BIN
docs/_static/img/figures/KeyCorridorS3R3.png


BIN
docs/_static/img/figures/KeyCorridorS4R3.png


BIN
docs/_static/img/figures/KeyCorridorS5R3.png


BIN
docs/_static/img/figures/KeyCorridorS6R3.png


BIN
docs/_static/img/figures/LavaCrossingS11N5.png


BIN
docs/_static/img/figures/LavaCrossingS9N1.png


BIN
docs/_static/img/figures/LavaCrossingS9N2.png


BIN
docs/_static/img/figures/LavaCrossingS9N3.png


BIN
docs/_static/img/figures/LavaGapS6.png


BIN
docs/_static/img/figures/ObstructedMaze-1Dl.png


BIN
docs/_static/img/figures/ObstructedMaze-1Dlh.png


BIN
docs/_static/img/figures/ObstructedMaze-1Dlhb.png


BIN
docs/_static/img/figures/ObstructedMaze-1Q.png


BIN
docs/_static/img/figures/ObstructedMaze-2Dl.png


BIN
docs/_static/img/figures/ObstructedMaze-2Dlh.png


BIN
docs/_static/img/figures/ObstructedMaze-2Dlhb.png


BIN
docs/_static/img/figures/ObstructedMaze-2Q.png


BIN
docs/_static/img/figures/ObstructedMaze-4Q.png


BIN
docs/_static/img/figures/SimpleCrossingS11N5.png


BIN
docs/_static/img/figures/SimpleCrossingS9N1.png


BIN
docs/_static/img/figures/SimpleCrossingS9N2.png


BIN
docs/_static/img/figures/SimpleCrossingS9N3.png


BIN
docs/_static/img/figures/Unlock.png


BIN
docs/_static/img/figures/UnlockPickup.png


BIN
docs/_static/img/figures/door-key-curriculum.gif


BIN
docs/_static/img/figures/door-key-env.png


BIN
docs/_static/img/figures/dynamic_obstacles.gif


BIN
docs/_static/img/figures/empty-env.png


BIN
docs/_static/img/figures/fetch-env.png


BIN
docs/_static/img/figures/four-rooms-env.png


BIN
docs/_static/img/figures/gotodoor-6x6.mp4


BIN
docs/_static/img/figures/gotodoor-6x6.png


BIN
docs/_static/img/figures/multi-room.gif


File diff suppressed because it is too large
+ 1 - 0
docs/_static/img/github_icon.svg


File diff suppressed because it is too large
+ 130 - 0
docs/_static/img/minigrid-github.svg


File diff suppressed because it is too large
+ 185 - 0
docs/_static/img/minigrid-text.svg


BIN
docs/_static/img/minigrid-white.png


File diff suppressed because it is too large
+ 151 - 0
docs/_static/img/minigrid-white.svg


BIN
docs/_static/img/minigrid.png


File diff suppressed because it is too large
+ 147 - 0
docs/_static/img/minigrid.svg


+ 42 - 0
docs/_templates/base.html

@@ -0,0 +1,42 @@
+{% extends "furo/base.html" %}
+
+{%- block regular_scripts -%}
+{{ super() }}
+
+<script>
+    (() => {
+        if (!localStorage.getItem("shownCookieAlert")) {
+            const boxElem = document.createElement("div");
+            boxElem.classList.add("cookie_alert");
+            const containerElem = document.createElement("div");
+            containerElem.classList.add("cookie_container");
+            const textElem = document.createElement("p");
+            textElem.innerHTML = `This page uses <a href="https://analytics.google.com/">
+                                Google Analytics</a> to collect statistics. You can disable it by blocking
+                                the JavaScript coming from www.google-analytics.com.`;
+            containerElem.appendChild(textElem);
+            const closeBtn = document.createElement("button");
+            closeBtn.innerHTML = `<?xml version="1.0" ?><svg viewBox="0 0 32 32" xmlns="http://www.w3.org/2000/svg"><defs><style>.cls-1{fill:none;stroke:#000;stroke-linecap:round;stroke-linejoin:round;stroke-width:2px;}</style></defs><title/><g id="cross"><line class="cls-1" x1="7" x2="25" y1="7" y2="25"/><line class="cls-1" x1="7" x2="25" y1="25" y2="7"/></g></svg>`
+            closeBtn.onclick = () => {
+                localStorage.setItem("shownCookieAlert", "true");
+                boxElem.style.display = "none";
+            }
+            containerElem.appendChild(closeBtn);
+            boxElem.appendChild(containerElem);
+            document.body.appendChild(boxElem);
+        }
+    })()
+
+</script>
+
+<!-- Google tag (gtag.js) -->
+<script async src="https://www.googletagmanager.com/gtag/js?id=G-JGXSLW7N06"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments);}
+  gtag('js', new Date());
+
+  gtag('config', 'G-JGXSLW7N06');
+</script>
+
+{%- endblock regular_scripts -%}

File diff suppressed because it is too large
+ 207 - 0
docs/_templates/page.html


+ 33 - 0
docs/api/wrappers.md

@@ -0,0 +1,33 @@
+---
+title: Wrappers
+lastpage:
+---
+
+# Wrappers
+
+MiniGrid is built to support tasks involving natural language and sparse rewards.
+The observations are dictionaries, with an 'image' field containing a partially
+observable view of the environment, a 'mission' field which is a textual string
+describing the objective the agent should reach to get a reward, and a 'direction'
+field which can be used as an optional compass. Using dictionaries makes it
+easy for you to add additional information to observations
+if you need to, without having to encode everything into a single tensor.
+
+A variety of wrappers for changing the observation format are available in [minigrid/wrappers.py](../../../../minigrid/wrappers.py).
+If your RL code expects one single tensor for observations, take a look at `FlatObsWrapper`. 
+There is also an `ImgObsWrapper` that gets rid of the 'mission' field in observations, leaving only the image field tensor.
+
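+For instance, a minimal sketch of using `FlatObsWrapper` (the environment name
+is just an example, and the gym 0.26 `reset` API matches the snippet further
+down this page):
+
+```python
+import gym
+from minigrid.wrappers import FlatObsWrapper
+
+env = gym.make('MiniGrid-Empty-8x8-v0')
+env = FlatObsWrapper(env)  # Flatten the image and encode the mission string
+obs, _ = env.reset()       # obs is now a single 1D array
+print(obs.shape)
+```
+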
+Please note that the default observation format is a partially observable view of the environment using a
+compact and efficient encoding, with 3 input values per visible grid cell, 7x7x3 values total.
+These values are **not pixels**. If you want to obtain an array of RGB pixels as observations instead,
+use the `RGBImgPartialObsWrapper`. You can use it as follows:
+
+```python
+import gym
+from minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper
+
+env = gym.make('MiniGrid-Empty-8x8-v0')
+env = RGBImgPartialObsWrapper(env) # Get pixel observations
+env = ImgObsWrapper(env) # Get rid of the 'mission' field
+obs, _ = env.reset() # This now produces an RGB tensor only
+```

+ 86 - 0
docs/conf.py

@@ -0,0 +1,86 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+# -- Project information -----------------------------------------------------
+# TODO: change to minigrid version
+# from TODO import __version__ as minigrid_version
+
+import os
+import sys
+
+project = "MiniGrid"
+copyright = "2022"
+author = "Farama Foundation"
+
+# The full version, including alpha/beta/rc tags
+# TODO: change to minigrid version
+release = "1.2.1"
+
+sys.path.insert(0, os.path.abspath("../.."))
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    "sphinx.ext.napoleon",
+    "sphinx.ext.doctest",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.githubpages",
+    "myst_parser",
+    "notfound.extension",
+]
+
+source_suffix = {
+    ".rst": "restructuredtext",
+    ".md": "markdown",
+}
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+# Napoleon settings
+napoleon_use_ivar = True
+napoleon_use_admonition_for_references = True
+# See https://github.com/sphinx-doc/sphinx/issues/9119
+napoleon_custom_sections = [("Returns", "params_style")]
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = "furo"
+html_title = "MiniGrid Documentation"
+html_baseurl = "https://fenggu.me/"
+html_copy_source = False
+html_favicon = "_static/img/minigrid-white.png"
+html_theme_options = {
+    "light_logo": "img/minigrid.png",
+    "dark_logo": "img/minigrid-white.png",
+}
+html_static_path = ["_static"]
+html_css_files = [
+    "css/custom.css",
+]
+
+notfound_urls_prefix = "/MiniGrid/"

+ 32 - 0
docs/content/basic_usage.md

@@ -0,0 +1,32 @@
+---
+layout: "contents"
+title: Basic Usage
+firstpage:
+---
+
+# Basic Usage
+
+There is a UI application which allows you to manually control the agent with the arrow keys:
+```
+./minigrid/manual_control.py
+```
+
+The environment to run can be selected with the `--env` option, e.g.:
+```
+./minigrid/manual_control.py --env MiniGrid-Empty-8x8-v0
+```
+
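+The environments can also be driven programmatically. Below is a minimal
+random-agent loop, as a sketch: it assumes the gym 0.26 `reset`/`step` API and
+that importing `minigrid` registers the environments; adjust to your installed
+version.
+
+```python
+import gym
+import minigrid  # noqa: F401 -- importing registers the MiniGrid environments
+
+env = gym.make("MiniGrid-Empty-8x8-v0")
+obs, _ = env.reset()
+for _ in range(100):
+    action = env.action_space.sample()  # Replace with your policy
+    obs, reward, terminated, truncated, info = env.step(action)
+    if terminated or truncated:
+        obs, _ = env.reset()
+env.close()
+```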
+
+## Reinforcement Learning
+
+If you want to train an agent with reinforcement learning, I recommend using the code found in the [torch-rl](https://github.com/lcswillems/torch-rl) repository. 
+This code has been tested and is known to work with this environment. The default hyper-parameters are also known to converge.
+
+A sample training command is:
+
+```
+cd torch-rl
+python3 -m scripts.train --env MiniGrid-Empty-8x8-v0 --algo ppo
+```

+ 21 - 0
docs/content/installation.md

@@ -0,0 +1,21 @@
+---
+layout: "contents"
+title: Installation
+firstpage:
+---
+
+# Installation
+
+There is now a [pip package](https://pypi.org/project/minigrid/) available, which is updated periodically:
+
+```
+pip install minigrid
+```
+
+Alternatively, to get the latest version of MiniGrid, you can clone this repository and install the dependencies with `pip3`:
+
+```
+git clone https://github.com/Farama-Foundation/MiniGrid
+cd MiniGrid
+pip install -e .
+```

+ 50 - 0
docs/content/pubs.md

@@ -0,0 +1,50 @@
+---
+layout: "contents"
+title: Publications
+firstpage:
+---
+
+# List of publications
+Publications and submissions using MiniGrid or BabyAI are listed below (please open a pull request to add missing entries):
+- [History Compression via Language Models in Reinforcement Learning.](https://proceedings.mlr.press/v162/paischer22a.html) (Johannes Kepler University Linz, PMLR 2022)
+- [Leveraging Approximate Symbolic Models for Reinforcement Learning via Skill Diversity](https://arxiv.org/abs/2202.02886) (Arizona State University, ICML 2022)
+- [How to Stay Curious while avoiding Noisy TVs using Aleatoric Uncertainty Estimation](https://proceedings.mlr.press/v162/mavor-parker22a.html) (University College London, Boston University, ICML 2022)
+- [In a Nutshell, the Human Asked for This: Latent Goals for Following Temporal Specifications](https://openreview.net/pdf?id=rUwm9wCjURV) (Imperial College London, ICLR 2022)
+- [Interesting Object, Curious Agent: Learning Task-Agnostic Exploration](https://arxiv.org/abs/2111.13119) (Meta AI Research, NeurIPS 2021)
+- [Safe Policy Optimization with Local Generalized Linear Function Approximations](https://arxiv.org/abs/2111.04894) (IBM Research, Tsinghua University, NeurIPS 2021)
+- [A Consciousness-Inspired Planning Agent for Model-Based Reinforcement Learning](https://arxiv.org/abs/2106.02097) (Mila, McGill University, NeurIPS 2021)
+- [SPOTTER: Extending Symbolic Planning Operators through Targeted Reinforcement Learning](http://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1118.pdf) (Tufts University, SIFT, AAMAS 2021)
+- [Grid-to-Graph: Flexible Spatial Relational Inductive Biases for Reinforcement Learning](https://arxiv.org/abs/2102.04220) (UCL, AAMAS 2021)
+- [Rank the Episodes: A Simple Approach for Exploration in Procedurally-Generated Environments](https://openreview.net/forum?id=MtEE0CktZht) (Texas A&M University, Kuai Inc., ICLR 2021)
+- [Adversarially Guided Actor-Critic](https://openreview.net/forum?id=_mQp5cr_iNy) (INRIA, Google Brain, ICLR 2021)
+- [Information-theoretic Task Selection for Meta-Reinforcement Learning](https://papers.nips.cc/paper/2020/file/ec3183a7f107d1b8dbb90cb3c01ea7d5-Paper.pdf) (University of Leeds, NeurIPS 2020)
+- [BeBold: Exploration Beyond the Boundary of Explored Regions](https://arxiv.org/pdf/2012.08621.pdf) (UCB, December 2020)
+- [Approximate Information State for Approximate Planning and Reinforcement Learning in Partially Observed Systems](https://arxiv.org/abs/2010.08843) (McGill, October 2020)
+- [Prioritized Level Replay](https://arxiv.org/pdf/2010.03934.pdf) (FAIR, October 2020)
+- [AllenAct: A Framework for Embodied AI Research](https://arxiv.org/pdf/2008.12760.pdf) (Allen Institute for AI, August 2020)
+- [Learning with AMIGO: Adversarially Motivated Intrinsic Goals](https://arxiv.org/pdf/2006.12122.pdf) (MIT, FAIR, ICLR 2021)
+- [RIDE: Rewarding Impact-Driven Exploration for Procedurally-Generated Environments](https://openreview.net/forum?id=rkg-TJBFPB) (FAIR, ICLR 2020)
+- [Learning to Request Guidance in Emergent Communication](https://arxiv.org/pdf/1912.05525.pdf) (University of Amsterdam, Dec 2019)
+- [Working Memory Graphs](https://arxiv.org/abs/1911.07141) (MSR, Nov 2019)
+- [Fast Task-Adaptation for Tasks Labeled Using Natural Language in Reinforcement Learning](https://arxiv.org/pdf/1910.04040.pdf) (Oct 2019, University of Antwerp)
+- [Generalization in Reinforcement Learning with Selective Noise Injection and Information Bottleneck](https://arxiv.org/abs/1910.12911) (MSR, NeurIPS, Oct 2019)
+- [Recurrent Independent Mechanisms](https://arxiv.org/pdf/1909.10893.pdf) (Mila, Sept 2019) 
+- [Learning Effective Subgoals with Multi-Task Hierarchical Reinforcement Learning](http://surl.tirl.info/proceedings/SURL-2019_paper_10.pdf) (Tsinghua University, August 2019)
+- [Mastering emergent language: learning to guide in simulated navigation](https://arxiv.org/abs/1908.05135) (University of Amsterdam, Aug 2019)
+- [Transfer Learning by Modeling a Distribution over Policies](https://arxiv.org/abs/1906.03574) (Mila, June 2019)
+- [Reinforcement Learning with Competitive Ensembles of Information-Constrained Primitives](https://arxiv.org/abs/1906.10667) (Mila, June 2019)
+- [Learning distant cause and effect using only local and immediate credit assignment](https://arxiv.org/abs/1905.11589) (Incubator 491, May 2019)
+- [Practical Open-Loop Optimistic Planning](https://arxiv.org/abs/1904.04700) (INRIA, April 2019)
+- [Learning World Graphs to Accelerate Hierarchical Reinforcement Learning](https://arxiv.org/abs/1907.00664) (Salesforce Research, 2019)
+- [Variational State Encoding as Intrinsic Motivation in Reinforcement Learning](https://mila.quebec/wp-content/uploads/2019/05/WebPage.pdf) (Mila, TARL 2019)
+- [Unsupervised Discovery of Decision States Through Intrinsic Control](https://tarl2019.github.io/assets/papers/modhe2019unsupervised.pdf) (Georgia Tech, TARL 2019)
+- [Modeling the Long Term Future in Model-Based Reinforcement Learning](https://openreview.net/forum?id=SkgQBn0cF7) (Mila, ICLR 2019)
+- [Unifying Ensemble Methods for Q-learning via Social Choice Theory](https://arxiv.org/pdf/1902.10646.pdf) (Max Planck Institute, Feb 2019)
+- [Planning Beyond The Sensing Horizon Using a Learned Context](https://personalrobotics.cs.washington.edu/workshops/mlmp2018/assets/docs/18_CameraReadySubmission.pdf) (MLMP@IROS, 2018)
+- [Guiding Policies with Language via Meta-Learning](https://arxiv.org/abs/1811.07882) (UC Berkeley, Nov 2018)
+- [On the Complexity of Exploration in Goal-Driven Navigation](https://arxiv.org/abs/1811.06889) (CMU, NeurIPS, Nov 2018)
+- [Transfer and Exploration via the Information Bottleneck](https://openreview.net/forum?id=rJg8yhAqKm) (Mila, Nov 2018)
+- [Creating safer reward functions for reinforcement learning agents in the gridworld](https://gupea.ub.gu.se/bitstream/2077/62445/1/gupea_2077_62445_1.pdf) (University of Gothenburg, 2018)
+- [BabyAI: First Steps Towards Grounded Language Learning With a Human In the Loop](https://arxiv.org/abs/1810.08272) (Mila, ICLR, Oct 2018)
+
+This environment has been built as part of work done at [Mila](https://mila.quebec). The Dynamic obstacles environment has been added as part of work done at [IAS in TU Darmstadt](https://www.ias.informatik.tu-darmstadt.de/) and the University of Genoa for mobile robot navigation with dynamic obstacles.

+ 59 - 0
docs/environments/blocked_unlock_pickup.md

@@ -0,0 +1,59 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Blocked Unlock Pickup
+---
+
+
+# Blocked Unlock Pickup
+
+### Description
+
+The agent has to pick up a box which is placed in another room, behind a
+locked door. The door is also blocked by a ball which the agent has to move
+before it can unlock the door. Hence, the agent has to learn to move the
+ball, pick up the key, open the door and pick up the object in the other
+room. This environment can be solved without relying on language.
+
+### Mission Space
+
+"pick up the {color} {type}"
+
+{color} is the color of the box. Can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+{type} is the type of the object. Can be "box" or "key".
+
+### Action Space
+
+| Num | Name         | Action            |
+|-----|--------------|-------------------|
+| 0   | left         | Turn left         |
+| 1   | right        | Turn right        |
+| 2   | forward      | Move forward      |
+| 3   | pickup       | Pick up an object |
+| 4   | drop         | Unused            |
+| 5   | toggle       | Unused            |
+| 6   | done         | Unused            |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent picks up the correct box.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-BlockedUnlockPickup-v0`

+ 39 - 0
docs/environments/design.md

@@ -0,0 +1,39 @@
+---
+layout: "contents"
+title: Design
+firstpage:
+---
+
+# General Structure
+
+Structure of the world:
+- The world is an NxM grid of tiles
+- Each tile in the grid world contains zero or one object
+  - Cells that do not contain an object have the value `None`
+- Each object has an associated discrete color (string)
+- Each object has an associated type (string)
+  - Provided object types are: wall, floor, lava, door, key, ball, box and goal
+- The agent can pick up and carry exactly one object (eg: ball or key)
+- To open a locked door, the agent has to be carrying a key matching the door's color
+
+Actions in the basic environment:
+- Turn left
+- Turn right
+- Move forward
+- Pick up an object
+- Drop the object being carried
+- Toggle (open doors, interact with objects)
+- Done (task completed, optional)
+
+Default tile/observation encoding:
+- Each tile is encoded as a 3 dimensional tuple: `(OBJECT_IDX, COLOR_IDX, STATE)` 
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
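+As a quick sketch of inspecting this encoding (the import path and environment
+name are assumptions and may differ between versions; the agent sits at the
+bottom-center of its own 7x7 view):
+
+```python
+import gym
+from minigrid.minigrid import IDX_TO_OBJECT  # Import path may vary by version
+
+env = gym.make("MiniGrid-Empty-8x8-v0")
+obs, _ = env.reset()
+print(obs["image"].shape)  # (7, 7, 3): view_size x view_size x 3
+obj_idx, color_idx, state = obs["image"][3, 5]  # Tile just in front of the agent
+print(IDX_TO_OBJECT[obj_idx])  # e.g. 'empty', 'wall', 'goal', ...
+```
+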
+By default, sparse rewards are given for reaching a green goal tile. A
+reward of 1 is given for success, and zero for failure. There is also an
+environment-specific time step limit for completing the task.
+You can define your own reward function by creating a class derived
+from `MiniGridEnv`. Extending the environment with new object types or new actions
+should be very easy. If you wish to do this, you should take a look at the
+[minigrid/minigrid.py](minigrid/minigrid.py) source file.
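+
+As a minimal sketch of such a derived class (the import path and the
+`SimpleEnv` name are illustrative assumptions; the exact locations of `Grid`,
+`Goal`, `MissionSpace` and `MiniGridEnv` may differ between versions):
+
+```python
+from minigrid.minigrid import Grid, Goal, MiniGridEnv, MissionSpace
+
+
+class SimpleEnv(MiniGridEnv):
+    """Empty room with a goal in the corner and a custom (halved) reward."""
+
+    def __init__(self, size=8, **kwargs):
+        mission_space = MissionSpace(
+            mission_func=lambda: "get to the green goal square"
+        )
+        super().__init__(
+            mission_space=mission_space,
+            grid_size=size,
+            max_steps=4 * size * size,
+            **kwargs,
+        )
+
+    def _gen_grid(self, width, height):
+        # Surround the room with walls and place the goal in the far corner
+        self.grid = Grid(width, height)
+        self.grid.wall_rect(0, 0, width, height)
+        self.put_obj(Goal(), width - 2, height - 2)
+        self.agent_pos = (1, 1)
+        self.agent_dir = 0
+        self.mission = "get to the green goal square"
+
+    def _reward(self):
+        # Override the default sparse reward shaping, e.g. halve it
+        return 0.5 * super()._reward()
+```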

+ 58 - 0
docs/environments/dist_shift2.md

@@ -0,0 +1,58 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Dist Shift2
+---
+
+
+# Dist Shift2
+
+### Description
+
+This environment is based on one of the DeepMind [AI safety gridworlds](https://github.com/deepmind/ai-safety-gridworlds).
+The agent starts in the
+top-left corner and must reach the goal which is in the top-right corner,
+but has to avoid stepping into lava on its way. The aim of this environment
+is to test an agent's ability to generalize. There are two slightly
+different variants of the environment, so that the agent can be trained on
+one variant and tested on the other.
+
+### Mission Space
+
+"get to the green goal square"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. The agent falls into lava.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-DistShift1-v0`
+- `MiniGrid-DistShift2-v0`

+ 56 - 0
docs/environments/door_key.md

@@ -0,0 +1,56 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Door Key
+---
+
+
+# Door Key
+
+### Description
+
+This environment has a key that the agent must pick up in order to unlock a
+door and then get to the green goal square. Because of the sparse reward, this
+environment is difficult to solve using classical RL algorithms. It is
+useful for experimenting with curiosity or curriculum learning.
+
+### Mission Space
+
+"use the key to open the door and then get to the goal"
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Pick up an object         |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-DoorKey-5x5-v0`
+- `MiniGrid-DoorKey-6x6-v0`
+- `MiniGrid-DoorKey-8x8-v0`
+- `MiniGrid-DoorKey-16x16-v0`

+ 62 - 0
docs/environments/dynamic.md

@@ -0,0 +1,62 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Dynamic
+---
+
+
+# Dynamic
+
+### Description
+
+This environment is an empty room with moving obstacles.
+The goal of the agent is to reach the green goal square without colliding
+with any obstacle. A large penalty is subtracted if the agent collides with
+an obstacle, and the episode finishes. This environment is useful for testing
+dynamic obstacle avoidance for mobile robots with reinforcement learning under
+partial observability.
+
+### Mission Space
+
+"get to the green goal square"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure. A penalty of '-1'
+is given if the agent collides with an obstacle.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. The agent collides with an obstacle.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-Dynamic-Obstacles-5x5-v0`
+- `MiniGrid-Dynamic-Obstacles-Random-5x5-v0`
+- `MiniGrid-Dynamic-Obstacles-6x6-v0`
+- `MiniGrid-Dynamic-Obstacles-Random-6x6-v0`
+- `MiniGrid-Dynamic-Obstacles-8x8-v0`
+- `MiniGrid-Dynamic-Obstacles-16x16-v0`

+ 62 - 0
docs/environments/empty.md

@@ -0,0 +1,62 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Empty
+---
+
+
+# Empty
+
+### Description
+
+This environment is an empty room, and the goal of the agent is to reach the
+green goal square, which provides a sparse reward. A small penalty is
+subtracted for the number of steps to reach the goal. This environment is
+useful, with small rooms, to validate that your RL algorithm works
+correctly, and with large rooms to experiment with sparse rewards and
+exploration. The random variants of the environment have the agent starting
+at a random position for each episode, while the regular variants have the
+agent always starting in the corner opposite to the goal.
+
+### Mission Space
+
+"get to the green goal square"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-Empty-5x5-v0`
+- `MiniGrid-Empty-Random-5x5-v0`
+- `MiniGrid-Empty-6x6-v0`
+- `MiniGrid-Empty-Random-6x6-v0`
+- `MiniGrid-Empty-8x8-v0`
+- `MiniGrid-Empty-16x16-v0`

+ 66 - 0
docs/environments/fetch.md

@@ -0,0 +1,66 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Fetch
+---
+
+
+# Fetch
+
+### Description
+
+This environment has multiple objects of assorted types and colors. The
+agent receives a textual string as part of its observation telling it which
+object to pick up. Picking up the wrong object terminates the episode with
+zero reward.
+
+### Mission Space
+
+"{syntax} {color} {type}"
+
+{syntax} is one of the following: "get a", "go get a", "fetch a",
+"go fetch a", "you must fetch a".
+
+{color} is the color of the object. Can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+{type} is the type of the object. Can be "key" or "ball".
+
+### Action Space
+
+| Num | Name         | Action               |
+|-----|--------------|----------------------|
+| 0   | left         | Turn left            |
+| 1   | right        | Turn right           |
+| 2   | forward      | Move forward         |
+| 3   | pickup       | Pick up an object    |
+| 4   | drop         | Unused               |
+| 5   | toggle       | Unused               |
+| 6   | done         | Unused               |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent picks up the correct object.
+2. The agent picks up the wrong object.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+N: number of objects to be generated.
+
+- `MiniGrid-Fetch-5x5-N2-v0`
+- `MiniGrid-Fetch-6x6-N2-v0`
+- `MiniGrid-Fetch-8x8-N3-v0`

+ 53 - 0
docs/environments/four_rooms.md

@@ -0,0 +1,53 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Four Rooms
+---
+
+
+# Four Rooms
+
+### Description
+
+Classic four room reinforcement learning environment. The agent must
+navigate a maze composed of four rooms interconnected by four gaps in the
+walls. To obtain a reward, the agent must reach the green goal square. Both
+the agent and the goal square are randomly placed in any of the four rooms.
+
+### Mission Space
+
+"reach the goal"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-FourRooms-v0`

+ 59 - 0
docs/environments/go_to_door.md

@@ -0,0 +1,59 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Go To Door
+---
+
+
+# Go To Door
+
+### Description
+
+This environment is a room with four doors, one on each wall. The agent
+receives a textual (mission) string as input, telling it which door to go
+to (e.g. "go to the red door"). It receives a positive reward for performing
+the `done` action next to the correct door, as indicated in the mission
+string.
+
+### Mission Space
+
+"go to the {color} door"
+
+{color} is the color of the door. Can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+### Action Space
+
+| Num | Name         | Action               |
+|-----|--------------|----------------------|
+| 0   | left         | Turn left            |
+| 1   | right        | Turn right           |
+| 2   | forward      | Move forward         |
+| 3   | pickup       | Unused               |
+| 4   | drop         | Unused               |
+| 5   | toggle       | Unused               |
+| 6   | done         | Done completing task |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent stands next to the correct door and performs the `done` action.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-GoToDoor-5x5-v0`
+- `MiniGrid-GoToDoor-6x6-v0`
+- `MiniGrid-GoToDoor-8x8-v0`

+ 10 - 0
docs/environments/go_to_object.md

@@ -0,0 +1,10 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Go To Object
+---
+
+
+# Go To Object
+
+Environment in which the agent is instructed to go to a given object
+named using an English text string.

+ 37 - 0
docs/environments/index.md

@@ -0,0 +1,37 @@
+---
+firstpage:
+lastpage:
+---
+
+# Included environments
+
+The environments listed below are implemented in the [minigrid/envs](../../../../minigrid/envs) directory.
+Each environment provides one or more configurations registered with OpenAI Gym. Each environment
+is also programmatically tunable in terms of size/complexity, which is useful for curriculum learning
+or for fine-tuning difficulty.
+
+```{toctree}
+:hidden:
+empty
+four_rooms
+door_key
+multi_room
+fetch
+go_to_door
+put_near
+red_blue_doors
+memory_s7
+locked_room
+key_corridor_s6_r3
+unlock
+unlock_pickup
+blocked_unlock_pickup
+obstructed_maze
+dist_shift2
+lava_gap_s7
+simple_crossing_s11_n5
+dynamic
+go_to_object
+playground
+
+```

+ 69 - 0
docs/environments/key_corridor_s6_r3.md

@@ -0,0 +1,69 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Key Corridor S6 R3
+---
+
+
+# Key Corridor S6 R3
+
+### Description
+
+This environment is similar to the locked room environment, but there are
+multiple registered environment configurations of increasing size,
+making it easier to use curriculum learning to train an agent to solve it.
+The agent has to pick up an object which is behind a locked door. The key is
+hidden in another room, and the agent has to explore the environment to find
+it. The mission string does not give the agent any clues as to where the
+key is placed. This environment can be solved without relying on language.
+
+### Mission Space
+
+"pick up the {color} {obj_type}"
+
+{color} is the color of the object. Can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+{obj_type} is the type of the object. Can be "ball" or "key".
+
+### Action Space
+
+| Num | Name         | Action            |
+|-----|--------------|-------------------|
+| 0   | left         | Turn left         |
+| 1   | right        | Turn right        |
+| 2   | forward      | Move forward      |
+| 3   | pickup       | Pick up an object |
+| 4   | drop         | Unused            |
+| 5   | toggle       | Unused            |
+| 6   | done         | Unused            |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent picks up the correct object.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+S: room size.
+R: Number of rows.
+
+- `MiniGrid-KeyCorridorS3R1-v0`
+- `MiniGrid-KeyCorridorS3R2-v0`
+- `MiniGrid-KeyCorridorS3R3-v0`
+- `MiniGrid-KeyCorridorS4R3-v0`
+- `MiniGrid-KeyCorridorS5R3-v0`
+- `MiniGrid-KeyCorridorS6R3-v0`

+ 60 - 0
docs/environments/lava_gap_s7.md

@@ -0,0 +1,60 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Lava Gap S7
+---
+
+
+# Lava Gap S7
+
+### Description
+
+The agent has to reach the green goal square at the opposite corner of the
+room, and must pass through a narrow gap in a vertical strip of deadly lava.
+Touching the lava terminates the episode with a zero reward. This environment
+is useful for studying safety and safe exploration.
+
+### Mission Space
+
+Depending on the `obstacle_type` parameter:
+- `Lava`: "avoid the lava and get to the green goal square"
+- otherwise: "find the opening and get to the green goal square"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. The agent falls into lava.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+S: size of map SxS.
+
+- `MiniGrid-LavaGapS5-v0`
+- `MiniGrid-LavaGapS6-v0`
+- `MiniGrid-LavaGapS7-v0`

+ 57 - 0
docs/environments/locked_room.md

@@ -0,0 +1,57 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Locked Room
+---
+
+
+# Locked Room
+
+### Description
+
+The environment has six rooms, one of which is locked. The agent receives
+a textual mission string as input, telling it which room to go to in order
+to get the key that opens the locked room. It then has to go into the locked
+room in order to reach the final goal. This environment is extremely
+difficult to solve with vanilla reinforcement learning alone.
+
+### Mission Space
+
+"get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal"
+
+{lockedroom_color}, {keyroom_color}, and {door_color} can be "red", "green",
+"blue", "purple", "yellow" or "grey".
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Pick up an object         |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-LockedRoom-v0`

+ 60 - 0
docs/environments/memory_s7.md

@@ -0,0 +1,60 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Memory S7
+---
+
+
+# Memory S7
+
+### Description
+
+This environment is a memory test. The agent starts in a small room where it
+sees an object. It then has to go through a narrow hallway which ends in a
+split. At each end of the split there is an object, one of which is the same
+as the object in the starting room. The agent has to remember the initial
+object, and go to the matching object at the split.
+
+### Mission Space
+
+"go to the matching object at the end of the hallway"
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Pick up an object         |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the correct matching object.
+2. The agent reaches the wrong matching object.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+S: size of map SxS.
+
+- `MiniGrid-MemoryS17Random-v0`
+- `MiniGrid-MemoryS13Random-v0`
+- `MiniGrid-MemoryS13-v0`
+- `MiniGrid-MemoryS11-v0`

+ 59 - 0
docs/environments/multi_room.md

@@ -0,0 +1,59 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Multi Room
+---
+
+
+# Multi Room
+
+### Description
+
+This environment has a series of connected rooms with doors that must be
+opened in order to get to the next room. The final room has the green goal
+square that the agent must get to. This environment is extremely difficult to
+solve using RL alone. However, by gradually increasing the number of rooms
+and building a curriculum, the environment can be solved.
+
+### Mission Space
+
+"traverse the rooms to get to the goal"
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Unused                    |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+S: size of map SxS.
+N: number of rooms.
+
+- `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
+- `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
+- `MiniGrid-MultiRoom-N6-v0` (six rooms)

+ 11 - 0
docs/environments/obstructed_maze.md

@@ -0,0 +1,11 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Obstructed Maze
+---
+
+
+# Obstructed Maze
+
+A blue ball is hidden in one of the 4 corners of a 3x3 maze. The doors
+are locked and obstructed by balls, and the keys are hidden in boxes.

+ 10 - 0
docs/environments/playground.md

@@ -0,0 +1,10 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Playground
+---
+
+
+# Playground
+
+Environment with multiple rooms and random objects.
+This environment has no specific goals or rewards.

+ 62 - 0
docs/environments/put_near.md

@@ -0,0 +1,62 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Put Near
+---
+
+
+# Put Near
+
+### Description
+
+The agent is instructed through a textual string to pick up an object and
+place it next to another object. This environment is easy to solve with two
+objects, but difficult to solve with more, as it involves both textual
+understanding and spatial reasoning involving multiple objects.
+
+### Mission Space
+
+"put the {move_color} {move_type} near the {target_color} {target_type}"
+
+{move_color} and {target_color} can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+{move_type} and {target_type} can be "box", "ball" or "key".
+
+### Action Space
+
+| Num | Name         | Action            |
+|-----|--------------|-------------------|
+| 0   | left         | Turn left         |
+| 1   | right        | Turn right        |
+| 2   | forward      | Move forward      |
+| 3   | pickup       | Pick up an object |
+| 4   | drop         | Drop an object    |
+| 5   | toggle       | Unused            |
+| 6   | done         | Unused            |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3 dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+    [minigrid/minigrid.py](minigrid/minigrid.py)
+- `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent picks up the wrong object.
+2. The agent drops the correct object near the target.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+N: number of objects.
+
+- `MiniGrid-PutNear-6x6-N2-v0`
+- `MiniGrid-PutNear-8x8-N3-v0`

+ 55 - 0
docs/environments/red_blue_doors.md

@@ -0,0 +1,55 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Red Blue Doors
+---
+
+
+# Red Blue Doors
+
+### Description
+
+The agent is randomly placed within a room with one red and one blue door
+facing opposite directions. The agent has to open the red door and then the
+blue door, in that order. Note that, surprisingly, this environment is
+solvable without memory.
+
+### Mission Space
+
+"open the red door then the blue door"
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Unused                    |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3-dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings can be found in
+    [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+- `STATE` refers to the door state, with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent opens the blue door having already opened the red door.
+2. The agent opens the blue door without having opened the red door yet.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-RedBlueDoors-6x6-v0`
+- `MiniGrid-RedBlueDoors-8x8-v0`

+ 76 - 0
docs/environments/simple_crossing_s11_n5.md

@@ -0,0 +1,76 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Simple Crossing S11 N5
+---
+
+
+# Simple Crossing S11 N5
+
+### Description
+
+Depending on the `obstacle_type` parameter:
+- `Lava` - The agent has to reach the green goal square in the opposite corner
+    of the room while avoiding rivers of deadly lava, which terminate the
+    episode in failure. Each lava stream runs across the room either
+    horizontally or vertically, and has a single crossing point which can be
+    safely used; luckily, a path to the goal is guaranteed to exist. This
+    environment is useful for studying safety and safe exploration.
+- otherwise - Similar to the `LavaCrossing` environment, the agent has to
+    reach the green goal square in the opposite corner of the room, but here
+    lava is replaced by walls. This MDP is therefore much easier and may be
+    useful for quickly testing your algorithms.
+
+### Mission Space
+
+Depending on the `obstacle_type` parameter:
+- `Lava` - "avoid the lava and get to the green goal square"
+- otherwise - "find the opening and get to the green goal square"
+
+### Action Space
+
+| Num | Name         | Action       |
+|-----|--------------|--------------|
+| 0   | left         | Turn left    |
+| 1   | right        | Turn right   |
+| 2   | forward      | Move forward |
+| 3   | pickup       | Unused       |
+| 4   | drop         | Unused       |
+| 5   | toggle       | Unused       |
+| 6   | done         | Unused       |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3-dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings can be found in
+    [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+- `STATE` refers to the door state, with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent reaches the goal.
+2. The agent falls into lava.
+3. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+S: size of the map SxS.
+N: number of valid crossings across lava or walls from the starting position
+to the goal.
+
+- `Lava`:
+    - `MiniGrid-LavaCrossingS9N1-v0`
+    - `MiniGrid-LavaCrossingS9N2-v0`
+    - `MiniGrid-LavaCrossingS9N3-v0`
+    - `MiniGrid-LavaCrossingS11N5-v0`
+
+- otherwise:
+    - `MiniGrid-SimpleCrossingS9N1-v0`
+    - `MiniGrid-SimpleCrossingS9N2-v0`
+    - `MiniGrid-SimpleCrossingS9N3-v0`
+    - `MiniGrid-SimpleCrossingS11N5-v0`
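+
+As a rough sketch (the class and parameter names below are taken from the
+`gym_minigrid` source and may differ between versions), the same environments
+can also be constructed directly instead of via `gym.make`:
+
+```python
+# Assumption: CrossingEnv exposes size / num_crossings / obstacle_type.
+from gym_minigrid.envs import CrossingEnv
+from gym_minigrid.minigrid import Wall
+
+# Roughly equivalent to MiniGrid-SimpleCrossingS11N5-v0: an 11x11 grid
+# with 5 wall crossings between the start position and the goal.
+env = CrossingEnv(size=11, num_crossings=5, obstacle_type=Wall)
+```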

+ 51 - 0
docs/environments/unlock.md

@@ -0,0 +1,51 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Unlock
+---
+
+
+# Unlock
+
+### Description
+
+The agent has to open a locked door. This environment can be solved without
+relying on language.
+
+### Mission Space
+
+"open the door"
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Unused                    |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3-dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings can be found in
+    [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+- `STATE` refers to the door state, with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent opens the door.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-Unlock-v0`

+ 54 - 0
docs/environments/unlock_pickup.md

@@ -0,0 +1,54 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Unlock Pickup
+---
+
+
+# Unlock Pickup
+
+### Description
+
+The agent has to pick up a box which is placed in another room, behind a
+locked door. This environment can be solved without relying on language.
+
+### Mission Space
+
+"pick up the {color} box"
+
+{color} is the color of the box. Can be "red", "green", "blue", "purple",
+"yellow" or "grey".
+
+### Action Space
+
+| Num | Name         | Action                    |
+|-----|--------------|---------------------------|
+| 0   | left         | Turn left                 |
+| 1   | right        | Turn right                |
+| 2   | forward      | Move forward              |
+| 3   | pickup       | Pick up an object         |
+| 4   | drop         | Unused                    |
+| 5   | toggle       | Toggle/activate an object |
+| 6   | done         | Unused                    |
+
+### Observation Encoding
+
+- Each tile is encoded as a 3-dimensional tuple:
+    `(OBJECT_IDX, COLOR_IDX, STATE)`
+- The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings can be found in
+    [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
+- `STATE` refers to the door state, with 0=open, 1=closed and 2=locked
+
+### Rewards
+
+A reward of '1' is given for success, and '0' for failure.
+
+### Termination
+
+The episode ends if any one of the following conditions is met:
+
+1. The agent picks up the correct box.
+2. Timeout (see `max_steps`).
+
+### Registered Configurations
+
+- `MiniGrid-UnlockPickup-v0`

+ 65 - 0
docs/index.md

@@ -0,0 +1,65 @@
+---
+hide-toc: true
+firstpage:
+lastpage:
+---
+
+# MiniGrid is a simple and easily configurable grid world environment for reinforcement learning
+
+
+[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/) 
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+There are other gridworld Gym environments out there, but this one is
+designed to be particularly simple, lightweight and fast. The code has very few
+dependencies, making it less likely to break or fail to install. It loads no
+external sprites/textures, and it can run at up to 5000 FPS on a Core i7
+laptop, which means you can run your experiments faster. A known-working RL
+implementation can be found [in this repository](https://github.com/lcswillems/torch-rl).
+
+Requirements:
+- Python 3.7 to 3.10
+- OpenAI Gym v0.26
+- NumPy 1.18+
+- Matplotlib 3.0+ (optional, only needed for display)
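+
+A minimal sketch of the control loop (assuming gym v0.26 step/reset
+semantics; the env id below is one of the registered MiniGrid environments):
+
+```python
+import gym
+import gym_minigrid  # noqa: F401  # importing registers the MiniGrid-* envs
+
+env = gym.make("MiniGrid-Empty-5x5-v0")
+obs, info = env.reset(seed=42)
+terminated = truncated = False
+while not (terminated or truncated):
+    action = env.action_space.sample()  # replace with your agent's policy
+    obs, reward, terminated, truncated, info = env.step(action)
+env.close()
+```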
+
+Please use this BibTeX entry if you want to cite this repository in your publications:
+
+```
+@misc{minigrid,
+  author = {Chevalier-Boisvert, Maxime and Willems, Lucas and Pal, Suman},
+  title = {Minimalistic Gridworld Environment for Gymnasium},
+  year = {2018},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  howpublished = {\url{https://github.com/Farama-Foundation/MiniGrid}},
+}
+```
+
+```{toctree}
+:hidden:
+:caption: Introduction
+
+content/installation
+content/basic_usage
+api/wrappers
+content/pubs
+```
+
+
+```{toctree}
+:hidden:
+:caption: Environments
+
+environments/design
+environments/index
+```
+
+```{toctree}
+:hidden:
+:caption: Development
+
+Github <https://github.com/Farama-Foundation/MiniGrid>
+Donate <https://farama.org/donations>
+Contribute to the Docs <404>
+```

+ 35 - 0
docs/make.bat

@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd

+ 7 - 0
docs/requirements.txt

@@ -0,0 +1,7 @@
+sphinx==4.2.0
+myst-parser
+furo==2022.6.21
+moviepy
+pygame
+pygments==2.11.2
+sphinx-notfound-page

+ 78 - 0
docs/scripts/gen_mds.py

@@ -0,0 +1,78 @@
+__author__ = "Feng Gu"
+__email__ = "contact@fenggu.me"
+
+"""
+   isort:skip_file
+"""
+
+import os
+import re
+
+from gym.envs.registration import registry
+from tqdm import tqdm
+
+from utils import trim
+
+LAYOUT = "env"
+
+pattern = re.compile(r"(?<!^)(?=[A-Z])")
+
+all_envs = list(registry.values())
+
+filtered_envs_by_type = {}
+
+# Obtain filtered list
+for env_spec in tqdm(all_envs):
+    # entry_point looks like "gym_minigrid.envs:Env"
+    split = env_spec.entry_point.split(".")
+    # skip anything not defined by gym_minigrid (e.g. gym.envs.*)
+    env_module = split[0]
+    if env_module != "gym_minigrid":
+        continue
+
+    env_name = split[1]
+    filtered_envs_by_type[env_name] = env_spec
+
+
+filtered_envs = {
+    k.split(":")[1]: v
+    for k, v in sorted(
+        filtered_envs_by_type.items(),
+        key=lambda item: item[1].entry_point.split(".")[1],
+    )
+}
+
+for env_name, env_spec in filtered_envs.items():
+    made = env_spec.make()
+
+    docstring = trim(made.unwrapped.__doc__)
+
+    pascal_env_name = env_spec.id.split("-")[1]
+    snake_env_name = pattern.sub("_", pascal_env_name).lower()
+    title_env_name = snake_env_name.replace("_", " ").title()
+
+    v_path = os.path.join(
+        os.path.dirname(os.path.dirname(__file__)),
+        "source",
+        "environments",
+        snake_env_name + ".md",
+    )
+
+    front_matter = f"""---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: {title_env_name}
+---
+"""
+    title = f"# {title_env_name}"
+
+    # trim() returns "" (never None) for a missing docstring
+    if not docstring:
+        docstring = "No information provided"
+    all_text = f"""{front_matter}
+
+{title}
+
+{docstring}
+"""
+    with open(v_path, "w+", encoding="utf-8") as file:
+        file.write(all_text)
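+
+# Illustrative usage (assumption -- run so that utils.py is importable):
+#     python docs/scripts/gen_mds.py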

+ 14 - 0
docs/scripts/move404.py

@@ -0,0 +1,14 @@
+import sys
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Provide a path")
+        sys.exit(1)
+    filePath = sys.argv[1]
+
+    with open(filePath, "r+") as fp:
+        content = fp.read()
+        content = content.replace('href="../', 'href="/').replace('src="../', 'src="/')
+        fp.seek(0)
+        fp.truncate()
+
+        fp.write(content)

+ 44 - 0
docs/scripts/utils.py

@@ -0,0 +1,44 @@
+import sys
+
+
+# adapted from the Python docs (PEP 257 docstring trimming)
+def trim(docstring):
+    if not docstring:
+        return ""
+    # Convert tabs to spaces (following the normal Python rules)
+    # and split into a list of lines:
+    lines = docstring.expandtabs().splitlines()
+    # Determine minimum indentation (first line doesn't count):
+    indent = sys.maxsize
+    for line in lines[1:]:
+        stripped = line.lstrip()
+        if stripped:
+            indent = min(indent, len(line) - len(stripped))
+    # Remove indentation (first line is special):
+    trimmed = [lines[0].strip()]
+    if indent < sys.maxsize:
+        for line in lines[1:]:
+            trimmed.append(line[indent:].rstrip())
+    # Strip off trailing and leading blank lines:
+    while trimmed and not trimmed[-1]:
+        trimmed.pop()
+    while trimmed and not trimmed[0]:
+        trimmed.pop(0)
+    # Return a single string:
+    return "\n".join(trimmed)
+
+
+# don't want envs whose names contain any of these substrings
+kill_strs = [
+    "eterministic",
+    "ALE",
+    "-ram",
+    "Frameskip",
+    "Hard",
+    "LanderContinu",
+    "8x8",
+    "uessing",
+    "otter",
+    "oinflip",
+    "hain",
+    "oulette",
+    "DomainRandom",
+    "RacingDiscrete",
+]