Sfoglia il codice sorgente

Merge branch 'master' into pre-commit

# Conflicts:
#	.pre-commit-config.yaml
#	README.md
#	benchmark.py
#	gym_minigrid/envs/blockedunlockpickup.py
#	gym_minigrid/envs/crossing.py
#	gym_minigrid/envs/distshift.py
#	gym_minigrid/envs/dynamicobstacles.py
#	gym_minigrid/envs/empty.py
#	gym_minigrid/envs/fetch.py
#	gym_minigrid/envs/fourrooms.py
#	gym_minigrid/envs/gotodoor.py
#	gym_minigrid/envs/gotoobject.py
#	gym_minigrid/envs/keycorridor.py
#	gym_minigrid/envs/lavagap.py
#	gym_minigrid/envs/lockedroom.py
#	gym_minigrid/envs/multiroom.py
#	gym_minigrid/envs/obstructedmaze.py
#	gym_minigrid/envs/putnear.py
#	gym_minigrid/envs/redbluedoors.py
#	gym_minigrid/envs/unlock.py
#	gym_minigrid/envs/unlockpickup.py
#	gym_minigrid/minigrid.py
#	gym_minigrid/roomgrid.py
#	gym_minigrid/wrappers.py
#	manual_control.py
#	run_tests.py
#	setup.py
StringTheory 2 anni fa
parent
commit
d9fa55fd69
42 ha cambiato i file con 1220 aggiunte e 686 eliminazioni
  1. 27 0
      .github/ISSUE_TEMPLATE/bug.md
  2. 33 0
      .github/ISSUE_TEMPLATE/proposal.md
  3. 12 0
      .github/ISSUE_TEMPLATE/question.md
  4. 45 0
      .github/PULL_REQUEST_TEMPLATE.md
  5. 62 0
      .github/stale.yml
  6. 19 0
      .github/workflows/build.yml
  7. 17 0
      .github/workflows/pre-commit.yml
  8. 0 10
      .travis.yml
  9. 13 0
      CODE_OF_CONDUCT.rst
  10. 1 1
      README.md
  11. 1 1
      benchmark.py
  12. 3 3
      gym_minigrid/envs/blockedunlockpickup.py
  13. 18 18
      gym_minigrid/envs/crossing.py
  14. 13 5
      gym_minigrid/envs/distshift.py
  15. 13 8
      gym_minigrid/envs/doorkey.py
  16. 17 11
      gym_minigrid/envs/dynamicobstacles.py
  17. 13 4
      gym_minigrid/envs/empty.py
  18. 11 5
      gym_minigrid/envs/fetch.py
  19. 3 2
      gym_minigrid/envs/fourrooms.py
  20. 10 5
      gym_minigrid/envs/gotodoor.py
  21. 9 3
      gym_minigrid/envs/gotoobject.py
  22. 45 21
      gym_minigrid/envs/keycorridor.py
  23. 8 8
      gym_minigrid/envs/lavagap.py
  24. 6 2
      gym_minigrid/envs/lockedroom.py
  25. 16 16
      gym_minigrid/envs/memory.py
  26. 95 41
      gym_minigrid/envs/multiroom.py
  27. 64 87
      gym_minigrid/envs/obstructedmaze.py
  28. 2 2
      gym_minigrid/envs/playground_v0.py
  29. 31 25
      gym_minigrid/envs/putnear.py
  30. 19 15
      gym_minigrid/envs/redbluedoors.py
  31. 10 8
      gym_minigrid/envs/unlock.py
  32. 11 9
      gym_minigrid/envs/unlockpickup.py
  33. 221 177
      gym_minigrid/minigrid.py
  34. 47 42
      gym_minigrid/roomgrid.py
  35. 1 1
      gym_minigrid/window.py
  36. 194 81
      gym_minigrid/wrappers.py
  37. 5 6
      manual_control.py
  38. 12 0
      py.Dockerfile
  39. 3 0
      requirements.txt
  40. 66 60
      run_tests.py
  41. 23 9
      setup.py
  42. 1 0
      test_requirements.txt

+ 27 - 0
.github/ISSUE_TEMPLATE/bug.md

@@ -0,0 +1,27 @@
+---
+name: Bug Report
+about: Submit a bug report
+title: "[Bug Report] Bug title"
+
+---
+
+If you are submitting a bug report, please fill in the following details and use the tag [bug].
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**Code example**
+Please try to provide a minimal example to reproduce the bug. Error messages and stack traces are also helpful.
+
+**System Info**
+Describe the characteristics of your environment:
+ * Describe how `gym-minigrid` was installed (pip, docker, source, ...)
+ * What OS/version of Linux you're using. Note that while we will accept PRs to improve Windows support, we do not officially support it.
+ * Python version
+
+**Additional context**
+Add any other context about the problem here.
+
+### Checklist
+
+- [ ] I have checked that there is no similar [issue](https://github.com/Farama-Foundation/gym-minigrid/issues) in the repo (**required**)

+ 33 - 0
.github/ISSUE_TEMPLATE/proposal.md

@@ -0,0 +1,33 @@
+---
+name: Proposal
+about: Propose changes that are not bug fixes
+title: "[Proposal] Proposal title"
+---
+
+
+
+### Proposal 
+
+A clear and concise description of the proposal.
+
+### Motivation
+
+Please outline the motivation for the proposal.
+Is your feature request related to a problem? e.g., "I'm always frustrated when [...]".
+If this is related to another GitHub issue, please link here too.
+
+### Pitch
+
+A clear and concise description of what you want to happen.
+
+### Alternatives
+
+A clear and concise description of any alternative solutions or features you've considered, if any.
+
+### Additional context
+
+Add any other context or screenshots about the feature request here.
+
+### Checklist
+
+- [ ] I have checked that there is no similar [issue](https://github.com/Farama-Foundation/gym-minigrid/issues) in the repo (**required**)

+ 12 - 0
.github/ISSUE_TEMPLATE/question.md

@@ -0,0 +1,12 @@
+---
+name: Question
+about: Ask a question
+title: "[Question] Question title"
+---
+
+
+### Question
+
+If you're a beginner and have basic questions, please ask on [r/reinforcementlearning](https://www.reddit.com/r/reinforcementlearning/) or in the [RL Discord](https://discord.com/invite/xhfNqQv) (if you're new please use the beginners channel). Basic questions that are not bugs or feature requests will be closed without reply, because GitHub issues are not an appropriate venue for these.
+
+Advanced/nontrivial questions, especially in areas where documentation is lacking, are very much welcome.

+ 45 - 0
.github/PULL_REQUEST_TEMPLATE.md

@@ -0,0 +1,45 @@
+# Description
+
+Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
+
+Fixes # (issue)
+
+## Type of change
+
+Please delete options that are not relevant.
+
+- [ ] Bug fix (non-breaking change which fixes an issue)
+- [ ] New feature (non-breaking change which adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+- [ ] This change requires a documentation update
+
+### Screenshots
+Please attach before and after screenshots of the change if applicable.
+
+<!--
+Example:
+
+| Before | After |
+| ------ | ----- |
+| _gif/png before_ | _gif/png after_ |
+
+
+To upload images to a PR -- simply drag and drop an image while in edit mode and it should upload the image directly. You can then paste that source into the above before/after sections.
+-->
+
+# Checklist:
+
+- [ ] I have run the [`pre-commit` checks](https://pre-commit.com/) with `pre-commit run --all-files` (see `CONTRIBUTING.md` instructions to set it up)
+- [ ] I have commented my code, particularly in hard-to-understand areas
+- [ ] I have made corresponding changes to the documentation
+- [ ] My changes generate no new warnings
+- [ ] I have added tests that prove my fix is effective or that my feature works
+- [ ] New and existing unit tests pass locally with my changes
+
+<!--
+As you go through the checklist above, you can mark something as done by putting an x character in it
+
+For example,
+- [x] I have done this task
+- [ ] I have not done this task
+-->

+ 62 - 0
.github/stale.yml

@@ -0,0 +1,62 @@
+# Configuration for probot-stale - https://github.com/probot/stale
+
+# Number of days of inactivity before an Issue or Pull Request becomes stale
+daysUntilStale: 60
+
+# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
+# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
+daysUntilClose: 14
+
+# Only issues or pull requests with all of these labels are checked for staleness. Defaults to `[]` (disabled)
+onlyLabels:
+  - more-information-needed
+
+# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
+exemptLabels:
+  - pinned
+  - security
+  - "[Status] Maybe Later"
+
+# Set to true to ignore issues in a project (defaults to false)
+exemptProjects: true
+
+# Set to true to ignore issues in a milestone (defaults to false)
+exemptMilestones: true
+
+# Set to true to ignore issues with an assignee (defaults to false)
+exemptAssignees: true
+
+# Label to use when marking as stale
+staleLabel: stale
+
+# Comment to post when marking as stale. Set to `false` to disable
+markComment: >
+  This issue has been automatically marked as stale because it has not had
+  recent activity. It will be closed if no further activity occurs. Thank you
+  for your contributions.
+
+# Comment to post when removing the stale label.
+# unmarkComment: >
+#   Your comment here.
+
+# Comment to post when closing a stale Issue or Pull Request.
+# closeComment: >
+#   Your comment here.
+
+# Limit the number of actions per hour, from 1-30. Default is 30
+limitPerRun: 30
+
+# Limit to only `issues` or `pulls`
+only: issues
+
+# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
+# pulls:
+#   daysUntilStale: 30
+#   markComment: >
+#     This pull request has been automatically marked as stale because it has not had
+#     recent activity. It will be closed if no further activity occurs. Thank you
+#     for your contributions.
+
+# issues:
+#   exemptLabels:
+#     - confirmed

+ 19 - 0
.github/workflows/build.yml

@@ -0,0 +1,19 @@
+name: build
+on: [pull_request, push]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.6', '3.7', '3.8', '3.9', '3.10']
+    steps:
+      - uses: actions/checkout@v2
+      - run: |
+           docker build -f py.Dockerfile \
+             --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
+             --tag gym-minigrid-docker .
+      
+      # TODO: Add and fix tests for pytest
+      # - name: Run tests
+      #   run: docker run gym-minigrid-docker pytest

+ 17 - 0
.github/workflows/pre-commit.yml

@@ -0,0 +1,17 @@
+# https://pre-commit.com
+# This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file.
+name: pre-commit
+on:
+  pull_request:
+  push:
+    branches: [master]
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+      - run: pip install pre-commit
+      - run: pre-commit --version
+      - run: pre-commit install
+      - run: pre-commit run --all-files

+ 0 - 10
.travis.yml

@@ -1,10 +0,0 @@
-language: python
-python:
-  - "3.5"
-
-# command to install dependencies
-install:
-  - pip3 install -e .
-
-# command to run tests
-script: ./run_tests.py

+ 13 - 0
CODE_OF_CONDUCT.rst

@@ -0,0 +1,13 @@
+Farama Foundation is dedicated to providing a harassment-free experience for
+everyone, regardless of gender, gender identity and expression, sexual
+orientation, disability, physical appearance, body size, age, race, or
+religion. We do not tolerate harassment of participants in any form.
+
+This code of conduct applies to all Farama Foundation spaces (including Gist
+comments) both online and off. Anyone who violates this code of
+conduct may be sanctioned or expelled from these spaces at the
+discretion of the Farama Foundation team.
+
+We may add additional rules over time, which will be made clearly
+available to participants. Participants are responsible for knowing
+and abiding by these rules.

+ 1 - 1
README.md

@@ -11,7 +11,7 @@ implementation can be found [in this repository](https://github.com/lcswillems/t
 
 Requirements:
 - Python 3.7+
-- OpenAI Gym
+- OpenAI Gym 0.25
 - NumPy
 - Matplotlib (optional, only needed for display)
 

+ 1 - 1
benchmark.py

@@ -18,7 +18,7 @@ parser.add_argument("--num_resets", default=200)
 parser.add_argument("--num_frames", default=5000)
 args = parser.parse_args()
 
-env = gym.make(args.env_name)
+env = gym.make(args.env_name, render_mode='rgb_array')
 
 # Benchmark env.reset
 t0 = time.time()

+ 3 - 3
gym_minigrid/envs/blockedunlockpickup.py

@@ -9,14 +9,14 @@ class BlockedUnlockPickup(RoomGrid):
     in another room
     """
 
-    def __init__(self, seed=None):
+    def __init__(self, **kwargs):
         room_size = 6
         super().__init__(
             num_rows=1,
             num_cols=2,
             room_size=room_size,
-            max_steps=16 * room_size**2,
-            seed=seed,
+            max_steps=16*room_size**2,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):

+ 18 - 18
gym_minigrid/envs/crossing.py

@@ -9,7 +9,7 @@ class CrossingEnv(MiniGridEnv):
     Environment with wall or lava obstacles, sparse reward.
     """
 
-    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
+    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, **kwargs):
         self.num_crossings = num_crossings
         self.obstacle_type = obstacle_type
         super().__init__(
@@ -17,7 +17,7 @@ class CrossingEnv(MiniGridEnv):
             max_steps=4 * size * size,
             # Set this to True for maximum speed
             see_through_walls=False,
-            seed=None,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -86,23 +86,23 @@ class CrossingEnv(MiniGridEnv):
 
 
 class LavaCrossingEnv(CrossingEnv):
-    def __init__(self):
-        super().__init__(size=9, num_crossings=1)
+    def __init__(self, **kwargs):
+        super().__init__(size=9, num_crossings=1, **kwargs)
 
 
 class LavaCrossingS9N2Env(CrossingEnv):
-    def __init__(self):
-        super().__init__(size=9, num_crossings=2)
+    def __init__(self, **kwargs):
+        super().__init__(size=9, num_crossings=2, **kwargs)
 
 
 class LavaCrossingS9N3Env(CrossingEnv):
-    def __init__(self):
-        super().__init__(size=9, num_crossings=3)
+    def __init__(self, **kwargs):
+        super().__init__(size=9, num_crossings=3, **kwargs)
 
 
 class LavaCrossingS11N5Env(CrossingEnv):
-    def __init__(self):
-        super().__init__(size=11, num_crossings=5)
+    def __init__(self, **kwargs):
+        super().__init__(size=11, num_crossings=5, **kwargs)
 
 
 register(
@@ -126,23 +126,23 @@ register(
 
 
 class SimpleCrossingEnv(CrossingEnv):
-    def __init__(self):
-        super().__init__(size=9, num_crossings=1, obstacle_type=Wall)
+    def __init__(self, **kwargs):
+        super().__init__(size=9, num_crossings=1, obstacle_type=Wall, **kwargs)
 
 
 class SimpleCrossingS9N2Env(CrossingEnv):
-    def __init__(self):
-        super().__init__(size=9, num_crossings=2, obstacle_type=Wall)
+    def __init__(self, **kwargs):
+        super().__init__(size=9, num_crossings=2, obstacle_type=Wall, **kwargs)
 
 
 class SimpleCrossingS9N3Env(CrossingEnv):
-    def __init__(self):
-        super().__init__(size=9, num_crossings=3, obstacle_type=Wall)
+    def __init__(self, **kwargs):
+        super().__init__(size=9, num_crossings=3, obstacle_type=Wall, **kwargs)
 
 
 class SimpleCrossingS11N5Env(CrossingEnv):
-    def __init__(self):
-        super().__init__(size=11, num_crossings=5, obstacle_type=Wall)
+    def __init__(self, **kwargs):
+        super().__init__(size=11, num_crossings=5, obstacle_type=Wall, **kwargs)
 
 
 register(

+ 13 - 5
gym_minigrid/envs/distshift.py

@@ -8,7 +8,13 @@ class DistShiftEnv(MiniGridEnv):
     """
 
     def __init__(
-        self, width=9, height=7, agent_start_pos=(1, 1), agent_start_dir=0, strip2_row=2
+        self,
+        width=9,
+        height=7,
+        agent_start_pos=(1,1),
+        agent_start_dir=0,
+        strip2_row=2,
+        **kwargs
     ):
         self.agent_start_pos = agent_start_pos
         self.agent_start_dir = agent_start_dir
@@ -21,6 +27,8 @@ class DistShiftEnv(MiniGridEnv):
             max_steps=4 * width * height,
             # Set this to True for maximum speed
             see_through_walls=True,
+            see_through_walls=True,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -49,13 +57,13 @@ class DistShiftEnv(MiniGridEnv):
 
 
 class DistShift1(DistShiftEnv):
-    def __init__(self):
-        super().__init__(strip2_row=2)
+    def __init__(self, **kwargs):
+        super().__init__(strip2_row=2, **kwargs)
 
 
 class DistShift2(DistShiftEnv):
-    def __init__(self):
-        super().__init__(strip2_row=5)
+    def __init__(self, **kwargs):
+        super().__init__(strip2_row=5, **kwargs)
 
 
 register(id="MiniGrid-DistShift1-v0", entry_point="gym_minigrid.envs:DistShift1")

+ 13 - 8
gym_minigrid/envs/doorkey.py

@@ -7,10 +7,12 @@ class DoorKeyEnv(MiniGridEnv):
     Environment with a door and key, sparse reward
     """
 
-    def __init__(self, size=8, max_steps=None):
+    def __init__(self, size=8, **kwargs):
+        if 'max_steps' not in kwargs:
+            kwargs['max_steps'] = 10 * size * size
         super().__init__(
             grid_size=size,
-            max_steps=10*size*size if max_steps is None else max_steps
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -42,18 +44,21 @@ class DoorKeyEnv(MiniGridEnv):
 
 
 class DoorKeyEnv5x5(DoorKeyEnv):
-    def __init__(self, max_steps=None):
-        super().__init__(size=5, max_steps=max_steps)
+    def __init__(self, **kwargs):
+        super().__init__(size=5, **kwargs)
+
 
 
 class DoorKeyEnv6x6(DoorKeyEnv):
-    def __init__(self, max_steps=None):
-        super().__init__(size=6, max_steps=max_steps)
+    def __init__(self, **kwargs):
+        super().__init__(size=6, **kwargs)
+
 
 
 class DoorKeyEnv16x16(DoorKeyEnv):
-    def __init__(self, max_steps=None):
-        super().__init__(size=16, max_steps=max_steps)
+    def __init__(self, **kwargs):
+        super().__init__(size=16, **kwargs)
+
 
 
 register(id="MiniGrid-DoorKey-5x5-v0", entry_point="gym_minigrid.envs:DoorKeyEnv5x5")

+ 17 - 11
gym_minigrid/envs/dynamicobstacles.py

@@ -12,7 +12,12 @@ class DynamicObstaclesEnv(MiniGridEnv):
     """
 
     def __init__(
-        self, size=8, agent_start_pos=(1, 1), agent_start_dir=0, n_obstacles=4
+            self,
+            size=8,
+            agent_start_pos=(1, 1),
+            agent_start_dir=0,
+            n_obstacles=4,
+            **kwargs
     ):
         self.agent_start_pos = agent_start_pos
         self.agent_start_dir = agent_start_dir
@@ -27,6 +32,7 @@ class DynamicObstaclesEnv(MiniGridEnv):
             max_steps=4 * size * size,
             # Set this to True for maximum speed
             see_through_walls=True,
+            **kwargs
         )
         # Allow only 3 actions permitted: left, right, forward
         self.action_space = gym.spaces.Discrete(self.actions.forward + 1)
@@ -92,28 +98,28 @@ class DynamicObstaclesEnv(MiniGridEnv):
 
 
 class DynamicObstaclesEnv5x5(DynamicObstaclesEnv):
-    def __init__(self):
-        super().__init__(size=5, n_obstacles=2)
+    def __init__(self, **kwargs):
+        super().__init__(size=5, n_obstacles=2, **kwargs)
 
 
 class DynamicObstaclesRandomEnv5x5(DynamicObstaclesEnv):
-    def __init__(self):
-        super().__init__(size=5, agent_start_pos=None, n_obstacles=2)
+    def __init__(self, **kwargs):
+        super().__init__(size=5, agent_start_pos=None, n_obstacles=2, **kwargs)
 
 
 class DynamicObstaclesEnv6x6(DynamicObstaclesEnv):
-    def __init__(self):
-        super().__init__(size=6, n_obstacles=3)
+    def __init__(self, **kwargs):
+        super().__init__(size=6, n_obstacles=3, **kwargs)
 
 
 class DynamicObstaclesRandomEnv6x6(DynamicObstaclesEnv):
-    def __init__(self):
-        super().__init__(size=6, agent_start_pos=None, n_obstacles=3)
+    def __init__(self, **kwargs):
+        super().__init__(size=6, agent_start_pos=None, n_obstacles=3, **kwargs)
 
 
 class DynamicObstaclesEnv16x16(DynamicObstaclesEnv):
-    def __init__(self):
-        super().__init__(size=16, n_obstacles=8)
+    def __init__(self, **kwargs):
+        super().__init__(size=16, n_obstacles=8, **kwargs)
 
 
 register(

+ 13 - 4
gym_minigrid/envs/empty.py

@@ -12,6 +12,7 @@ class EmptyEnv(MiniGridEnv):
         size=8,
         agent_start_pos=(1, 1),
         agent_start_dir=0,
+        **kwargs
     ):
         self.agent_start_pos = agent_start_pos
         self.agent_start_dir = agent_start_dir
@@ -21,6 +22,7 @@ class EmptyEnv(MiniGridEnv):
             max_steps=4 * size * size,
             # Set this to True for maximum speed
             see_through_walls=True,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -49,8 +51,9 @@ class EmptyEnv5x5(EmptyEnv):
 
 
 class EmptyRandomEnv5x5(EmptyEnv):
-    def __init__(self):
-        super().__init__(size=5, agent_start_pos=None)
+    def __init__(self, **kwargs):
+        super().__init__(size=5, agent_start_pos=None, **kwargs)
+
 
 
 class EmptyEnv6x6(EmptyEnv):
@@ -59,8 +62,9 @@ class EmptyEnv6x6(EmptyEnv):
 
 
 class EmptyRandomEnv6x6(EmptyEnv):
-    def __init__(self):
-        super().__init__(size=6, agent_start_pos=None)
+    def __init__(self, **kwargs):
+        super().__init__(size=6, agent_start_pos=None, **kwargs)
+
 
 
 class EmptyEnv16x16(EmptyEnv):
@@ -68,6 +72,11 @@ class EmptyEnv16x16(EmptyEnv):
         super().__init__(size=16, **kwargs)
 
 
+register(
+    id='MiniGrid-Empty-5x5-v0',
+    entry_point='gym_minigrid.envs:EmptyEnv5x5'
+)
+
 register(id="MiniGrid-Empty-5x5-v0", entry_point="gym_minigrid.envs:EmptyEnv5x5")
 
 register(

+ 11 - 5
gym_minigrid/envs/fetch.py

@@ -8,7 +8,12 @@ class FetchEnv(MiniGridEnv):
     named using English text strings
     """
 
-    def __init__(self, size=8, numObjs=3):
+    def __init__(
+        self,
+        size=8,
+        numObjs=3,
+        **kwargs
+    ):
         self.numObjs = numObjs
 
         super().__init__(
@@ -16,6 +21,7 @@ class FetchEnv(MiniGridEnv):
             max_steps=5 * size**2,
             # Set this to True for maximum speed
             see_through_walls=True,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -86,13 +92,13 @@ class FetchEnv(MiniGridEnv):
 
 
 class FetchEnv5x5N2(FetchEnv):
-    def __init__(self):
-        super().__init__(size=5, numObjs=2)
+    def __init__(self, **kwargs):
+        super().__init__(size=5, numObjs=2, **kwargs)
 
 
 class FetchEnv6x6N2(FetchEnv):
-    def __init__(self):
-        super().__init__(size=6, numObjs=2)
+    def __init__(self, **kwargs):
+        super().__init__(size=6, numObjs=2, **kwargs)
 
 
 register(id="MiniGrid-Fetch-5x5-N2-v0", entry_point="gym_minigrid.envs:FetchEnv5x5N2")

+ 3 - 2
gym_minigrid/envs/fourrooms.py

@@ -9,10 +9,10 @@ class FourRoomsEnv(MiniGridEnv):
     Can specify agent and goal position, if not it set at random.
     """
 
-    def __init__(self, agent_pos=None, goal_pos=None):
+    def __init__(self, agent_pos=None, goal_pos=None, **kwargs):
         self._agent_default_pos = agent_pos
         self._goal_default_pos = goal_pos
-        super().__init__(grid_size=19, max_steps=100)
+        super().__init__(grid_size=19, max_steps=100, **kwargs)
 
     def _gen_grid(self, width, height):
         # Create the grid
@@ -64,6 +64,7 @@ class FourRoomsEnv(MiniGridEnv):
         else:
             self.place_obj(Goal())
 
+        self.mission = 'reach the goal'
         self.mission = "Reach the goal"
 
     def step(self, action):

+ 10 - 5
gym_minigrid/envs/gotodoor.py

@@ -8,7 +8,11 @@ class GoToDoorEnv(MiniGridEnv):
     named using an English text string
     """
 
-    def __init__(self, size=5):
+    def __init__(
+        self,
+        size=5,
+        **kwargs
+    ):
         assert size >= 5
 
         super().__init__(
@@ -16,6 +20,7 @@ class GoToDoorEnv(MiniGridEnv):
             max_steps=5 * size**2,
             # Set this to True for maximum speed
             see_through_walls=True,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -80,13 +85,13 @@ class GoToDoorEnv(MiniGridEnv):
 
 
 class GoToDoor8x8Env(GoToDoorEnv):
-    def __init__(self):
-        super().__init__(size=8)
+    def __init__(self, **kwargs):
+        super().__init__(size=8, **kwargs)
 
 
 class GoToDoor6x6Env(GoToDoorEnv):
-    def __init__(self):
-        super().__init__(size=6)
+    def __init__(self, **kwargs):
+        super().__init__(size=6, **kwargs)
 
 
 register(id="MiniGrid-GoToDoor-5x5-v0", entry_point="gym_minigrid.envs:GoToDoorEnv")

+ 9 - 3
gym_minigrid/envs/gotoobject.py

@@ -8,7 +8,12 @@ class GoToObjectEnv(MiniGridEnv):
     named using an English text string
     """
 
-    def __init__(self, size=6, numObjs=2):
+    def __init__(
+        self,
+        size=6,
+        numObjs=2,
+        **kwargs
+    ):
         self.numObjs = numObjs
 
         super().__init__(
@@ -16,6 +21,7 @@ class GoToObjectEnv(MiniGridEnv):
             max_steps=5 * size**2,
             # Set this to True for maximum speed
             see_through_walls=True,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -82,8 +88,8 @@ class GoToObjectEnv(MiniGridEnv):
 
 
 class GotoEnv8x8N2(GoToObjectEnv):
-    def __init__(self):
-        super().__init__(size=8, numObjs=2)
+    def __init__(self, **kwargs):
+        super().__init__(size=8, numObjs=2, **kwargs)
 
 
 register(

+ 45 - 21
gym_minigrid/envs/keycorridor.py

@@ -8,14 +8,20 @@ class KeyCorridor(RoomGrid):
     random room.
     """
 
-    def __init__(self, num_rows=3, obj_type="ball", room_size=6, seed=None):
+    def __init__(
+        self,
+        num_rows=3,
+        obj_type="ball",
+        room_size=6,
+        **kwargs
+    ):
         self.obj_type = obj_type
 
         super().__init__(
             room_size=room_size,
             num_rows=num_rows,
-            max_steps=30 * room_size**2,
-            seed=seed,
+            max_steps=30*room_size**2,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -55,34 +61,52 @@ class KeyCorridor(RoomGrid):
 
 
 class KeyCorridorS3R1(KeyCorridor):
-    def __init__(self, seed=None):
-        super().__init__(room_size=3, num_rows=1, seed=seed)
-
+    def __init__(self, **kwargs):
+        super().__init__(
+            room_size=3,
+            num_rows=1,
+            **kwargs
+        )
 
 class KeyCorridorS3R2(KeyCorridor):
-    def __init__(self, seed=None):
-        super().__init__(room_size=3, num_rows=2, seed=seed)
-
+    def __init__(self, **kwargs):
+        super().__init__(
+            room_size=3,
+            num_rows=2,
+            **kwargs
+        )
 
 class KeyCorridorS3R3(KeyCorridor):
-    def __init__(self, seed=None):
-        super().__init__(room_size=3, num_rows=3, seed=seed)
-
+    def __init__(self, **kwargs):
+        super().__init__(
+            room_size=3,
+            num_rows=3,
+            **kwargs
+        )
 
 class KeyCorridorS4R3(KeyCorridor):
-    def __init__(self, seed=None):
-        super().__init__(room_size=4, num_rows=3, seed=seed)
-
+    def __init__(self,  **kwargs):
+        super().__init__(
+            room_size=4,
+            num_rows=3,
+            **kwargs
+        )
 
 class KeyCorridorS5R3(KeyCorridor):
-    def __init__(self, seed=None):
-        super().__init__(room_size=5, num_rows=3, seed=seed)
-
+    def __init__(self, **kwargs):
+        super().__init__(
+            room_size=5,
+            num_rows=3,
+            **kwargs
+        )
 
 class KeyCorridorS6R3(KeyCorridor):
-    def __init__(self, seed=None):
-        super().__init__(room_size=6, num_rows=3, seed=seed)
-
+    def __init__(self,  **kwargs):
+        super().__init__(
+            room_size=6,
+            num_rows=3,
+            **kwargs
+        )
 
 register(
     id="MiniGrid-KeyCorridorS3R1-v0", entry_point="gym_minigrid.envs:KeyCorridorS3R1"

+ 8 - 8
gym_minigrid/envs/lavagap.py

@@ -10,14 +10,14 @@ class LavaGapEnv(MiniGridEnv):
     This environment is similar to LavaCrossing but simpler in structure.
     """
 
-    def __init__(self, size, obstacle_type=Lava, seed=None):
+    def __init__(self, size, obstacle_type=Lava, **kwargs):
         self.obstacle_type = obstacle_type
         super().__init__(
             grid_size=size,
             max_steps=4 * size * size,
             # Set this to True for maximum speed
             see_through_walls=False,
-            seed=None,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -59,18 +59,18 @@ class LavaGapEnv(MiniGridEnv):
 
 
 class LavaGapS5Env(LavaGapEnv):
-    def __init__(self):
-        super().__init__(size=5)
+    def __init__(self, **kwargs):
+        super().__init__(size=5, **kwargs)
 
 
 class LavaGapS6Env(LavaGapEnv):
-    def __init__(self):
-        super().__init__(size=6)
+    def __init__(self, **kwargs):
+        super().__init__(size=6, **kwargs)
 
 
 class LavaGapS7Env(LavaGapEnv):
-    def __init__(self):
-        super().__init__(size=7)
+    def __init__(self, **kwargs):
+        super().__init__(size=7, **kwargs)
 
 
 register(id="MiniGrid-LavaGapS5-v0", entry_point="gym_minigrid.envs:LavaGapS5Env")

+ 6 - 2
gym_minigrid/envs/lockedroom.py

@@ -22,8 +22,12 @@ class LockedRoom(MiniGridEnv):
     named using an English text string
     """
 
-    def __init__(self, size=19):
-        super().__init__(grid_size=size, max_steps=10 * size)
+    def __init__(
+        self,
+        size=19,
+        **kwargs
+    ):
+        super().__init__(grid_size=size, max_steps=10*size, **kwargs)
 
     def _gen_grid(self, width, height):
         # Create the grid

+ 16 - 16
gym_minigrid/envs/memory.py

@@ -14,17 +14,17 @@ class MemoryEnv(MiniGridEnv):
 
     def __init__(
         self,
-        seed,
         size=8,
-        random_length=False,
+        random_length=False, 
+        **kwargs
     ):
         self.random_length = random_length
         super().__init__(
-            seed=seed,
             grid_size=size,
             max_steps=5 * size**2,
             # Set this to True for maximum speed
-            see_through_walls=False,
+            see_through_walls=False, 
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -102,8 +102,8 @@ class MemoryEnv(MiniGridEnv):
 
 
 class MemoryS17Random(MemoryEnv):
-    def __init__(self, seed=None):
-        super().__init__(seed=seed, size=17, random_length=True)
+    def __init__(self, **kwargs):
+        super().__init__(size=17, random_length=True, **kwargs)
 
 
 register(
@@ -113,8 +113,8 @@ register(
 
 
 class MemoryS13Random(MemoryEnv):
-    def __init__(self, seed=None):
-        super().__init__(seed=seed, size=13, random_length=True)
+    def __init__(self, **kwargs):
+        super().__init__(size=13, random_length=True, **kwargs)
 
 
 register(
@@ -124,8 +124,8 @@ register(
 
 
 class MemoryS13(MemoryEnv):
-    def __init__(self, seed=None):
-        super().__init__(seed=seed, size=13)
+    def __init__(self, **kwargs):
+        super().__init__(size=13, **kwargs)
 
 
 register(
@@ -135,8 +135,8 @@ register(
 
 
 class MemoryS11(MemoryEnv):
-    def __init__(self, seed=None):
-        super().__init__(seed=seed, size=11)
+    def __init__(self, **kwargs):
+        super().__init__(size=11, **kwargs)
 
 
 register(
@@ -146,8 +146,8 @@ register(
 
 
 class MemoryS9(MemoryEnv):
-    def __init__(self, seed=None):
-        super().__init__(seed=seed, size=9)
+    def __init__(self, **kwargs):
+        super().__init__(size=9, **kwargs)
 
 
 register(
@@ -157,8 +157,8 @@ register(
 
 
 class MemoryS7(MemoryEnv):
-    def __init__(self, seed=None):
-        super().__init__(seed=seed, size=7)
+    def __init__(self, **kwargs):
+        super().__init__(size=7, **kwargs)
 
 
 register(

+ 95 - 41
gym_minigrid/envs/multiroom.py

@@ -1,21 +1,29 @@
-from gym_minigrid.minigrid import COLOR_NAMES, Door, Goal, Grid, MiniGridEnv, Wall
+from gym_minigrid.minigrid import *
 from gym_minigrid.register import register
 
-
 class Room:
-    def __init__(self, top, size, entryDoorPos, exitDoorPos):
+    def __init__(self,
+        top,
+        size,
+        entryDoorPos,
+        exitDoorPos
+    ):
         self.top = top
         self.size = size
         self.entryDoorPos = entryDoorPos
         self.exitDoorPos = exitDoorPos
 
-
 class MultiRoomEnv(MiniGridEnv):
     """
     Environment with multiple rooms (subgoals)
     """
 
-    def __init__(self, minNumRooms, maxNumRooms, maxRoomSize=10):
+    def __init__(self,
+        minNumRooms,
+        maxNumRooms,
+        maxRoomSize=10,
+        **kwargs
+    ):
         assert minNumRooms > 0
         assert maxNumRooms >= minNumRooms
         assert maxRoomSize >= 4
@@ -26,18 +34,25 @@ class MultiRoomEnv(MiniGridEnv):
 
         self.rooms = []
 
-        super().__init__(grid_size=25, max_steps=self.maxNumRooms * 20)
+        super(MultiRoomEnv, self).__init__(
+            grid_size=25,
+            max_steps=self.maxNumRooms * 20,
+            **kwargs
+        )
 
     def _gen_grid(self, width, height):
         roomList = []
 
         # Choose a random number of rooms to generate
-        numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms + 1)
+        numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms+1)
 
         while len(roomList) < numRooms:
             curRoomList = []
 
-            entryDoorPos = (self._rand_int(0, width - 2), self._rand_int(0, width - 2))
+            entryDoorPos = (
+                self._rand_int(0, width - 2),
+                self._rand_int(0, width - 2)
+            )
 
             # Recursively place the rooms
             self._placeRoom(
@@ -46,7 +61,7 @@ class MultiRoomEnv(MiniGridEnv):
                 minSz=4,
                 maxSz=self.maxRoomSize,
                 entryDoorWall=2,
-                entryDoorPos=entryDoorPos,
+                entryDoorPos=entryDoorPos
             )
 
             if len(curRoomList) > len(roomList):
@@ -92,7 +107,7 @@ class MultiRoomEnv(MiniGridEnv):
                 self.grid.set(*room.entryDoorPos, entryDoor)
                 prevDoorColor = doorColor
 
-                prevRoom = roomList[idx - 1]
+                prevRoom = roomList[idx-1]
                 prevRoom.exitDoorPos = room.entryDoorPos
 
         # Randomize the starting agent position and direction
@@ -101,12 +116,20 @@ class MultiRoomEnv(MiniGridEnv):
         # Place the final goal in the last room
         self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)
 
-        self.mission = "traverse the rooms to get to the goal"
-
-    def _placeRoom(self, numLeft, roomList, minSz, maxSz, entryDoorWall, entryDoorPos):
+        self.mission = 'traverse the rooms to get to the goal'
+
+    def _placeRoom(
+        self,
+        numLeft,
+        roomList,
+        minSz,
+        maxSz,
+        entryDoorWall,
+        entryDoorPos
+    ):
         # Choose the room size randomly
-        sizeX = self._rand_int(minSz, maxSz + 1)
-        sizeY = self._rand_int(minSz, maxSz + 1)
+        sizeX = self._rand_int(minSz, maxSz+1)
+        sizeY = self._rand_int(minSz, maxSz+1)
 
         # The first room will be at the door position
         if len(roomList) == 0:
@@ -142,18 +165,22 @@ class MultiRoomEnv(MiniGridEnv):
 
         # If the room intersects with previous rooms, can't place it here
         for room in roomList[:-1]:
-            nonOverlap = (
-                topX + sizeX < room.top[0]
-                or room.top[0] + room.size[0] <= topX
-                or topY + sizeY < room.top[1]
-                or room.top[1] + room.size[1] <= topY
-            )
+            nonOverlap = \
+                topX + sizeX < room.top[0] or \
+                room.top[0] + room.size[0] <= topX or \
+                topY + sizeY < room.top[1] or \
+                room.top[1] + room.size[1] <= topY
 
             if not nonOverlap:
                 return False
 
         # Add this room to the list
-        roomList.append(Room((topX, topY), (sizeX, sizeY), entryDoorPos, None))
+        roomList.append(Room(
+            (topX, topY),
+            (sizeX, sizeY),
+            entryDoorPos,
+            None
+        ))
 
         # If this was the last room, stop
         if numLeft == 1:
@@ -163,7 +190,7 @@ class MultiRoomEnv(MiniGridEnv):
         for i in range(0, 8):
 
             # Pick which wall to place the out door on
-            wallSet = {0, 1, 2, 3}
+            wallSet = set((0, 1, 2, 3))
             wallSet.remove(entryDoorWall)
             exitDoorWall = self._rand_elem(sorted(wallSet))
             nextEntryWall = (exitDoorWall + 2) % 4
@@ -171,16 +198,28 @@ class MultiRoomEnv(MiniGridEnv):
             # Pick the exit door position
             # Exit on right wall
             if exitDoorWall == 0:
-                exitDoorPos = (topX + sizeX - 1, topY + self._rand_int(1, sizeY - 1))
+                exitDoorPos = (
+                    topX + sizeX - 1,
+                    topY + self._rand_int(1, sizeY - 1)
+                )
             # Exit on south wall
             elif exitDoorWall == 1:
-                exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY + sizeY - 1)
+                exitDoorPos = (
+                    topX + self._rand_int(1, sizeX - 1),
+                    topY + sizeY - 1
+                )
             # Exit on left wall
             elif exitDoorWall == 2:
-                exitDoorPos = (topX, topY + self._rand_int(1, sizeY - 1))
+                exitDoorPos = (
+                    topX,
+                    topY + self._rand_int(1, sizeY - 1)
+                )
             # Exit on north wall
             elif exitDoorWall == 3:
-                exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY)
+                exitDoorPos = (
+                    topX + self._rand_int(1, sizeX - 1),
+                    topY
+                )
             else:
                 assert False
 
@@ -191,7 +230,7 @@ class MultiRoomEnv(MiniGridEnv):
                 minSz=minSz,
                 maxSz=maxSz,
                 entryDoorWall=nextEntryWall,
-                entryDoorPos=exitDoorPos,
+                entryDoorPos=exitDoorPos
             )
 
             if success:
@@ -199,28 +238,43 @@ class MultiRoomEnv(MiniGridEnv):
 
         return True
 
-
 class MultiRoomEnvN2S4(MultiRoomEnv):
-    def __init__(self):
-        super().__init__(minNumRooms=2, maxNumRooms=2, maxRoomSize=4)
-
+    def __init__(self, **kwargs):
+        super().__init__(
+            minNumRooms=2,
+            maxNumRooms=2,
+            maxRoomSize=4,
+            **kwargs
+        )
 
 class MultiRoomEnvN4S5(MultiRoomEnv):
-    def __init__(self):
-        super().__init__(minNumRooms=4, maxNumRooms=4, maxRoomSize=5)
-
+    def __init__(self, **kwargs):
+        super().__init__(
+            minNumRooms=4,
+            maxNumRooms=4,
+            maxRoomSize=5,
+            **kwargs
+        )
 
 class MultiRoomEnvN6(MultiRoomEnv):
-    def __init__(self):
-        super().__init__(minNumRooms=6, maxNumRooms=6)
-
+    def __init__(self, **kwargs):
+        super().__init__(
+            minNumRooms=6,
+            maxNumRooms=6,
+            **kwargs
+        )
 
 register(
-    id="MiniGrid-MultiRoom-N2-S4-v0", entry_point="gym_minigrid.envs:MultiRoomEnvN2S4"
+    id='MiniGrid-MultiRoom-N2-S4-v0',
+    entry_point='gym_minigrid.envs:MultiRoomEnvN2S4'
 )
 
 register(
-    id="MiniGrid-MultiRoom-N4-S5-v0", entry_point="gym_minigrid.envs:MultiRoomEnvN4S5"
+    id='MiniGrid-MultiRoom-N4-S5-v0',
+    entry_point='gym_minigrid.envs:MultiRoomEnvN4S5'
 )
 
-register(id="MiniGrid-MultiRoom-N6-v0", entry_point="gym_minigrid.envs:MultiRoomEnvN6")
+register(
+    id='MiniGrid-MultiRoom-N6-v0',
+    entry_point='gym_minigrid.envs:MultiRoomEnvN6'
+)

+ 64 - 87
gym_minigrid/envs/obstructedmaze.py

@@ -1,7 +1,6 @@
-from gym_minigrid.minigrid import COLOR_NAMES, DIR_TO_VEC, Ball, Box, Key
-from gym_minigrid.register import register
+from gym_minigrid.minigrid import *
 from gym_minigrid.roomgrid import RoomGrid
-
+from gym_minigrid.register import register
 
 class ObstructedMazeEnv(RoomGrid):
     """
@@ -9,16 +8,21 @@ class ObstructedMazeEnv(RoomGrid):
     doors may be obstructed by a ball and keys may be hidden in boxes.
     """
 
-    def __init__(self, num_rows, num_cols, num_rooms_visited, seed=None):
+    def __init__(self,
+        num_rows,
+        num_cols,
+        num_rooms_visited,
+        **kwargs
+    ):
         room_size = 6
-        max_steps = 4 * num_rooms_visited * room_size**2
+        max_steps = 4*num_rooms_visited*room_size**2
 
         super().__init__(
             room_size=room_size,
             num_rows=num_rows,
             num_cols=num_cols,
-            max_steps=max_steps,
-            seed=seed,
+            max_steps=max_steps,        
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -45,16 +49,7 @@ class ObstructedMazeEnv(RoomGrid):
 
         return obs, reward, done, info
 
-    def add_door(
-        self,
-        i,
-        j,
-        door_idx=0,
-        color=None,
-        locked=False,
-        key_in_box=False,
-        blocked=False,
-    ):
+    def add_door(self, i, j, door_idx=0, color=None, locked=False, key_in_box=False, blocked=False):
         """
         Add a door. If the door must be locked, it also adds the key.
         If the key must be hidden, it is put in a box. If the door must
@@ -66,8 +61,8 @@ class ObstructedMazeEnv(RoomGrid):
         if blocked:
             vec = DIR_TO_VEC[door_idx]
             blocking_ball = Ball(self.blocking_ball_color) if blocked else None
-            self.grid.set(door_pos[0] - vec[0], door_pos[1] - vec[1], blocking_ball)
-
+            self.grid.set(door_pos[0]-vec[0], door_pos[1]-vec[1], blocking_ball)
+            
         if locked:
             obj = Key(door.color)
             if key_in_box:
@@ -78,45 +73,41 @@ class ObstructedMazeEnv(RoomGrid):
 
         return door, door_pos
 
-
 class ObstructedMaze_1Dlhb(ObstructedMazeEnv):
     """
     A blue ball is hidden in a 2x1 maze. A locked door separates
     rooms. Doors are obstructed by a ball and keys are hidden in boxes.
     """
 
-    def __init__(self, key_in_box=True, blocked=True, seed=None):
+    def __init__(self, key_in_box=True, blocked=True, **kwargs):
         self.key_in_box = key_in_box
         self.blocked = blocked
 
-        super().__init__(num_rows=1, num_cols=2, num_rooms_visited=2, seed=seed)
+        super().__init__(
+            num_rows=1,
+            num_cols=2,
+            num_rooms_visited=2,
+            **kwargs
+        )
 
     def _gen_grid(self, width, height):
         super()._gen_grid(width, height)
 
-        self.add_door(
-            0,
-            0,
-            door_idx=0,
-            color=self.door_colors[0],
-            locked=True,
-            key_in_box=self.key_in_box,
-            blocked=self.blocked,
-        )
+        self.add_door(0, 0, door_idx=0, color=self.door_colors[0],
+                      locked=True,
+                      key_in_box=self.key_in_box,
+                      blocked=self.blocked)
 
         self.obj, _ = self.add_object(1, 0, "ball", color=self.ball_to_find_color)
         self.place_agent(0, 0)
 
-
 class ObstructedMaze_1Dl(ObstructedMaze_1Dlhb):
-    def __init__(self, seed=None):
-        super().__init__(False, False, seed)
-
+    def __init__(self, **kwargs):
+        super().__init__(False, False, **kwargs)
 
 class ObstructedMaze_1Dlh(ObstructedMaze_1Dlhb):
-    def __init__(self, seed=None):
-        super().__init__(True, False, seed)
-
+    def __init__(self, **kwargs):
+        super().__init__(True, False, **kwargs)
 
 class ObstructedMaze_Full(ObstructedMazeEnv):
     """
@@ -125,22 +116,18 @@ class ObstructedMaze_Full(ObstructedMazeEnv):
     boxes.
     """
 
-    def __init__(
-        self,
-        agent_room=(1, 1),
-        key_in_box=True,
-        blocked=True,
-        num_quarters=4,
-        num_rooms_visited=25,
-        seed=None,
-    ):
+    def __init__(self, agent_room=(1, 1), key_in_box=True, blocked=True,
+                 num_quarters=4, num_rooms_visited=25, **kwargs):
         self.agent_room = agent_room
         self.key_in_box = key_in_box
         self.blocked = blocked
         self.num_quarters = num_quarters
 
         super().__init__(
-            num_rows=3, num_cols=3, num_rooms_visited=num_rooms_visited, seed=seed
+            num_rows=3,
+            num_cols=3,
+            num_rooms_visited=num_rooms_visited,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -149,99 +136,89 @@ class ObstructedMaze_Full(ObstructedMazeEnv):
         middle_room = (1, 1)
         # Define positions of "side rooms" i.e. rooms that are neither
         # corners nor the center.
-        side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][: self.num_quarters]
+        side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][:self.num_quarters]
         for i in range(len(side_rooms)):
             side_room = side_rooms[i]
 
             # Add a door between the center room and the side room
-            self.add_door(
-                *middle_room, door_idx=i, color=self.door_colors[i], locked=False
-            )
+            self.add_door(*middle_room, door_idx=i, color=self.door_colors[i], locked=False)
 
             for k in [-1, 1]:
                 # Add a door to each side of the side room
-                self.add_door(
-                    *side_room,
-                    locked=True,
-                    door_idx=(i + k) % 4,
-                    color=self.door_colors[(i + k) % len(self.door_colors)],
-                    key_in_box=self.key_in_box,
-                    blocked=self.blocked
-                )
-
-        corners = [(2, 0), (2, 2), (0, 2), (0, 0)][: self.num_quarters]
+                self.add_door(*side_room, locked=True,
+                              door_idx=(i+k)%4,
+                              color=self.door_colors[(i+k)%len(self.door_colors)],
+                              key_in_box=self.key_in_box,
+                              blocked=self.blocked)
+
+        corners = [(2, 0), (2, 2), (0, 2), (0, 0)][:self.num_quarters]
         ball_room = self._rand_elem(corners)
 
         self.obj, _ = self.add_object(*ball_room, "ball", color=self.ball_to_find_color)
         self.place_agent(*self.agent_room)
 
-
 class ObstructedMaze_2Dl(ObstructedMaze_Full):
-    def __init__(self, seed=None):
-        super().__init__((2, 1), False, False, 1, 4, seed)
-
+    def __init__(self, **kwargs):
+        super().__init__((2, 1), False, False, 1, 4, **kwargs)
 
 class ObstructedMaze_2Dlh(ObstructedMaze_Full):
-    def __init__(self, seed=None):
-        super().__init__((2, 1), True, False, 1, 4, seed)
+    def __init__(self, **kwargs):
+        super().__init__((2, 1), True, False, 1, 4, **kwargs)
 
 
 class ObstructedMaze_2Dlhb(ObstructedMaze_Full):
-    def __init__(self, seed=None):
-        super().__init__((2, 1), True, True, 1, 4, seed)
-
+    def __init__(self, **kwargs):
+        super().__init__((2, 1), True, True, 1, 4, **kwargs)
 
 class ObstructedMaze_1Q(ObstructedMaze_Full):
-    def __init__(self, seed=None):
-        super().__init__((1, 1), True, True, 1, 5, seed)
-
+    def __init__(self, **kwargs):
+        super().__init__((1, 1), True, True, 1, 5, **kwargs)
 
 class ObstructedMaze_2Q(ObstructedMaze_Full):
-    def __init__(self, seed=None):
-        super().__init__((1, 1), True, True, 2, 11, seed)
-
+    def __init__(self, **kwargs):
+        super().__init__((1, 1), True, True, 2, 11, **kwargs)
 
 register(
     id="MiniGrid-ObstructedMaze-1Dl-v0",
-    entry_point="gym_minigrid.envs:ObstructedMaze_1Dl",
+    entry_point="gym_minigrid.envs:ObstructedMaze_1Dl"
 )
 
 register(
     id="MiniGrid-ObstructedMaze-1Dlh-v0",
-    entry_point="gym_minigrid.envs:ObstructedMaze_1Dlh",
+    entry_point="gym_minigrid.envs:ObstructedMaze_1Dlh"
 )
 
 register(
     id="MiniGrid-ObstructedMaze-1Dlhb-v0",
-    entry_point="gym_minigrid.envs:ObstructedMaze_1Dlhb",
+    entry_point="gym_minigrid.envs:ObstructedMaze_1Dlhb"
 )
 
 register(
     id="MiniGrid-ObstructedMaze-2Dl-v0",
-    entry_point="gym_minigrid.envs:ObstructedMaze_2Dl",
+    entry_point="gym_minigrid.envs:ObstructedMaze_2Dl"
 )
 
 register(
     id="MiniGrid-ObstructedMaze-2Dlh-v0",
-    entry_point="gym_minigrid.envs:ObstructedMaze_2Dlh",
+    entry_point="gym_minigrid.envs:ObstructedMaze_2Dlh"
 )
 
 register(
     id="MiniGrid-ObstructedMaze-2Dlhb-v0",
-    entry_point="gym_minigrid.envs:ObstructedMaze_2Dlhb",
+    entry_point="gym_minigrid.envs:ObstructedMaze_2Dlhb"
 )
 
 register(
     id="MiniGrid-ObstructedMaze-1Q-v0",
-    entry_point="gym_minigrid.envs:ObstructedMaze_1Q",
+    entry_point="gym_minigrid.envs:ObstructedMaze_1Q"
 )
 
 register(
     id="MiniGrid-ObstructedMaze-2Q-v0",
-    entry_point="gym_minigrid.envs:ObstructedMaze_2Q",
+    entry_point="gym_minigrid.envs:ObstructedMaze_2Q"
 )
 
 register(
     id="MiniGrid-ObstructedMaze-Full-v0",
-    entry_point="gym_minigrid.envs:ObstructedMaze_Full",
-)
+    entry_point="gym_minigrid.envs:ObstructedMaze_Full"
+)

+ 2 - 2
gym_minigrid/envs/playground_v0.py

@@ -8,8 +8,8 @@ class PlaygroundV0(MiniGridEnv):
     This environment has no specific goals or rewards.
     """
 
-    def __init__(self):
-        super().__init__(grid_size=19, max_steps=100)
+    def __init__(self, **kwargs):
+        super().__init__(grid_size=19, max_steps=100, **kwargs)
 
     def _gen_grid(self, width, height):
         # Create the grid

+ 31 - 25
gym_minigrid/envs/putnear.py

@@ -1,21 +1,26 @@
-from gym_minigrid.minigrid import COLOR_NAMES, Ball, Box, Grid, Key, MiniGridEnv
+from gym_minigrid.minigrid import *
 from gym_minigrid.register import register
 
-
 class PutNearEnv(MiniGridEnv):
     """
     Environment in which the agent is instructed to place an object near
     another object through a natural language string.
     """
 
-    def __init__(self, size=6, numObjs=2):
+    def __init__(
+        self,
+        size=6,
+        numObjs=2, 
+        **kwargs
+    ):
         self.numObjs = numObjs
 
         super().__init__(
             grid_size=size,
-            max_steps=5 * size,
+            max_steps=5*size,
             # Set this to True for maximum speed
-            see_through_walls=True,
+            see_through_walls=True, 
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -23,12 +28,12 @@ class PutNearEnv(MiniGridEnv):
 
         # Generate the surrounding walls
         self.grid.horz_wall(0, 0)
-        self.grid.horz_wall(0, height - 1)
+        self.grid.horz_wall(0, height-1)
         self.grid.vert_wall(0, 0)
-        self.grid.vert_wall(width - 1, 0)
+        self.grid.vert_wall(width-1, 0)
 
         # Types and colors of objects we can generate
-        types = ["key", "ball", "box"]
+        types = ['key', 'ball', 'box']
 
         objs = []
         objPos = []
@@ -50,11 +55,11 @@ class PutNearEnv(MiniGridEnv):
             if (objType, objColor) in objs:
                 continue
 
-            if objType == "key":
+            if objType == 'key':
                 obj = Key(objColor)
-            elif objType == "ball":
+            elif objType == 'ball':
                 obj = Ball(objColor)
-            elif objType == "box":
+            elif objType == 'box':
                 obj = Box(objColor)
 
             pos = self.place_obj(obj, reject_fn=near_obj)
@@ -78,11 +83,11 @@ class PutNearEnv(MiniGridEnv):
         self.target_type, self.target_color = objs[targetIdx]
         self.target_pos = objPos[targetIdx]
 
-        self.mission = "put the {} {} near the {} {}".format(
+        self.mission = 'put the %s %s near the %s %s' % (
             self.moveColor,
             self.move_type,
             self.target_color,
-            self.target_type,
+            self.target_type
         )
 
     def step(self, action):
@@ -96,10 +101,7 @@ class PutNearEnv(MiniGridEnv):
 
         # If we picked up the wrong object, terminate the episode
         if action == self.actions.pickup and self.carrying:
-            if (
-                self.carrying.type != self.move_type
-                or self.carrying.color != self.moveColor
-            ):
+            if self.carrying.type != self.move_type or self.carrying.color != self.moveColor:
                 done = True
 
         # If successfully dropping an object near the target
@@ -111,12 +113,16 @@ class PutNearEnv(MiniGridEnv):
 
         return obs, reward, done, info
 
-
 class PutNear8x8N3(PutNearEnv):
-    def __init__(self):
-        super().__init__(size=8, numObjs=3)
-
-
-register(id="MiniGrid-PutNear-6x6-N2-v0", entry_point="gym_minigrid.envs:PutNearEnv")
-
-register(id="MiniGrid-PutNear-8x8-N3-v0", entry_point="gym_minigrid.envs:PutNear8x8N3")
+    def __init__(self, **kwargs):
+        super().__init__(size=8, numObjs=3, **kwargs)
+
+register(
+    id='MiniGrid-PutNear-6x6-N2-v0',
+    entry_point='gym_minigrid.envs:PutNearEnv'
+)
+
+register(
+    id='MiniGrid-PutNear-8x8-N3-v0',
+    entry_point='gym_minigrid.envs:PutNear8x8N3'
+)

+ 19 - 15
gym_minigrid/envs/redbluedoors.py

@@ -1,7 +1,6 @@
-from gym_minigrid.minigrid import Door, Grid, MiniGridEnv
+from gym_minigrid.minigrid import *
 from gym_minigrid.register import register
 
-
 class RedBlueDoorEnv(MiniGridEnv):
     """
     Single room with red and blue doors on opposite sides.
@@ -9,31 +8,36 @@ class RedBlueDoorEnv(MiniGridEnv):
     obtain a reward.
     """
 
-    def __init__(self, size=8):
+    def __init__(self, size=8, **kwargs):
         self.size = size
 
-        super().__init__(width=2 * size, height=size, max_steps=20 * size * size)
+        super().__init__(
+            width=2*size,
+            height=size,
+            max_steps=20*size*size,
+            **kwargs
+        )
 
     def _gen_grid(self, width, height):
         # Create an empty grid
         self.grid = Grid(width, height)
 
         # Generate the grid walls
-        self.grid.wall_rect(0, 0, 2 * self.size, self.size)
-        self.grid.wall_rect(self.size // 2, 0, self.size, self.size)
+        self.grid.wall_rect(0, 0, 2*self.size, self.size)
+        self.grid.wall_rect(self.size//2, 0, self.size, self.size)
 
         # Place the agent in the top-left corner
-        self.place_agent(top=(self.size // 2, 0), size=(self.size, self.size))
+        self.place_agent(top=(self.size//2, 0), size=(self.size, self.size))
 
         # Add a red door at a random position in the left wall
         pos = self._rand_int(1, self.size - 1)
         self.red_door = Door("red")
-        self.grid.set(self.size // 2, pos, self.red_door)
+        self.grid.set(self.size//2, pos, self.red_door)
 
         # Add a blue door at a random position in the right wall
         pos = self._rand_int(1, self.size - 1)
         self.blue_door = Door("blue")
-        self.grid.set(self.size // 2 + self.size - 1, pos, self.blue_door)
+        self.grid.set(self.size//2 + self.size - 1, pos, self.blue_door)
 
         # Generate the mission string
         self.mission = "open the red door then the blue door"
@@ -62,16 +66,16 @@ class RedBlueDoorEnv(MiniGridEnv):
 
         return obs, reward, done, info
 
-
 class RedBlueDoorEnv6x6(RedBlueDoorEnv):
-    def __init__(self):
-        super().__init__(size=6)
-
+    def __init__(self, **kwargs):
+        super().__init__(size=6, **kwargs)
 
 register(
-    id="MiniGrid-RedBlueDoors-6x6-v0", entry_point="gym_minigrid.envs:RedBlueDoorEnv6x6"
+    id='MiniGrid-RedBlueDoors-6x6-v0',
+    entry_point='gym_minigrid.envs:RedBlueDoorEnv6x6'
 )
 
 register(
-    id="MiniGrid-RedBlueDoors-8x8-v0", entry_point="gym_minigrid.envs:RedBlueDoorEnv"
+    id='MiniGrid-RedBlueDoors-8x8-v0',
+    entry_point='gym_minigrid.envs:RedBlueDoorEnv'
 )

+ 10 - 8
gym_minigrid/envs/unlock.py

@@ -1,20 +1,20 @@
-from gym_minigrid.register import register
+from gym_minigrid.minigrid import Ball
 from gym_minigrid.roomgrid import RoomGrid
-
+from gym_minigrid.register import register
 
 class Unlock(RoomGrid):
     """
     Unlock a door
     """
 
-    def __init__(self, seed=None):
+    def __init__(self, **kwargs):
         room_size = 6
         super().__init__(
             num_rows=1,
             num_cols=2,
             room_size=room_size,
-            max_steps=8 * room_size**2,
-            seed=seed,
+            max_steps=8*room_size**2,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -23,7 +23,7 @@ class Unlock(RoomGrid):
         # Make sure the two rooms are directly connected by a locked door
         door, _ = self.add_door(0, 0, 0, locked=True)
         # Add a key to unlock the door
-        self.add_object(0, 0, "key", door.color)
+        self.add_object(0, 0, 'key', door.color)
 
         self.place_agent(0, 0)
 
@@ -40,5 +40,7 @@ class Unlock(RoomGrid):
 
         return obs, reward, done, info
 
-
-register(id="MiniGrid-Unlock-v0", entry_point="gym_minigrid.envs:Unlock")
+register(
+    id='MiniGrid-Unlock-v0',
+    entry_point='gym_minigrid.envs:Unlock'
+)

+ 11 - 9
gym_minigrid/envs/unlockpickup.py

@@ -1,20 +1,20 @@
-from gym_minigrid.register import register
+from gym_minigrid.minigrid import Ball
 from gym_minigrid.roomgrid import RoomGrid
-
+from gym_minigrid.register import register
 
 class UnlockPickup(RoomGrid):
     """
     Unlock a door, then pick up a box in another room
     """
 
-    def __init__(self, seed=None):
+    def __init__(self, **kwargs):
         room_size = 6
         super().__init__(
             num_rows=1,
             num_cols=2,
             room_size=room_size,
-            max_steps=8 * room_size**2,
-            seed=seed,
+            max_steps=8*room_size**2,
+            **kwargs
         )
 
     def _gen_grid(self, width, height):
@@ -25,12 +25,12 @@ class UnlockPickup(RoomGrid):
         # Make sure the two rooms are directly connected by a locked door
         door, _ = self.add_door(0, 0, 0, locked=True)
         # Add a key to unlock the door
-        self.add_object(0, 0, "key", door.color)
+        self.add_object(0, 0, 'key', door.color)
 
         self.place_agent(0, 0)
 
         self.obj = obj
-        self.mission = f"pick up the {obj.color} {obj.type}"
+        self.mission = "pick up the %s %s" % (obj.color, obj.type)
 
     def step(self, action):
         obs, reward, done, info = super().step(action)
@@ -42,5 +42,7 @@ class UnlockPickup(RoomGrid):
 
         return obs, reward, done, info
 
-
-register(id="MiniGrid-UnlockPickup-v0", entry_point="gym_minigrid.envs:UnlockPickup")
+register(
+    id='MiniGrid-UnlockPickup-v0',
+    entry_point='gym_minigrid.envs:UnlockPickup'
+)

+ 221 - 177
gym_minigrid/minigrid.py

@@ -1,65 +1,61 @@
-import hashlib
 import math
-from enum import IntEnum
-
+import hashlib
+import string
 import gym
+from enum import IntEnum
 import numpy as np
-from gym import spaces
-from gym.utils import seeding
-
-from gym_minigrid.rendering import (
-    downsample,
-    fill_coords,
-    highlight_img,
-    point_in_circle,
-    point_in_line,
-    point_in_rect,
-    point_in_triangle,
-    rotate_fn,
-)
+from gym import error, spaces, utils
+from .rendering import *
 
 # Size in pixels of a tile in the full-scale human view
 TILE_PIXELS = 32
 
 # Map of color names to RGB values
 COLORS = {
-    "red": np.array([255, 0, 0]),
-    "green": np.array([0, 255, 0]),
-    "blue": np.array([0, 0, 255]),
-    "purple": np.array([112, 39, 195]),
-    "yellow": np.array([255, 255, 0]),
-    "grey": np.array([100, 100, 100]),
+    'red': np.array([255, 0, 0]),
+    'green': np.array([0, 255, 0]),
+    'blue': np.array([0, 0, 255]),
+    'purple': np.array([112, 39, 195]),
+    'yellow': np.array([255, 255, 0]),
+    'grey': np.array([100, 100, 100])
 }
 
 COLOR_NAMES = sorted(list(COLORS.keys()))
 
 # Used to map colors to integers
-COLOR_TO_IDX = {"red": 0, "green": 1, "blue": 2, "purple": 3, "yellow": 4, "grey": 5}
+COLOR_TO_IDX = {
+    'red': 0,
+    'green': 1,
+    'blue': 2,
+    'purple': 3,
+    'yellow': 4,
+    'grey': 5
+}
 
 IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys()))
 
 # Map of object type to integers
 OBJECT_TO_IDX = {
-    "unseen": 0,
-    "empty": 1,
-    "wall": 2,
-    "floor": 3,
-    "door": 4,
-    "key": 5,
-    "ball": 6,
-    "box": 7,
-    "goal": 8,
-    "lava": 9,
-    "agent": 10,
+    'unseen': 0,
+    'empty': 1,
+    'wall': 2,
+    'floor': 3,
+    'door': 4,
+    'key': 5,
+    'ball': 6,
+    'box': 7,
+    'goal': 8,
+    'lava': 9,
+    'agent': 10,
 }
 
 IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys()))
 
 # Map of state names to integers
 STATE_TO_IDX = {
-    "open": 0,
-    "closed": 1,
-    "locked": 2,
+    'open': 0,
+    'closed': 1,
+    'locked': 2,
 }
 
 # Map of agent direction indices to vectors
@@ -124,28 +120,28 @@ class WorldObj:
         obj_type = IDX_TO_OBJECT[type_idx]
         color = IDX_TO_COLOR[color_idx]
 
-        if obj_type == "empty" or obj_type == "unseen":
+        if obj_type == 'empty' or obj_type == 'unseen':
             return None
 
         # State, 0: open, 1: closed, 2: locked
         is_open = state == 0
         is_locked = state == 2
 
-        if obj_type == "wall":
+        if obj_type == 'wall':
             v = Wall(color)
-        elif obj_type == "floor":
+        elif obj_type == 'floor':
             v = Floor(color)
-        elif obj_type == "ball":
+        elif obj_type == 'ball':
             v = Ball(color)
-        elif obj_type == "key":
+        elif obj_type == 'key':
             v = Key(color)
-        elif obj_type == "box":
+        elif obj_type == 'box':
             v = Box(color)
-        elif obj_type == "door":
+        elif obj_type == 'door':
             v = Door(color, is_open, is_locked)
-        elif obj_type == "goal":
+        elif obj_type == 'goal':
             v = Goal()
-        elif obj_type == "lava":
+        elif obj_type == 'lava':
             v = Lava()
         else:
             assert False, "unknown object type in decode '%s'" % obj_type
@@ -159,7 +155,7 @@ class WorldObj:
 
 class Goal(WorldObj):
     def __init__(self):
-        super().__init__("goal", "green")
+        super().__init__('goal', 'green')
 
     def can_overlap(self):
         return True
@@ -173,8 +169,8 @@ class Floor(WorldObj):
     Colored floor tile the agent can walk over
     """
 
-    def __init__(self, color="blue"):
-        super().__init__("floor", color)
+    def __init__(self, color='blue'):
+        super().__init__('floor', color)
 
     def can_overlap(self):
         return True
@@ -187,7 +183,7 @@ class Floor(WorldObj):
 
 class Lava(WorldObj):
     def __init__(self):
-        super().__init__("lava", "red")
+        super().__init__('lava', 'red')
 
     def can_overlap(self):
         return True
@@ -202,15 +198,19 @@ class Lava(WorldObj):
         for i in range(3):
             ylo = 0.3 + 0.2 * i
             yhi = 0.4 + 0.2 * i
-            fill_coords(img, point_in_line(0.1, ylo, 0.3, yhi, r=0.03), (0, 0, 0))
-            fill_coords(img, point_in_line(0.3, yhi, 0.5, ylo, r=0.03), (0, 0, 0))
-            fill_coords(img, point_in_line(0.5, ylo, 0.7, yhi, r=0.03), (0, 0, 0))
-            fill_coords(img, point_in_line(0.7, yhi, 0.9, ylo, r=0.03), (0, 0, 0))
+            fill_coords(img, point_in_line(
+                0.1, ylo, 0.3, yhi, r=0.03), (0, 0, 0))
+            fill_coords(img, point_in_line(
+                0.3, yhi, 0.5, ylo, r=0.03), (0, 0, 0))
+            fill_coords(img, point_in_line(
+                0.5, ylo, 0.7, yhi, r=0.03), (0, 0, 0))
+            fill_coords(img, point_in_line(
+                0.7, yhi, 0.9, ylo, r=0.03), (0, 0, 0))
 
 
 class Wall(WorldObj):
-    def __init__(self, color="grey"):
-        super().__init__("wall", color)
+    def __init__(self, color='grey'):
+        super().__init__('wall', color)
 
     def see_behind(self):
         return False
@@ -221,7 +221,7 @@ class Wall(WorldObj):
 
 class Door(WorldObj):
     def __init__(self, color, is_open=False, is_locked=False):
-        super().__init__("door", color)
+        super().__init__('door', color)
         self.is_open = is_open
         self.is_locked = is_locked
 
@@ -268,7 +268,8 @@ class Door(WorldObj):
         # Door frame and door
         if self.is_locked:
             fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c)
-            fill_coords(img, point_in_rect(0.06, 0.94, 0.06, 0.94), 0.45 * np.array(c))
+            fill_coords(img, point_in_rect(
+                0.06, 0.94, 0.06, 0.94), 0.45 * np.array(c))
 
             # Draw key slot
             fill_coords(img, point_in_rect(0.52, 0.75, 0.50, 0.56), c)
@@ -283,8 +284,8 @@ class Door(WorldObj):
 
 
 class Key(WorldObj):
-    def __init__(self, color="blue"):
-        super().__init__("key", color)
+    def __init__(self, color='blue'):
+        super(Key, self).__init__('key', color)
 
     def can_pickup(self):
         return True
@@ -305,8 +306,8 @@ class Key(WorldObj):
 
 
 class Ball(WorldObj):
-    def __init__(self, color="blue"):
-        super().__init__("ball", color)
+    def __init__(self, color='blue'):
+        super(Ball, self).__init__('ball', color)
 
     def can_pickup(self):
         return True
@@ -317,7 +318,7 @@ class Ball(WorldObj):
 
 class Box(WorldObj):
     def __init__(self, color, contains=None):
-        super().__init__("box", color)
+        super(Box, self).__init__('box', color)
         self.contains = contains
 
     def can_pickup(self):
@@ -381,7 +382,6 @@ class Grid:
 
     def copy(self):
         from copy import deepcopy
-
         return deepcopy(self)
 
     def set(self, i, j, v):
@@ -408,9 +408,9 @@ class Grid:
 
     def wall_rect(self, x, y, w, h):
         self.horz_wall(x, y, w)
-        self.horz_wall(x, y + h - 1, w)
+        self.horz_wall(x, y+h-1, w)
         self.vert_wall(x, y, h)
-        self.vert_wall(x + w - 1, y, h)
+        self.vert_wall(x+w-1, y, h)
 
     def rotate_left(self):
         """
@@ -438,7 +438,8 @@ class Grid:
                 x = topX + i
                 y = topY + j
 
-                if x >= 0 and x < self.width and y >= 0 and y < self.height:
+                if x >= 0 and x < self.width and \
+                   y >= 0 and y < self.height:
                     v = self.get(x, y)
                 else:
                     v = Wall()
@@ -449,7 +450,12 @@ class Grid:
 
     @classmethod
     def render_tile(
-        cls, obj, agent_dir=None, highlight=False, tile_size=TILE_PIXELS, subdivs=3
+        cls,
+        obj,
+        agent_dir=None,
+        highlight=False,
+        tile_size=TILE_PIXELS,
+        subdivs=3
     ):
         """
         Render a tile and cache the result
@@ -462,15 +468,14 @@ class Grid:
         if key in cls.tile_cache:
             return cls.tile_cache[key]
 
-        img = np.zeros(
-            shape=(tile_size * subdivs, tile_size * subdivs, 3), dtype=np.uint8
-        )
+        img = np.zeros(shape=(tile_size * subdivs,
+                       tile_size * subdivs, 3), dtype=np.uint8)
 
         # Draw the grid lines (top and left edges)
         fill_coords(img, point_in_rect(0, 0.031, 0, 1), (100, 100, 100))
         fill_coords(img, point_in_rect(0, 1, 0, 0.031), (100, 100, 100))
 
-        if obj is not None:
+        if obj != None:
             obj.render(img)
 
         # Overlay the agent on top
@@ -482,7 +487,8 @@ class Grid:
             )
 
             # Rotate the agent based on its direction
-            tri_fn = rotate_fn(tri_fn, cx=0.5, cy=0.5, theta=0.5 * math.pi * agent_dir)
+            tri_fn = rotate_fn(tri_fn, cx=0.5, cy=0.5,
+                               theta=0.5*math.pi*agent_dir)
             fill_coords(img, tri_fn, (255, 0, 0))
 
         # Highlight the cell if needed
@@ -497,7 +503,13 @@ class Grid:
 
         return img
 
-    def render(self, tile_size, agent_pos=None, agent_dir=None, highlight_mask=None):
+    def render(
+        self,
+        tile_size,
+        agent_pos=None,
+        agent_dir=None,
+        highlight_mask=None
+    ):
         """
         Render this grid at a given scale
         :param r: target renderer object
@@ -505,7 +517,8 @@ class Grid:
         """
 
         if highlight_mask is None:
-            highlight_mask = np.zeros(shape=(self.width, self.height), dtype=bool)
+            highlight_mask = np.zeros(
+                shape=(self.width, self.height), dtype=bool)
 
         # Compute the total grid size
         width_px = self.width * tile_size
@@ -523,13 +536,13 @@ class Grid:
                     cell,
                     agent_dir=agent_dir if agent_here else None,
                     highlight=highlight_mask[i, j],
-                    tile_size=tile_size,
+                    tile_size=tile_size
                 )
 
                 ymin = j * tile_size
-                ymax = (j + 1) * tile_size
+                ymax = (j+1) * tile_size
                 xmin = i * tile_size
-                xmax = (i + 1) * tile_size
+                xmax = (i+1) * tile_size
                 img[ymin:ymax, xmin:xmax, :] = tile_img
 
         return img
@@ -542,7 +555,7 @@ class Grid:
         if vis_mask is None:
             vis_mask = np.ones((self.width, self.height), dtype=bool)
 
-        array = np.zeros((self.width, self.height, 3), dtype="uint8")
+        array = np.zeros((self.width, self.height, 3), dtype='uint8')
 
         for i in range(self.width):
             for j in range(self.height):
@@ -550,7 +563,7 @@ class Grid:
                     v = self.get(i, j)
 
                     if v is None:
-                        array[i, j, 0] = OBJECT_TO_IDX["empty"]
+                        array[i, j, 0] = OBJECT_TO_IDX['empty']
                         array[i, j, 1] = 0
                         array[i, j, 2] = 0
 
@@ -576,7 +589,7 @@ class Grid:
                 type_idx, color_idx, state = array[i, j]
                 v = WorldObj.decode(type_idx, color_idx, state)
                 grid.set(i, j, v)
-                vis_mask[i, j] = type_idx != OBJECT_TO_IDX["unseen"]
+                vis_mask[i, j] = (type_idx != OBJECT_TO_IDX['unseen'])
 
         return grid, vis_mask
 
@@ -586,7 +599,7 @@ class Grid:
         mask[agent_pos[0], agent_pos[1]] = True
 
         for j in reversed(range(0, grid.height)):
-            for i in range(0, grid.width - 1):
+            for i in range(0, grid.width-1):
                 if not mask[i, j]:
                     continue
 
@@ -594,10 +607,10 @@ class Grid:
                 if cell and not cell.see_behind():
                     continue
 
-                mask[i + 1, j] = True
+                mask[i+1, j] = True
                 if j > 0:
-                    mask[i + 1, j - 1] = True
-                    mask[i, j - 1] = True
+                    mask[i+1, j-1] = True
+                    mask[i, j-1] = True
 
             for i in reversed(range(1, grid.width)):
                 if not mask[i, j]:
@@ -607,10 +620,10 @@ class Grid:
                 if cell and not cell.see_behind():
                     continue
 
-                mask[i - 1, j] = True
+                mask[i-1, j] = True
                 if j > 0:
-                    mask[i - 1, j - 1] = True
-                    mask[i, j - 1] = True
+                    mask[i-1, j-1] = True
+                    mask[i, j-1] = True
 
         for j in range(0, grid.height):
             for i in range(0, grid.width):
@@ -625,7 +638,13 @@ class MiniGridEnv(gym.Env):
     2D grid world game environment
     """
 
-    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 10}
+    metadata = {
+        # Deprecated: use 'render_modes' instead
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second': 10,  # Deprecated: use 'render_fps' instead
+        'render_modes': ['human', 'rgb_array'],
+        'render_fps': 10
+    }
 
     # Enumeration of possible actions
     class Actions(IntEnum):
@@ -651,12 +670,13 @@ class MiniGridEnv(gym.Env):
         height=None,
         max_steps=100,
         see_through_walls=False,
-        seed=1337,
         agent_view_size=7,
+        render_mode=None,
+        **kwargs
     ):
         # Can't set both grid_size and width/height
         if grid_size:
-            assert width is None and height is None
+            assert width == None and height == None
             width = grid_size
             height = grid_size
 
@@ -677,9 +697,18 @@ class MiniGridEnv(gym.Env):
             low=0,
             high=255,
             shape=(self.agent_view_size, self.agent_view_size, 3),
-            dtype="uint8",
+            dtype='uint8'
         )
-        self.observation_space = spaces.Dict({"image": self.observation_space})
+        self.observation_space = spaces.Dict({
+            'image': self.observation_space,
+            'direction': spaces.Discrete(4),
+            'mission': spaces.Text(max_length=200,
+                                   charset=string.ascii_letters + string.digits + ' .,!-'
+                                   )
+        })
+
+        # render mode
+        self.render_mode = render_mode
 
         # Range of possible rewards
         self.reward_range = (0, 1)
@@ -697,20 +726,16 @@ class MiniGridEnv(gym.Env):
         self.agent_pos = None
         self.agent_dir = None
 
-        # Initialize the RNG
-        self.seed(seed=seed)
-
         # Initialize the state
         self.reset()
 
-    def reset(self):
+    def reset(self, *, seed=None, return_info=False, options=None):
+        super().reset(seed=seed)
         # Current position and direction of the agent
         self.agent_pos = None
         self.agent_dir = None
 
         # Generate a new random grid at the start of each episode
-        # To keep the same grid for each episode, call env.seed() with
-        # the same seed before calling env.reset()
         self._gen_grid(self.width, self.height)
 
         # These fields should be defined by _gen_grid
@@ -731,20 +756,16 @@ class MiniGridEnv(gym.Env):
         obs = self.gen_obs()
         return obs
 
-    def seed(self, seed=1337):
-        # Seed the random number generator
-        self.np_random, _ = seeding.np_random(seed)
-        return [seed]
-
     def hash(self, size=16):
         """Compute a hash that uniquely identifies the current state of the environment.
         :param size: Size of the hashing
         """
         sample_hash = hashlib.sha256()
 
-        to_encode = [self.grid.encode().tolist(), self.agent_pos, self.agent_dir]
+        to_encode = [self.grid.encode().tolist(), self.agent_pos,
+                     self.agent_dir]
         for item in to_encode:
-            sample_hash.update(str(item).encode("utf8"))
+            sample_hash.update(str(item).encode('utf8'))
 
         return sample_hash.hexdigest()[:size]
 
@@ -761,20 +782,28 @@ class MiniGridEnv(gym.Env):
 
         # Map of object types to short string
         OBJECT_TO_STR = {
-            "wall": "W",
-            "floor": "F",
-            "door": "D",
-            "key": "K",
-            "ball": "A",
-            "box": "B",
-            "goal": "G",
-            "lava": "V",
+            'wall': 'W',
+            'floor': 'F',
+            'door': 'D',
+            'key': 'K',
+            'ball': 'A',
+            'box': 'B',
+            'goal': 'G',
+            'lava': 'V',
         }
 
+        # Short string for opened door
+        OPENDED_DOOR_IDS = '_'
+
         # Map agent's direction to short string
-        AGENT_DIR_TO_STR = {0: ">", 1: "V", 2: "<", 3: "^"}
+        AGENT_DIR_TO_STR = {
+            0: '>',
+            1: 'V',
+            2: '<',
+            3: '^'
+        }
 
-        str = ""
+        str = ''
 
         for j in range(self.grid.height):
 
@@ -785,23 +814,23 @@ class MiniGridEnv(gym.Env):
 
                 c = self.grid.get(i, j)
 
-                if c is None:
-                    str += "  "
+                if c == None:
+                    str += '  '
                     continue
 
-                if c.type == "door":
+                if c.type == 'door':
                     if c.is_open:
-                        str += "__"
+                        str += '__'
                     elif c.is_locked:
-                        str += "L" + c.color[0].upper()
+                        str += 'L' + c.color[0].upper()
                     else:
-                        str += "D" + c.color[0].upper()
+                        str += 'D' + c.color[0].upper()
                     continue
 
                 str += OBJECT_TO_STR[c.type] + c.color[0].upper()
 
             if j < self.grid.height - 1:
-                str += "\n"
+                str += '\n'
 
         return str
 
@@ -820,7 +849,7 @@ class MiniGridEnv(gym.Env):
         Generate random integer in [low,high[
         """
 
-        return self.np_random.randint(low, high)
+        return self.np_random.integers(low, high)
 
     def _rand_float(self, low, high):
         """
@@ -834,7 +863,7 @@ class MiniGridEnv(gym.Env):
         Generate random boolean value
         """
 
-        return self.np_random.randint(0, 2) == 0
+        return (self.np_random.integers(0, 2) == 0)
 
     def _rand_elem(self, iterable):
         """
@@ -875,11 +904,17 @@ class MiniGridEnv(gym.Env):
         """
 
         return (
-            self.np_random.randint(xLow, xHigh),
-            self.np_random.randint(yLow, yHigh),
+            self.np_random.integers(xLow, xHigh),
+            self.np_random.integers(yLow, yHigh)
         )
 
-    def place_obj(self, obj, top=None, size=None, reject_fn=None, max_tries=math.inf):
+    def place_obj(self,
+                  obj,
+                  top=None,
+                  size=None,
+                  reject_fn=None,
+                  max_tries=math.inf
+                  ):
         """
         Place an object at an empty position in the grid
 
@@ -902,19 +937,17 @@ class MiniGridEnv(gym.Env):
             # This is to handle with rare cases where rejection sampling
             # gets stuck in an infinite loop
             if num_tries > max_tries:
-                raise RecursionError("rejection sampling failed in place_obj")
+                raise RecursionError('rejection sampling failed in place_obj')
 
             num_tries += 1
 
-            pos = np.array(
-                (
-                    self._rand_int(top[0], min(top[0] + size[0], self.grid.width)),
-                    self._rand_int(top[1], min(top[1] + size[1], self.grid.height)),
-                )
-            )
+            pos = np.array((
+                self._rand_int(top[0], min(top[0] + size[0], self.grid.width)),
+                self._rand_int(top[1], min(top[1] + size[1], self.grid.height))
+            ))
 
             # Don't place the object on top of another object
-            if self.grid.get(*pos) is not None:
+            if self.grid.get(*pos) != None:
                 continue
 
             # Don't place the object where the agent is
@@ -944,7 +977,13 @@ class MiniGridEnv(gym.Env):
         obj.init_pos = (i, j)
         obj.cur_pos = (i, j)
 
-    def place_agent(self, top=None, size=None, rand_dir=True, max_tries=math.inf):
+    def place_agent(
+        self,
+        top=None,
+        size=None,
+        rand_dir=True,
+        max_tries=math.inf
+    ):
         """
         Set the agent's starting point at an empty position in the grid
         """
@@ -999,46 +1038,49 @@ class MiniGridEnv(gym.Env):
         # Compute the absolute coordinates of the top-left view corner
         sz = self.agent_view_size
         hs = self.agent_view_size // 2
-        tx = ax + (dx * (sz - 1)) - (rx * hs)
-        ty = ay + (dy * (sz - 1)) - (ry * hs)
+        tx = ax + (dx * (sz-1)) - (rx * hs)
+        ty = ay + (dy * (sz-1)) - (ry * hs)
 
         lx = i - tx
         ly = j - ty
 
         # Project the coordinates of the object relative to the top-left
         # corner onto the agent's own coordinate system
-        vx = rx * lx + ry * ly
-        vy = -(dx * lx + dy * ly)
+        vx = (rx*lx + ry*ly)
+        vy = -(dx*lx + dy*ly)
 
         return vx, vy
 
-    def get_view_exts(self):
+    def get_view_exts(self, agent_view_size=None):
         """
         Get the extents of the square set of tiles visible to the agent
         Note: the bottom extent indices are not included in the set
+        if agent_view_size is None, use self.agent_view_size
         """
 
+        agent_view_size = agent_view_size or self.agent_view_size
+
         # Facing right
         if self.agent_dir == 0:
             topX = self.agent_pos[0]
-            topY = self.agent_pos[1] - self.agent_view_size // 2
+            topY = self.agent_pos[1] - agent_view_size // 2
         # Facing down
         elif self.agent_dir == 1:
-            topX = self.agent_pos[0] - self.agent_view_size // 2
+            topX = self.agent_pos[0] - agent_view_size // 2
             topY = self.agent_pos[1]
         # Facing left
         elif self.agent_dir == 2:
-            topX = self.agent_pos[0] - self.agent_view_size + 1
-            topY = self.agent_pos[1] - self.agent_view_size // 2
+            topX = self.agent_pos[0] - agent_view_size + 1
+            topY = self.agent_pos[1] - agent_view_size // 2
         # Facing up
         elif self.agent_dir == 3:
-            topX = self.agent_pos[0] - self.agent_view_size // 2
-            topY = self.agent_pos[1] - self.agent_view_size + 1
+            topX = self.agent_pos[0] - agent_view_size // 2
+            topY = self.agent_pos[1] - agent_view_size + 1
         else:
             assert False, "invalid agent direction"
 
-        botX = topX + self.agent_view_size
-        botY = topY + self.agent_view_size
+        botX = topX + agent_view_size
+        botY = topY + agent_view_size
 
         return (topX, topY, botX, botY)
 
@@ -1072,7 +1114,7 @@ class MiniGridEnv(gym.Env):
         vx, vy = coordinates
 
         obs = self.gen_obs()
-        obs_grid, _ = Grid.decode(obs["image"])
+        obs_grid, _ = Grid.decode(obs['image'])
         obs_cell = obs_grid.get(vx, vy)
         world_cell = self.grid.get(x, y)
 
@@ -1102,12 +1144,12 @@ class MiniGridEnv(gym.Env):
 
         # Move forward
         elif action == self.actions.forward:
-            if fwd_cell is None or fwd_cell.can_overlap():
+            if fwd_cell == None or fwd_cell.can_overlap():
                 self.agent_pos = fwd_pos
-            if fwd_cell is not None and fwd_cell.type == "goal":
+            if fwd_cell != None and fwd_cell.type == 'goal':
                 done = True
                 reward = self._reward()
-            if fwd_cell is not None and fwd_cell.type == "lava":
+            if fwd_cell != None and fwd_cell.type == 'lava':
                 done = True
 
         # Pick up an object
@@ -1144,16 +1186,19 @@ class MiniGridEnv(gym.Env):
 
         return obs, reward, done, {}
 
-    def gen_obs_grid(self):
+    def gen_obs_grid(self, agent_view_size=None):
         """
         Generate the sub-grid observed by the agent.
         This method also outputs a visibility mask telling us which grid
         cells the agent can actually see.
+        if agent_view_size is None, self.agent_view_size is used
         """
 
-        topX, topY, botX, botY = self.get_view_exts()
+        topX, topY, botX, botY = self.get_view_exts(agent_view_size)
 
-        grid = self.grid.slice(topX, topY, self.agent_view_size, self.agent_view_size)
+        agent_view_size = agent_view_size or self.agent_view_size
+
+        grid = self.grid.slice(topX, topY, agent_view_size, agent_view_size)
 
         for i in range(self.agent_dir + 1):
             grid = grid.rotate_left()
@@ -1161,9 +1206,8 @@ class MiniGridEnv(gym.Env):
         # Process occluders and visibility
         # Note that this incurs some performance cost
         if not self.see_through_walls:
-            vis_mask = grid.process_vis(
-                agent_pos=(self.agent_view_size // 2, self.agent_view_size - 1)
-            )
+            vis_mask = grid.process_vis(agent_pos=(
+                agent_view_size // 2, agent_view_size - 1))
         else:
             vis_mask = np.ones(shape=(grid.width, grid.height), dtype=bool)
 
@@ -1189,18 +1233,21 @@ class MiniGridEnv(gym.Env):
         image = grid.encode(vis_mask)
 
         assert hasattr(
-            self, "mission"
-        ), "environments must define a textual mission string"
+            self, 'mission'), "environments must define a textual mission string"
 
         # Observations are dictionaries containing:
         # - an image (partially observable view of the environment)
         # - the agent's direction/orientation (acting as a compass)
         # - a textual mission string (instructions for the agent)
-        obs = {"image": image, "direction": self.agent_dir, "mission": self.mission}
+        obs = {
+            'image': image,
+            'direction': self.agent_dir,
+            'mission': self.mission
+        }
 
         return obs
 
-    def get_obs_render(self, obs, tile_size=TILE_PIXELS // 2):
+    def get_obs_render(self, obs, tile_size=TILE_PIXELS//2):
         """
         Render an agent observation for visualization
         """
@@ -1212,25 +1259,25 @@ class MiniGridEnv(gym.Env):
             tile_size,
             agent_pos=(self.agent_view_size // 2, self.agent_view_size - 1),
             agent_dir=3,
-            highlight_mask=vis_mask,
+            highlight_mask=vis_mask
         )
 
         return img
 
-    def render(self, mode="human", close=False, highlight=True, tile_size=TILE_PIXELS):
+    def render(self, mode='human', close=False, highlight=True, tile_size=TILE_PIXELS):
         """
         Render the whole-grid human view
         """
-
+        if self.render_mode is not None:
+            mode = self.render_mode
         if close:
             if self.window:
                 self.window.close()
             return
 
-        if mode == "human" and not self.window:
+        if mode == 'human' and not self.window:
             import gym_minigrid.window
-
-            self.window = gym_minigrid.window.Window("gym_minigrid")
+            self.window = gym_minigrid.window.Window('gym_minigrid')
             self.window.show(block=False)
 
         # Compute which cells are visible to the agent
@@ -1240,11 +1287,8 @@ class MiniGridEnv(gym.Env):
         # of the agent's view area
         f_vec = self.dir_vec
         r_vec = self.right_vec
-        top_left = (
-            self.agent_pos
-            + f_vec * (self.agent_view_size - 1)
-            - r_vec * (self.agent_view_size // 2)
-        )
+        top_left = self.agent_pos + f_vec * \
+            (self.agent_view_size-1) - r_vec * (self.agent_view_size // 2)
 
         # Mask of which cells to highlight
         highlight_mask = np.zeros(shape=(self.width, self.height), dtype=bool)
@@ -1272,10 +1316,10 @@ class MiniGridEnv(gym.Env):
             tile_size,
             self.agent_pos,
             self.agent_dir,
-            highlight_mask=highlight_mask if highlight else None,
+            highlight_mask=highlight_mask if highlight else None
         )
 
-        if mode == "human":
+        if mode == 'human':
             self.window.set_caption(self.mission)
             self.window.show_img(img)
 

+ 47 - 42
gym_minigrid/roomgrid.py

@@ -1,5 +1,4 @@
-from gym_minigrid.minigrid import COLOR_NAMES, Ball, Box, Door, Grid, Key, MiniGridEnv
-
+from .minigrid import *
 
 def reject_next_to(env, pos):
     """
@@ -12,9 +11,12 @@ def reject_next_to(env, pos):
     d = abs(sx - x) + abs(sy - y)
     return d < 2
 
-
 class Room:
-    def __init__(self, top, size):
+    def __init__(
+        self,
+        top,
+        size
+    ):
         # Top-left corner and size (tuples)
         self.top = top
         self.size = size
@@ -37,7 +39,10 @@ class Room:
     def rand_pos(self, env):
         topX, topY = self.top
         sizeX, sizeY = self.size
-        return env._randPos(topX + 1, topX + sizeX - 1, topY + 1, topY + sizeY - 1)
+        return env._randPos(
+            topX + 1, topX + sizeX - 1,
+            topY + 1, topY + sizeY - 1
+        )
 
     def pos_inside(self, x, y):
         """
@@ -55,7 +60,6 @@ class Room:
 
         return True
 
-
 class RoomGrid(MiniGridEnv):
     """
     Environment with multiple rooms and random objects.
@@ -68,8 +72,8 @@ class RoomGrid(MiniGridEnv):
         num_rows=3,
         num_cols=3,
         max_steps=100,
-        seed=0,
         agent_view_size=7,
+        **kwargs
     ):
         assert room_size > 0
         assert room_size >= 3
@@ -83,15 +87,15 @@ class RoomGrid(MiniGridEnv):
         width = (room_size - 1) * num_cols + 1
 
         # By default, this environment has no mission
-        self.mission = ""
+        self.mission = ''
 
         super().__init__(
             width=width,
             height=height,
             max_steps=max_steps,
             see_through_walls=False,
-            seed=seed,
             agent_view_size=agent_view_size,
+            **kwargs
         )
 
     def room_from_pos(self, x, y):
@@ -100,8 +104,8 @@ class RoomGrid(MiniGridEnv):
         assert x >= 0
         assert y >= 0
 
-        i = x // (self.room_size - 1)
-        j = y // (self.room_size - 1)
+        i = x // (self.room_size-1)
+        j = y // (self.room_size-1)
 
         assert i < self.num_cols
         assert j < self.num_rows
@@ -126,8 +130,8 @@ class RoomGrid(MiniGridEnv):
             # For each column of rooms
             for i in range(0, self.num_cols):
                 room = Room(
-                    (i * (self.room_size - 1), j * (self.room_size - 1)),
-                    (self.room_size, self.room_size),
+                    (i * (self.room_size-1), j * (self.room_size-1)),
+                    (self.room_size, self.room_size)
                 )
                 row.append(room)
 
@@ -143,29 +147,26 @@ class RoomGrid(MiniGridEnv):
                 room = self.room_grid[j][i]
 
                 x_l, y_l = (room.top[0] + 1, room.top[1] + 1)
-                x_m, y_m = (
-                    room.top[0] + room.size[0] - 1,
-                    room.top[1] + room.size[1] - 1,
-                )
+                x_m, y_m = (room.top[0] + room.size[0] - 1, room.top[1] + room.size[1] - 1)
 
                 # Door positions, order is right, down, left, up
                 if i < self.num_cols - 1:
-                    room.neighbors[0] = self.room_grid[j][i + 1]
+                    room.neighbors[0] = self.room_grid[j][i+1]
                     room.door_pos[0] = (x_m, self._rand_int(y_l, y_m))
                 if j < self.num_rows - 1:
-                    room.neighbors[1] = self.room_grid[j + 1][i]
+                    room.neighbors[1] = self.room_grid[j+1][i]
                     room.door_pos[1] = (self._rand_int(x_l, x_m), y_m)
                 if i > 0:
-                    room.neighbors[2] = self.room_grid[j][i - 1]
+                    room.neighbors[2] = self.room_grid[j][i-1]
                     room.door_pos[2] = room.neighbors[2].door_pos[0]
                 if j > 0:
-                    room.neighbors[3] = self.room_grid[j - 1][i]
+                    room.neighbors[3] = self.room_grid[j-1][i]
                     room.door_pos[3] = room.neighbors[3].door_pos[1]
 
         # The agent starts in the middle, facing right
         self.agent_pos = (
-            (self.num_cols // 2) * (self.room_size - 1) + (self.room_size // 2),
-            (self.num_rows // 2) * (self.room_size - 1) + (self.room_size // 2),
+            (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2),
+            (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2)
         )
         self.agent_dir = 0
 
@@ -177,7 +178,11 @@ class RoomGrid(MiniGridEnv):
         room = self.get_room(i, j)
 
         pos = self.place_obj(
-            obj, room.top, room.size, reject_fn=reject_next_to, max_tries=1000
+            obj,
+            room.top,
+            room.size,
+            reject_fn=reject_next_to,
+            max_tries=1000
         )
 
         room.objs.append(obj)
@@ -189,19 +194,19 @@ class RoomGrid(MiniGridEnv):
         Add a new object to room (i, j)
         """
 
-        if kind is None:
-            kind = self._rand_elem(["key", "ball", "box"])
+        if kind == None:
+            kind = self._rand_elem(['key', 'ball', 'box'])
 
-        if color is None:
+        if color == None:
             color = self._rand_color()
 
         # TODO: we probably want to add an Object.make helper function
-        assert kind in ["key", "ball", "box"]
-        if kind == "key":
+        assert kind in ['key', 'ball', 'box']
+        if kind == 'key':
             obj = Key(color)
-        elif kind == "ball":
+        elif kind == 'ball':
             obj = Ball(color)
-        elif kind == "box":
+        elif kind == 'box':
             obj = Box(color)
 
         return self.place_in_room(i, j, obj)
@@ -213,7 +218,7 @@ class RoomGrid(MiniGridEnv):
 
         room = self.get_room(i, j)
 
-        if door_idx is None:
+        if door_idx == None:
             # Need to make sure that there is a neighbor along this wall
             # and that there is not already a door
             while True:
@@ -221,7 +226,7 @@ class RoomGrid(MiniGridEnv):
                 if room.neighbors[door_idx] and room.doors[door_idx] is None:
                     break
 
-        if color is None:
+        if color == None:
             color = self._rand_color()
 
         if locked is None:
@@ -238,7 +243,7 @@ class RoomGrid(MiniGridEnv):
 
         neighbor = room.neighbors[door_idx]
         room.doors[door_idx] = door
-        neighbor.doors[(door_idx + 2) % 4] = door
+        neighbor.doors[(door_idx+2) % 4] = door
 
         return door, pos
 
@@ -276,16 +281,16 @@ class RoomGrid(MiniGridEnv):
 
         # Mark the rooms as connected
         room.doors[wall_idx] = True
-        neighbor.doors[(wall_idx + 2) % 4] = True
+        neighbor.doors[(wall_idx+2) % 4] = True
 
     def place_agent(self, i=None, j=None, rand_dir=True):
         """
         Place the agent in a room
         """
 
-        if i is None:
+        if i == None:
             i = self._rand_int(0, self.num_cols)
-        if j is None:
+        if j == None:
             j = self._rand_int(0, self.num_rows)
 
         room = self.room_grid[j][i]
@@ -294,7 +299,7 @@ class RoomGrid(MiniGridEnv):
         while True:
             super().place_agent(room.top, room.size, rand_dir, max_tries=1000)
             front_cell = self.grid.get(*self.front_pos)
-            if front_cell is None or front_cell.type == "wall":
+            if front_cell is None or front_cell.type == 'wall':
                 break
 
         return self.agent_pos
@@ -328,7 +333,7 @@ class RoomGrid(MiniGridEnv):
             # This is to handle rare situations where random sampling produces
             # a level that cannot be connected, producing in an infinite loop
             if num_itrs > max_itrs:
-                raise RecursionError("connect_all failed")
+                raise RecursionError('connect_all failed')
             num_itrs += 1
 
             # If all rooms are reachable, stop
@@ -372,7 +377,7 @@ class RoomGrid(MiniGridEnv):
 
         while len(dists) < num_distractors:
             color = self._rand_elem(COLOR_NAMES)
-            type = self._rand_elem(["key", "ball", "box"])
+            type = self._rand_elem(['key', 'ball', 'box'])
             obj = (type, color)
 
             if all_unique and obj in objs:
@@ -381,9 +386,9 @@ class RoomGrid(MiniGridEnv):
             # Add the object to a random room if no room specified
             room_i = i
             room_j = j
-            if room_i is None:
+            if room_i == None:
                 room_i = self._rand_int(0, self.num_cols)
-            if room_j is None:
+            if room_j == None:
                 room_j = self._rand_int(0, self.num_rows)
 
             dist, pos = self.add_object(room_i, room_j, *obj)

+ 1 - 1
gym_minigrid/window.py

@@ -21,7 +21,7 @@ class Window:
         self.fig, self.ax = plt.subplots()
 
         # Show the env name in the window title
-        self.fig.canvas.set_window_title(title)
+        self.fig.canvas.manager.set_window_title(title)
 
         # Turn off x/y axis numbering/ticks
         self.ax.xaxis.set_ticks_position("none")

+ 194 - 81
gym_minigrid/wrappers.py

@@ -2,14 +2,13 @@ import math
 import operator
 from functools import reduce
 
-import gym
 import numpy as np
-from gym import spaces
-
-from gym_minigrid.minigrid import COLOR_TO_IDX, OBJECT_TO_IDX, STATE_TO_IDX, Goal
+import gym
+from gym import error, spaces, utils
+from .minigrid import OBJECT_TO_IDX, COLOR_TO_IDX, STATE_TO_IDX, Goal
 
 
-class ReseedWrapper(gym.core.Wrapper):
+class ReseedWrapper(gym.Wrapper):
     """
     Wrapper to always regenerate an environment with the same set of seeds.
     This can be used to force an environment to always keep the same
@@ -24,15 +23,14 @@ class ReseedWrapper(gym.core.Wrapper):
     def reset(self, **kwargs):
         seed = self.seeds[self.seed_idx]
         self.seed_idx = (self.seed_idx + 1) % len(self.seeds)
-        self.env.seed(seed)
-        return self.env.reset(**kwargs)
+        return self.env.reset(seed=seed, **kwargs)
 
     def step(self, action):
         obs, reward, done, info = self.env.step(action)
         return obs, reward, done, info
 
 
-class ActionBonus(gym.core.Wrapper):
+class ActionBonus(gym.Wrapper):
     """
     Wrapper which adds an exploration bonus.
     This is a reward to encourage exploration of less
@@ -67,7 +65,7 @@ class ActionBonus(gym.core.Wrapper):
         return self.env.reset(**kwargs)
 
 
-class StateBonus(gym.core.Wrapper):
+class StateBonus(gym.Wrapper):
     """
     Adds an exploration bonus based on which positions
     are visited on the grid.
@@ -83,7 +81,7 @@ class StateBonus(gym.core.Wrapper):
         # Tuple based on which we index the counts
         # We use the position after an update
         env = self.unwrapped
-        tup = tuple(env.agent_pos)
+        tup = (tuple(env.agent_pos))
 
         # Get the count for this key
         pre_count = 0
@@ -103,20 +101,20 @@ class StateBonus(gym.core.Wrapper):
         return self.env.reset(**kwargs)
 
 
-class ImgObsWrapper(gym.core.ObservationWrapper):
+class ImgObsWrapper(gym.ObservationWrapper):
     """
     Use the image as the only observation output, no language/mission.
     """
 
     def __init__(self, env):
         super().__init__(env)
-        self.observation_space = env.observation_space.spaces["image"]
+        self.observation_space = env.observation_space.spaces['image']
 
     def observation(self, obs):
-        return obs["image"]
+        return obs['image']
 
 
-class OneHotPartialObsWrapper(gym.core.ObservationWrapper):
+class OneHotPartialObsWrapper(gym.ObservationWrapper):
     """
     Wrapper to get a one-hot encoding of a partially observable
     agent view as observation.
@@ -127,18 +125,24 @@ class OneHotPartialObsWrapper(gym.core.ObservationWrapper):
 
         self.tile_size = tile_size
 
-        obs_shape = env.observation_space["image"].shape
+        obs_shape = env.observation_space['image'].shape
 
         # Number of bits per cell
         num_bits = len(OBJECT_TO_IDX) + len(COLOR_TO_IDX) + len(STATE_TO_IDX)
 
-        self.observation_space.spaces["image"] = spaces.Box(
-            low=0, high=255, shape=(obs_shape[0], obs_shape[1], num_bits), dtype="uint8"
+        new_image_space = spaces.Box(
+            low=0,
+            high=255,
+            shape=(obs_shape[0], obs_shape[1], num_bits),
+            dtype='uint8'
         )
+        self.observation_space = spaces.Dict(
+            {**self.observation_space.spaces, 'image': new_image_space})
 
     def observation(self, obs):
-        img = obs["image"]
-        out = np.zeros(self.observation_space.spaces["image"].shape, dtype="uint8")
+        img = obs['image']
+        out = np.zeros(
+            self.observation_space.spaces['image'].shape, dtype='uint8')
 
         for i in range(img.shape[0]):
             for j in range(img.shape[1]):
@@ -150,13 +154,17 @@ class OneHotPartialObsWrapper(gym.core.ObservationWrapper):
                 out[i, j, len(OBJECT_TO_IDX) + color] = 1
                 out[i, j, len(OBJECT_TO_IDX) + len(COLOR_TO_IDX) + state] = 1
 
-        return {**obs, "image": out}
+        return {
+            **obs,
+            'image': out
+        }
 
 
-class RGBImgObsWrapper(gym.core.ObservationWrapper):
+class RGBImgObsWrapper(gym.ObservationWrapper):
     """
     Wrapper to use fully observable RGB image as observation,
     This can be used to have the agent solve the gridworld in pixel space.
+    To use it, make the unwrapped environment with render_mode='rgb_array'.
     """
 
     def __init__(self, env, tile_size=8):
@@ -164,24 +172,32 @@ class RGBImgObsWrapper(gym.core.ObservationWrapper):
 
         self.tile_size = tile_size
 
-        self.observation_space.spaces["image"] = spaces.Box(
+        new_image_space = spaces.Box(
             low=0,
             high=255,
             shape=(self.env.width * tile_size, self.env.height * tile_size, 3),
-            dtype="uint8",
+            dtype='uint8'
         )
 
+        self.observation_space = spaces.Dict(
+            {**self.observation_space.spaces, 'image': new_image_space})
+
     def observation(self, obs):
         env = self.unwrapped
+        assert env.render_mode == 'rgb_array', env.render_mode
 
         rgb_img = env.render(
-            mode="rgb_array", highlight=False, tile_size=self.tile_size
+            highlight=False,
+            tile_size=self.tile_size
         )
 
-        return {**obs, "image": rgb_img}
+        return {
+            **obs,
+            'image': rgb_img
+        }
 
 
-class RGBImgPartialObsWrapper(gym.core.ObservationWrapper):
+class RGBImgPartialObsWrapper(gym.ObservationWrapper):
     """
     Wrapper to use partially observable RGB image as observation.
     This can be used to have the agent solve the gridworld in pixel space.
@@ -192,23 +208,32 @@ class RGBImgPartialObsWrapper(gym.core.ObservationWrapper):
 
         self.tile_size = tile_size
 
-        obs_shape = env.observation_space.spaces["image"].shape
-        self.observation_space.spaces["image"] = spaces.Box(
+        obs_shape = env.observation_space.spaces['image'].shape
+        new_image_space = spaces.Box(
             low=0,
             high=255,
             shape=(obs_shape[0] * tile_size, obs_shape[1] * tile_size, 3),
-            dtype="uint8",
+            dtype='uint8'
         )
 
+        self.observation_space = spaces.Dict(
+            {**self.observation_space.spaces, 'image': new_image_space})
+
     def observation(self, obs):
         env = self.unwrapped
 
-        rgb_img_partial = env.get_obs_render(obs["image"], tile_size=self.tile_size)
+        rgb_img_partial = env.get_obs_render(
+            obs['image'],
+            tile_size=self.tile_size
+        )
 
-        return {**obs, "image": rgb_img_partial}
+        return {
+            **obs,
+            'image': rgb_img_partial
+        }
 
 
-class FullyObsWrapper(gym.core.ObservationWrapper):
+class FullyObsWrapper(gym.ObservationWrapper):
     """
     Fully observable gridworld using a compact grid encoding
     """
@@ -216,24 +241,107 @@ class FullyObsWrapper(gym.core.ObservationWrapper):
     def __init__(self, env):
         super().__init__(env)
 
-        self.observation_space.spaces["image"] = spaces.Box(
+        new_image_space = spaces.Box(
             low=0,
             high=255,
             shape=(self.env.width, self.env.height, 3),  # number of cells
-            dtype="uint8",
+            dtype='uint8'
         )
 
+        self.observation_space = spaces.Dict(
+            {**self.observation_space.spaces, 'image': new_image_space})
+
     def observation(self, obs):
         env = self.unwrapped
         full_grid = env.grid.encode()
-        full_grid[env.agent_pos[0]][env.agent_pos[1]] = np.array(
-            [OBJECT_TO_IDX["agent"], COLOR_TO_IDX["red"], env.agent_dir]
+        full_grid[env.agent_pos[0]][env.agent_pos[1]] = np.array([
+            OBJECT_TO_IDX['agent'],
+            COLOR_TO_IDX['red'],
+            env.agent_dir
+        ])
+
+        return {
+            **obs,
+            'image': full_grid
+        }
+
+
+class DictObservationSpaceWrapper(gym.ObservationWrapper):
+    """
+    Transforms the observation space (that has a textual component) to a fully numerical observation space,
+    where the textual instructions are replaced by arrays representing the indices of each word in a fixed vocabulary.
+    """
+
+    def __init__(self, env, max_words_in_mission=50, word_dict=None):
+        """
+        max_words_in_mission is the length of the array to represent a mission, value 0 for missing words
+        word_dict is a dictionary of words to use (keys=words, values=indices from 1 to < max_words_in_mission),
+                  if None, use the Minigrid language
+        """
+        super().__init__(env)
+
+        if word_dict is None:
+            word_dict = self.get_minigrid_words()
+
+        self.max_words_in_mission = max_words_in_mission
+        self.word_dict = word_dict
+
+        image_observation_space = spaces.Box(
+            low=0,
+            high=255,
+            shape=(self.agent_view_size, self.agent_view_size, 3),
+            dtype='uint8'
         )
+        self.observation_space = spaces.Dict({
+            'image': image_observation_space,
+            'direction': spaces.Discrete(4),
+            'mission': spaces.MultiDiscrete([len(self.word_dict.keys())]
+                                            * max_words_in_mission)
+        })
+
+    @staticmethod
+    def get_minigrid_words():
+        colors = ['red', 'green', 'blue', 'yellow', 'purple', 'grey']
+        objects = ['unseen', 'empty', 'wall', 'floor', 'box', 'key', 'ball',
+                   'door', 'goal', 'agent', 'lava']
+
+        verbs = ['pick', 'avoid', 'get', 'find', 'put',
+                 'use', 'open', 'go', 'fetch',
+                 'reach', 'unlock', 'traverse']
+
+        extra_words = ['up', 'the', 'a', 'at', ',', 'square',
+                       'and', 'then', 'to', 'of', 'rooms', 'near',
+                       'opening', 'must', 'you', 'matching', 'end',
+                       'hallway', 'object', 'from', 'room']
+
+        all_words = colors + objects + verbs + extra_words
+        assert len(all_words) == len(set(all_words))
+        return {word: i for i, word in enumerate(all_words)}
+
+    def string_to_indices(self, string, offset=1):
+        """
+        Convert a string to a list of indices.
+        """
+        indices = []
+        # adding space before and after commas
+        string = string.replace(',', ' , ')
+        for word in string.split():
+            if word in self.word_dict.keys():
+                indices.append(self.word_dict[word] + offset)
+            else:
+                raise ValueError('Unknown word: {}'.format(word))
+        return indices
 
-        return {**obs, "image": full_grid}
+    def observation(self, obs):
+        obs['mission'] = self.string_to_indices(obs['mission'])
+        assert len(obs['mission']) < self.max_words_in_mission
+        obs['mission'] += [0] * \
+            (self.max_words_in_mission - len(obs['mission']))
+
+        return obs
 
 
-class FlatObsWrapper(gym.core.ObservationWrapper):
+class FlatObsWrapper(gym.ObservationWrapper):
     """
     Encode mission strings using a one-hot scheme,
     and combine these with observed images into one flat array
@@ -245,40 +353,38 @@ class FlatObsWrapper(gym.core.ObservationWrapper):
         self.maxStrLen = maxStrLen
         self.numCharCodes = 27
 
-        imgSpace = env.observation_space.spaces["image"]
+        imgSpace = env.observation_space.spaces['image']
         imgSize = reduce(operator.mul, imgSpace.shape, 1)
 
         self.observation_space = spaces.Box(
             low=0,
             high=255,
             shape=(imgSize + self.numCharCodes * self.maxStrLen,),
-            dtype="uint8",
+            dtype='uint8'
         )
 
         self.cachedStr = None
         self.cachedArray = None
 
     def observation(self, obs):
-        image = obs["image"]
-        mission = obs["mission"]
+        image = obs['image']
+        mission = obs['mission']
 
         # Cache the last-encoded mission string
         if mission != self.cachedStr:
-            assert (
-                len(mission) <= self.maxStrLen
-            ), f"mission string too long ({len(mission)} chars)"
+            assert len(mission) <= self.maxStrLen, 'mission string too long ({} chars)'.format(
+                len(mission))
             mission = mission.lower()
 
             strArray = np.zeros(
-                shape=(self.maxStrLen, self.numCharCodes), dtype="float32"
-            )
+                shape=(self.maxStrLen, self.numCharCodes), dtype='float32')
 
             for idx, ch in enumerate(mission):
-                if ch >= "a" and ch <= "z":
-                    chNo = ord(ch) - ord("a")
-                elif ch == " ":
-                    chNo = ord("z") - ord("a") + 1
-                assert chNo < self.numCharCodes, "%s : %d" % (ch, chNo)
+                if ch >= 'a' and ch <= 'z':
+                    chNo = ord(ch) - ord('a')
+                elif ch == ' ':
+                    chNo = ord('z') - ord('a') + 1
+                assert chNo < self.numCharCodes, '%s : %d' % (ch, chNo)
                 strArray[idx, chNo] = 1
 
             self.cachedStr = mission
@@ -289,7 +395,7 @@ class FlatObsWrapper(gym.core.ObservationWrapper):
         return obs
 
 
-class ViewSizeWrapper(gym.core.Wrapper):
+class ViewSizeWrapper(gym.Wrapper):
     """
     Wrapper to customize the agent field of view size.
     This cannot be used with fully observable wrappers.
@@ -301,31 +407,41 @@ class ViewSizeWrapper(gym.core.Wrapper):
         assert agent_view_size % 2 == 1
         assert agent_view_size >= 3
 
-        # Override default view size
-        env.unwrapped.agent_view_size = agent_view_size
+        self.agent_view_size = agent_view_size
 
         # Compute observation space with specified view size
-        observation_space = gym.spaces.Box(
-            low=0, high=255, shape=(agent_view_size, agent_view_size, 3), dtype="uint8"
+        new_image_space = gym.spaces.Box(
+            low=0,
+            high=255,
+            shape=(agent_view_size, agent_view_size, 3),
+            dtype='uint8'
         )
 
-        # Override the environment's observation space
-        self.observation_space = spaces.Dict({"image": observation_space})
+        # Override the environment's observation space
+        self.observation_space = spaces.Dict(
+            {**self.observation_space.spaces, 'image': new_image_space})
 
-    def reset(self, **kwargs):
-        return self.env.reset(**kwargs)
+    def observation(self, obs):
+        env = self.unwrapped
 
-    def step(self, action):
-        return self.env.step(action)
+        grid, vis_mask = env.gen_obs_grid(self.agent_view_size)
+
+        # Encode the partially observable view into a numpy array
+        image = grid.encode(vis_mask)
 
+        return {
+            **obs,
+            'image': image
+        }
 
-class DirectionObsWrapper(gym.core.ObservationWrapper):
+
+class DirectionObsWrapper(gym.ObservationWrapper):
     """
     Provides the slope/angular direction to the goal with the observations as modeled by (y2 - y1)/(x2 - x1)
     type = {slope , angle}
     """
 
-    def __init__(self, env, type="slope"):
+    def __init__(self, env, type='slope'):
         super().__init__(env)
         self.goal_position = None
         self.type = type
@@ -333,28 +449,23 @@ class DirectionObsWrapper(gym.core.ObservationWrapper):
     def reset(self):
         obs = self.env.reset()
         if not self.goal_position:
-            self.goal_position = [
-                x for x, y in enumerate(self.grid.grid) if isinstance(y, Goal)
-            ]
-            if (
-                len(self.goal_position) >= 1
-            ):  # in case there are multiple goals , needs to be handled for other env types
+            self.goal_position = [x for x, y in enumerate(
+                self.grid.grid) if isinstance(y, (Goal))]
+            # in case there are multiple goals, needs to be handled for other env types
+            if len(self.goal_position) >= 1:
                 self.goal_position = (
-                    int(self.goal_position[0] / self.height),
-                    self.goal_position[0] % self.width,
-                )
+                    int(self.goal_position[0]/self.height), self.goal_position[0] % self.width)
         return obs
 
     def observation(self, obs):
         slope = np.divide(
-            self.goal_position[1] - self.agent_pos[1],
-            self.goal_position[0] - self.agent_pos[0],
-        )
-        obs["goal_direction"] = np.arctan(slope) if self.type == "angle" else slope
+            self.goal_position[1] - self.agent_pos[1],  self.goal_position[0] - self.agent_pos[0])
+        obs['goal_direction'] = np.arctan(
+            slope) if self.type == 'angle' else slope
         return obs
 
 
-class SymbolicObsWrapper(gym.core.ObservationWrapper):
+class SymbolicObsWrapper(gym.ObservationWrapper):
     """
     Fully observable grid with a symbolic state representation.
     The symbol is a triple of (X, Y, IDX), where X and Y are
@@ -364,12 +475,14 @@ class SymbolicObsWrapper(gym.core.ObservationWrapper):
     def __init__(self, env):
         super().__init__(env)
 
-        self.observation_space.spaces["image"] = spaces.Box(
+        new_image_space = spaces.Box(
             low=0,
             high=max(OBJECT_TO_IDX.values()),
             shape=(self.env.width, self.env.height, 3),  # number of cells
             dtype="uint8",
         )
+        self.observation_space = spaces.Dict(
+            {**self.observation_space.spaces, 'image': new_image_space})
 
     def observation(self, obs):
         objects = np.array(
@@ -379,5 +492,5 @@ class SymbolicObsWrapper(gym.core.ObservationWrapper):
         grid = np.mgrid[:w, :h]
         grid = np.concatenate([grid, objects.reshape(1, w, h)])
         grid = np.transpose(grid, (1, 2, 0))
-        obs["image"] = grid
+        obs['image'] = grid
         return obs

+ 5 - 6
manual_control.py

@@ -8,18 +8,17 @@ from gym_minigrid.window import Window
 from gym_minigrid.wrappers import ImgObsWrapper, RGBImgPartialObsWrapper
 
 
+
 def redraw(img):
     if not args.agent_view:
-        img = env.render("rgb_array", tile_size=args.tile_size)
+        img = env.render(tile_size=args.tile_size)
 
     window.show_img(img)
 
 
 def reset():
-    if args.seed != -1:
-        env.seed(args.seed)
-
-    obs = env.reset()
+    seed = None if args.seed == -1 else args.seed
+    obs = env.reset(seed=seed)
 
     if hasattr(env, "mission"):
         print("Mission: %s" % env.mission)
@@ -95,7 +94,7 @@ parser.add_argument(
 
 args = parser.parse_args()
 
-env = gym.make(args.env)
+env = gym.make(args.env, render_mode='rgb_array')
 
 if args.agent_view:
     env = RGBImgPartialObsWrapper(env)

+ 12 - 0
py.Dockerfile

@@ -0,0 +1,12 @@
+# A Dockerfile that sets up a full Gym install with test dependencies
+ARG PYTHON_VERSION
+FROM python:$PYTHON_VERSION
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN apt-get -y update
+
+COPY . /usr/local/gym_minigrid/
+WORKDIR /usr/local/gym_minigrid/
+
+RUN pip install .[testing] --no-cache-dir

+ 3 - 0
requirements.txt

@@ -0,0 +1,3 @@
+numpy>=1.18.0
+gym>=0.25
+matplotlib>=3.0

+ 66 - 60
run_tests.py

@@ -1,48 +1,38 @@
 #!/usr/bin/env python3
 
+from pydoc import render_doc
 import random
-
-import gym
 import numpy as np
-
+import gym
 import gym_minigrid
-
-# Test specifically importing a specific environment
-from gym_minigrid.minigrid import Grid
 from gym_minigrid.register import env_list
-from gym_minigrid.wrappers import (
-    FlatObsWrapper,
-    FullyObsWrapper,
-    ImgObsWrapper,
-    OneHotPartialObsWrapper,
-    ReseedWrapper,
-    RGBImgObsWrapper,
-    RGBImgPartialObsWrapper,
-    ViewSizeWrapper,
-)
+from gym_minigrid.minigrid import Grid, OBJECT_TO_IDX
 
-##############################################################################
+# Test specifically importing a specific environment
+from gym_minigrid.envs import DoorKeyEnv
 
 # Test importing wrappers
+from gym_minigrid.wrappers import *
 
+##############################################################################
 
-print("%d environments registered" % len(env_list))
+print('%d environments registered' % len(env_list))
 
 for env_idx, env_name in enumerate(env_list):
-    print(f"testing {env_name} ({env_idx + 1}/{len(env_list)})")
+    print('testing {} ({}/{})'.format(env_name, env_idx+1, len(env_list)))
 
     # Load the gym environment
-    env = gym.make(env_name)
+    env = gym.make(env_name, render_mode='rgb_array')
     env.max_steps = min(env.max_steps, 200)
     env.reset()
-    env.render("rgb_array")
+    env.render()
 
     # Verify that the same seed always produces the same environment
     for i in range(0, 5):
         seed = 1337 + i
-        env.seed(seed)
+        _ = env.reset(seed=seed)
         grid1 = env.grid
-        env.seed(seed)
+        _ = env.reset(seed=seed)
         grid2 = env.grid
         assert grid1 == grid2
 
@@ -61,7 +51,7 @@ for env_idx, env_name in enumerate(env_list):
         assert env.agent_pos[1] < env.height
 
         # Test observation encode/decode roundtrip
-        img = obs["image"]
+        img = obs['image']
         grid, vis_mask = Grid.decode(img)
         img2 = grid.encode(vis_mask=vis_mask)
         assert np.array_equal(img, img2)
@@ -77,7 +67,7 @@ for env_idx, env_name in enumerate(env_list):
             num_episodes += 1
             env.reset()
 
-        env.render("rgb_array")
+        env.render()
 
     # Test the close method
     env.close()
@@ -100,7 +90,7 @@ for env_idx, env_name in enumerate(env_list):
     env = FullyObsWrapper(env)
     env.reset()
     obs, _, _, _ = env.step(0)
-    assert obs["image"].shape == env.observation_space.spaces["image"].shape
+    assert obs['image'].shape == env.observation_space.spaces['image'].shape
     env.close()
 
     # RGB image observation wrapper
@@ -108,7 +98,7 @@ for env_idx, env_name in enumerate(env_list):
     env = RGBImgPartialObsWrapper(env)
     env.reset()
     obs, _, _, _ = env.step(0)
-    assert obs["image"].mean() > 0
+    assert obs['image'].mean() > 0
     env.close()
 
     env = gym.make(env_name)
@@ -123,14 +113,30 @@ for env_idx, env_name in enumerate(env_list):
     env.step(0)
     env.close()
 
+    # Test the DictObservationSpaceWrapper
+    env = gym.make(env_name)
+    env = DictObservationSpaceWrapper(env)
+    env.reset()
+    mission = env.mission
+    obs, _, _, _ = env.step(0)
+    assert env.string_to_indices(mission) == [
+        value for value in obs['mission'] if value != 0]
+    env.close()
+
     # Test the wrappers return proper observation spaces.
-    wrappers = [RGBImgObsWrapper, RGBImgPartialObsWrapper, OneHotPartialObsWrapper]
+    wrappers = [
+        RGBImgObsWrapper,
+        RGBImgPartialObsWrapper,
+        OneHotPartialObsWrapper
+    ]
     for wrapper in wrappers:
-        env = wrapper(gym.make(env_name))
+        env = wrapper(gym.make(env_name, render_mode='rgb_array'))
         obs_space, wrapper_name = env.observation_space, wrapper.__name__
         assert isinstance(
-            obs_space, gym.spaces.Dict
-        ), f"Observation space for {wrapper_name} is not a Dict: {obs_space}."
+            obs_space, spaces.Dict
+        ), "Observation space for {0} is not a Dict: {1}.".format(
+            wrapper_name, obs_space
+        )
         # This should not fail either
         ImgObsWrapper(env)
         env.reset()
@@ -139,7 +145,7 @@ for env_idx, env_name in enumerate(env_list):
 
 ##############################################################################
 
-print("testing extra observations")
+print('testing extra observations')
 
 
 class EmptyEnvWithExtraObs(gym_minigrid.envs.EmptyEnv5x5):
@@ -147,20 +153,23 @@ class EmptyEnvWithExtraObs(gym_minigrid.envs.EmptyEnv5x5):
     Custom environment with an extra observation
     """
 
-    def __init__(self) -> None:
-        super().__init__()
-        self.observation_space["size"] = gym.spaces.Box(
-            low=0, high=np.iinfo(np.uint).max, shape=(2,), dtype=np.uint
+    def __init__(self, **kwargs) -> None:
+        super().__init__(**kwargs)
+        self.observation_space['size'] = spaces.Box(
+            low=0,
+            high=1000,  # gym does not like np.iinfo(np.uint).max,
+            shape=(2,),
+            dtype=np.uint
         )
 
-    def reset(self):
-        obs = super().reset()
-        obs["size"] = np.array([self.width, self.height])
+    def reset(self, **kwargs):
+        obs = super().reset(**kwargs)
+        obs['size'] = np.array([self.width, self.height], dtype=np.uint)
         return obs
 
     def step(self, action):
         obs, reward, done, info = super().step(action)
-        obs["size"] = np.array([self.width, self.height])
+        obs['size'] = np.array([self.width, self.height], dtype=np.uint)
         return obs, reward, done, info
 
 
@@ -171,37 +180,34 @@ wrappers = [
     FullyObsWrapper,
 ]
 for wrapper in wrappers:
-    env1 = wrapper(EmptyEnvWithExtraObs())
-    env2 = wrapper(gym.make("MiniGrid-Empty-5x5-v0"))
-
-    env1.seed(0)
-    env2.seed(0)
-
-    obs1 = env1.reset()
-    obs2 = env2.reset()
-    assert "size" in obs1
-    assert obs1["size"].shape == (2,)
-    assert (obs1["size"] == [5, 5]).all()
+    env1 = wrapper(EmptyEnvWithExtraObs(render_mode='rgb_array'))
+    env2 = wrapper(gym.make('MiniGrid-Empty-5x5-v0', render_mode='rgb_array'))
+
+    obs1 = env1.reset(seed=0)
+    obs2 = env2.reset(seed=0)
+    assert 'size' in obs1
+    assert obs1['size'].shape == (2,)
+    assert (obs1['size'] == [5, 5]).all()
     for key in obs2:
         assert np.array_equal(obs1[key], obs2[key])
 
     obs1, reward1, done1, _ = env1.step(0)
     obs2, reward2, done2, _ = env2.step(0)
-    assert "size" in obs1
-    assert obs1["size"].shape == (2,)
-    assert (obs1["size"] == [5, 5]).all()
+    assert 'size' in obs1
+    assert obs1['size'].shape == (2,)
+    assert (obs1['size'] == [5, 5]).all()
     for key in obs2:
         assert np.array_equal(obs1[key], obs2[key])
 
 ##############################################################################
 
-print("testing agent_sees method")
-env = gym.make("MiniGrid-DoorKey-6x6-v0")
+print('testing agent_sees method')
+env = gym.make('MiniGrid-DoorKey-6x6-v0')
 goal_pos = (env.grid.width - 2, env.grid.height - 2)
 
 # Test the "in" operator on grid objects
-assert ("green", "goal") in env.grid
-assert ("blue", "key") not in env.grid
+assert ('green', 'goal') in env.grid
+assert ('blue', 'key') not in env.grid
 
 # Test the env.agent_sees() function
 env.reset()
@@ -209,8 +215,8 @@ for i in range(0, 500):
     action = random.randint(0, env.action_space.n - 1)
     obs, reward, done, info = env.step(action)
 
-    grid, _ = Grid.decode(obs["image"])
-    goal_visible = ("green", "goal") in grid
+    grid, _ = Grid.decode(obs['image'])
+    goal_visible = ('green', 'goal') in grid
 
     agent_sees_goal = env.agent_sees(*goal_pos)
     assert agent_sees_goal == goal_visible

+ 23 - 9
setup.py

@@ -11,25 +11,39 @@ with open("README.md") as fh:
         else:
             break
 
+# pytest is pinned to 7.0.1 because it is the last version that supports Python 3.6
+extras = {
+    "testing": ["pytest==7.0.1"]
+}
+
 setup(
     name="gym_minigrid",
     author="Farama Foundation",
     author_email="jkterry@farama.org",
-    version="1.0.2",
-    keywords="memory, environment, agent, rl, gym",
-    url="https://github.com/Farama-Foundation/gym-minigrid",
-    description="Minimalistic gridworld reinforcement learning environments",
-    packages=["gym_minigrid", "gym_minigrid.envs"],
-    long_description=long_description,
-    python_requires=">=3.7, <3.11",
-    long_description_content_type="text/markdown",
-    install_requires=["gym>=0.24.0", "numpy>=1.18.0"],
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
     ],
+    version="1.1.0",
+    keywords="memory, environment, agent, rl, gym",
+    url="https://github.com/Farama-Foundation/gym-minigrid",
+    description="Minimalistic gridworld reinforcement learning environments",
+    extras_require=extras,
+    packages=["gym_minigrid", "gym_minigrid.envs"],
+    license="Apache",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    install_requires=[
+        "gym>=0.25.0",
+        "numpy>=1.18.0",
+        "matplotlib>=3.0",
+    ],
+    python_requires=">=3.6",
+    tests_require=extras["testing"],
+    
 )

+ 1 - 0
test_requirements.txt

@@ -0,0 +1 @@
+pytest==7.0.1