2 anos atrás · 20384cfa59
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -6,14 +6,12 @@ jobs:
 
				     runs-on: ubuntu-latest
			
 
				     strategy:
			
 
				       matrix:
			
 
				-        python-version: ['3.6', '3.7', '3.8', '3.9', '3.10']
			
 
				+        python-version: ['3.7', '3.8', '3.9', '3.10']
			
 
				     steps:
			
 
				       - uses: actions/checkout@v2
			
 
				       - run: |
			
 
				            docker build -f py.Dockerfile \
			
 
				              --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
			
 
				-             --tag gym-minigrid-docker .
			
 
				-      
			
 
				-      # TODO: Add and fix tests for pytest
			
 
				-      # - name: Run tests
			
 
				-      #   run: docker run gym-docker pytest
			
 
				+             --tag gym-minigrid-docker .      
			
 
				+      - name: Run tests
			
 
				+        run: docker run gym-minigrid-docker pytest
			
--- a/gym_minigrid/benchmark.py
+++ b/gym_minigrid/benchmark.py
--- a/gym_minigrid/manual_control.py
+++ b/gym_minigrid/manual_control.py
--- a/gym_minigrid/minigrid.py
+++ b/gym_minigrid/minigrid.py
@@ -3,10 +3,12 @@ import math
 
				 import string
			
 
				 from abc import abstractmethod
			
 
				 from enum import IntEnum
			
 
				+from functools import partial
			
 
				 
			
 
				 import gym
			
 
				 import numpy as np
			
 
				 from gym import spaces
			
 
				+from gym.utils.renderer import Renderer
			
 
				 
			
 
				 # Size in pixels of a tile in the full-scale human view
			
 
				 from gym_minigrid.rendering import (
			
@@ -638,7 +640,7 @@ class MiniGridEnv(gym.Env):
 
				         # Deprecated: use 'render_modes' instead
			
 
				         "render.modes": ["human", "rgb_array"],
			
 
				         "video.frames_per_second": 10,  # Deprecated: use 'render_fps' instead
			
 
				-        "render_modes": ["human", "rgb_array"],
			
 
				+        "render_modes": ["human", "rgb_array", "single_rgb_array"],
			
 
				         "render_fps": 10,
			
 
				     }
			
 
				 
			
@@ -668,6 +670,8 @@ class MiniGridEnv(gym.Env):
 
				         see_through_walls: bool = False,
			
 
				         agent_view_size: int = 7,
			
 
				         render_mode: str = None,
			
 
				+        highlight: bool = True,
			
 
				+        tile_size: int = TILE_PIXELS,
			
 
				         **kwargs
			
 
				     ):
			
 
				         # Can't set both grid_size and width/height
			
@@ -708,6 +712,12 @@ class MiniGridEnv(gym.Env):
 
				 
			
 
				         # render mode
			
 
				         self.render_mode = render_mode
			
 
				+        render_frame = partial(
			
 
				+            self._render,
			
 
				+            highlight=highlight,
			
 
				+            tile_size=tile_size,
			
 
				+        )
			
 
				+        self.renderer = Renderer(self.render_mode, render_frame)
			
 
				 
			
 
				         # Range of possible rewards
			
 
				         self.reward_range = (0, 1)
			
@@ -753,7 +763,12 @@ class MiniGridEnv(gym.Env):
 
				         # Return first observation
			
 
				         obs = self.gen_obs()
			
 
				 
			
 
				-        return obs
			
 
				+        self.renderer.reset()
			
 
				+        self.renderer.render_step()
			
 
				+        if not return_info:
			
 
				+            return obs
			
 
				+        else:
			
 
				+            return obs, {}
			
 
				 
			
 
				     def hash(self, size=16):
			
 
				         """Compute a hash that uniquely identifies the current state of the environment.
			
@@ -1164,6 +1179,7 @@ class MiniGridEnv(gym.Env):
 
				 
			
 
				         obs = self.gen_obs()
			
 
				 
			
 
				+        self.renderer.render_step()
			
 
				         return obs, reward, done, {}
			
 
				 
			
 
				     def gen_obs_grid(self, agent_view_size=None):
			
@@ -1242,17 +1258,11 @@ class MiniGridEnv(gym.Env):
 
				 
			
 
				         return img
			
 
				 
			
 
				-    def render(self, mode="human", close=False, highlight=True, tile_size=TILE_PIXELS):
			
 
				+    def _render(self, mode="human", highlight=True, tile_size=TILE_PIXELS):
			
 
				+        assert mode in self.metadata["render_modes"]
			
 
				         """
			
 
				         Render the whole-grid human view
			
 
				         """
			
 
				-        if self.render_mode is not None:
			
 
				-            mode = self.render_mode
			
 
				-        if close:
			
 
				-            if self.window:
			
 
				-                self.window.close()
			
 
				-            return
			
 
				-
			
 
				         if mode == "human" and not self.window:
			
 
				             self.window = Window("gym_minigrid")
			
 
				             self.window.show(block=False)
			
@@ -1302,10 +1312,19 @@ class MiniGridEnv(gym.Env):
 
				         if mode == "human":
			
 
				             self.window.set_caption(self.mission)
			
 
				             self.window.show_img(img)
			
 
				+        else:
			
 
				+            return img
			
 
				 
			
 
				-        return img
			
 
				+    def render(self, mode="human", close=False, highlight=True, tile_size=TILE_PIXELS):
			
 
				+        if close:
			
 
				+            raise Exception(
			
 
				+                "Please close the rendering window using env.close(). Closing the rendering window with the render method is no longer allowed."
			
 
				+            )
			
 
				+        if self.render_mode is not None:
			
 
				+            return self.renderer.get_renders()
			
 
				+        else:
			
 
				+            return self._render(mode, highlight=highlight, tile_size=tile_size)
			
 
				 
			
 
				     def close(self):
			
 
				         if self.window:
			
 
				             self.window.close()
			
 
				-        return
			
--- a/gym_minigrid/wrappers.py
+++ b/gym_minigrid/wrappers.py
@@ -160,7 +160,6 @@ class RGBImgObsWrapper(ObservationWrapper):
 
				     """
			
 
				     Wrapper to use fully observable RGB image as observation,
			
 
				     This can be used to have the agent to solve the gridworld in pixel space.
			
 
				-    To use it, make the unwrapped environment with render_mode='rgb_array'.
			
 
				     """
			
 
				 
			
 
				     def __init__(self, env, tile_size=8):
			
@@ -181,9 +180,10 @@ class RGBImgObsWrapper(ObservationWrapper):
 
				 
			
 
				     def observation(self, obs):
			
 
				         env = self.unwrapped
			
 
				-        assert env.render_mode == "rgb_array", env.render_mode
			
 
				 
			
 
				-        rgb_img = env.render(highlight=False, tile_size=self.tile_size)
			
 
				+        rgb_img = env._render(
			
 
				+            mode="rgb_array", highlight=True, tile_size=self.tile_size
			
 
				+        )
			
 
				 
			
 
				         return {**obs, "image": rgb_img}
			
 
				 
			
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,8 +7,6 @@ include = [
 
				 exclude = [
			
 
				     "**/node_modules",
			
 
				     "**/__pycache__",
			
 
				-
			
 
				-   #"gym_minigrid/**",
			
 
				 ]
			
 
				 
			
 
				 strict = [
			
@@ -33,3 +31,6 @@ reportUntypedFunctionDecorator = "none"
 
				 reportMissingTypeStubs = false
			
 
				 reportUnboundVariable = "warning"
			
 
				 reportGeneralTypeIssues ="none"
			
 
				+
			
 
				+[tool.pytest.ini_options]
			
 
				+filterwarnings = ['ignore:.*step API.*:DeprecationWarning'] # TODO: to be removed when old step API is removed
			
--- a/run_tests.py
+++ b/run_tests.py
@@ -1,227 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-
			
 
				-import random
			
 
				-
			
 
				-import gym
			
 
				-import numpy as np
			
 
				-from gym import spaces
			
 
				-from gym.envs.registration import registry
			
 
				-
			
 
				-from gym_minigrid.envs.empty import EmptyEnv
			
 
				-from gym_minigrid.minigrid import Grid
			
 
				-from gym_minigrid.wrappers import (
			
 
				-    DictObservationSpaceWrapper,
			
 
				-    FlatObsWrapper,
			
 
				-    FullyObsWrapper,
			
 
				-    ImgObsWrapper,
			
 
				-    OneHotPartialObsWrapper,
			
 
				-    ReseedWrapper,
			
 
				-    RGBImgObsWrapper,
			
 
				-    RGBImgPartialObsWrapper,
			
 
				-    ViewSizeWrapper,
			
 
				-)
			
 
				-
			
 
				-env_list = [key for key in registry.keys() if key.startswith("MiniGrid")]
			
 
				-
			
 
				-
			
 
				-print("%d environments registered" % len(env_list))
			
 
				-
			
 
				-for env_idx, env_name in enumerate(env_list):
			
 
				-    print(f"testing {env_name} ({env_idx + 1}/{len(env_list)})")
			
 
				-
			
 
				-    # Load the gym environment
			
 
				-    env = gym.make(env_name, render_mode="rgb_array")
			
 
				-    env.max_steps = min(env.max_steps, 200)
			
 
				-    env.reset()
			
 
				-    env.render()
			
 
				-
			
 
				-    # Verify that the same seed always produces the same environment
			
 
				-    for i in range(0, 5):
			
 
				-        seed = 1337 + i
			
 
				-        _ = env.reset(seed=seed)
			
 
				-        grid1 = env.grid
			
 
				-        _ = env.reset(seed=seed)
			
 
				-        grid2 = env.grid
			
 
				-        assert grid1 == grid2
			
 
				-
			
 
				-    env.reset()
			
 
				-
			
 
				-    # Run for a few episodes
			
 
				-    num_episodes = 0
			
 
				-    while num_episodes < 5:
			
 
				-        # Pick a random action
			
 
				-        action = random.randint(0, env.action_space.n - 1)
			
 
				-
			
 
				-        obs, reward, done, info = env.step(action)
			
 
				-
			
 
				-        # Validate the agent position
			
 
				-        assert env.agent_pos[0] < env.width
			
 
				-        assert env.agent_pos[1] < env.height
			
 
				-
			
 
				-        # Test observation encode/decode roundtrip
			
 
				-        img = obs["image"]
			
 
				-        grid, vis_mask = Grid.decode(img)
			
 
				-        img2 = grid.encode(vis_mask=vis_mask)
			
 
				-        assert np.array_equal(img, img2)
			
 
				-
			
 
				-        # Test the env to string function
			
 
				-        str(env)
			
 
				-
			
 
				-        # Check that the reward is within the specified range
			
 
				-        assert reward >= env.reward_range[0], reward
			
 
				-        assert reward <= env.reward_range[1], reward
			
 
				-
			
 
				-        if done:
			
 
				-            num_episodes += 1
			
 
				-            env.reset()
			
 
				-
			
 
				-        env.render()
			
 
				-
			
 
				-    # Test the close method
			
 
				-    env.close()
			
 
				-
			
 
				-    env = gym.make(env_name)
			
 
				-    env = ReseedWrapper(env)
			
 
				-    for _ in range(10):
			
 
				-        env.reset()
			
 
				-        env.step(0)
			
 
				-        env.close()
			
 
				-
			
 
				-    env = gym.make(env_name)
			
 
				-    env = ImgObsWrapper(env)
			
 
				-    env.reset()
			
 
				-    env.step(0)
			
 
				-    env.close()
			
 
				-
			
 
				-    # Test the fully observable wrapper
			
 
				-    env = gym.make(env_name)
			
 
				-    env = FullyObsWrapper(env)
			
 
				-    env.reset()
			
 
				-    obs, _, _, _ = env.step(0)
			
 
				-    assert obs["image"].shape == env.observation_space.spaces["image"].shape
			
 
				-    env.close()
			
 
				-
			
 
				-    # RGB image observation wrapper
			
 
				-    env = gym.make(env_name)
			
 
				-    env = RGBImgPartialObsWrapper(env)
			
 
				-    env.reset()
			
 
				-    obs, _, _, _ = env.step(0)
			
 
				-    assert obs["image"].mean() > 0
			
 
				-    env.close()
			
 
				-
			
 
				-    env = gym.make(env_name)
			
 
				-    env = FlatObsWrapper(env)
			
 
				-    env.reset()
			
 
				-    env.step(0)
			
 
				-    env.close()
			
 
				-
			
 
				-    env = gym.make(env_name)
			
 
				-    env = ViewSizeWrapper(env, 5)
			
 
				-    env.reset()
			
 
				-    env.step(0)
			
 
				-    env.close()
			
 
				-
			
 
				-    # Test the DictObservationSpaceWrapper
			
 
				-    env = gym.make(env_name)
			
 
				-    env = DictObservationSpaceWrapper(env)
			
 
				-    env.reset()
			
 
				-    mission = env.mission
			
 
				-    obs, _, _, _ = env.step(0)
			
 
				-    assert env.string_to_indices(mission) == [
			
 
				-        value for value in obs["mission"] if value != 0
			
 
				-    ]
			
 
				-    env.close()
			
 
				-
			
 
				-    # Test the wrappers return proper observation spaces.
			
 
				-    wrappers = [RGBImgObsWrapper, RGBImgPartialObsWrapper, OneHotPartialObsWrapper]
			
 
				-    for wrapper in wrappers:
			
 
				-        env = wrapper(gym.make(env_name, render_mode="rgb_array"))
			
 
				-        obs_space, wrapper_name = env.observation_space, wrapper.__name__
			
 
				-        assert isinstance(
			
 
				-            obs_space, spaces.Dict
			
 
				-        ), f"Observation space for {wrapper_name} is not a Dict: {obs_space}."
			
 
				-        # This should not fail either
			
 
				-        ImgObsWrapper(env)
			
 
				-        env.reset()
			
 
				-        env.step(0)
			
 
				-        env.close()
			
 
				-
			
 
				-##############################################################################
			
 
				-
			
 
				-print("testing extra observations")
			
 
				-
			
 
				-
			
 
				-class EmptyEnvWithExtraObs(EmptyEnv):
			
 
				-    """
			
 
				-    Custom environment with an extra observation
			
 
				-    """
			
 
				-
			
 
				-    def __init__(self, **kwargs) -> None:
			
 
				-        super().__init__(size=5, **kwargs)
			
 
				-        self.observation_space["size"] = spaces.Box(
			
 
				-            low=0,
			
 
				-            high=1000,  # gym does not like np.iinfo(np.uint).max,
			
 
				-            shape=(2,),
			
 
				-            dtype=np.uint,
			
 
				-        )
			
 
				-
			
 
				-    def reset(self, **kwargs):
			
 
				-        obs = super().reset(**kwargs)
			
 
				-        obs["size"] = np.array([self.width, self.height], dtype=np.uint)
			
 
				-        return obs
			
 
				-
			
 
				-    def step(self, action):
			
 
				-        obs, reward, done, info = super().step(action)
			
 
				-        obs["size"] = np.array([self.width, self.height], dtype=np.uint)
			
 
				-        return obs, reward, done, info
			
 
				-
			
 
				-
			
 
				-wrappers = [
			
 
				-    OneHotPartialObsWrapper,
			
 
				-    RGBImgObsWrapper,
			
 
				-    RGBImgPartialObsWrapper,
			
 
				-    FullyObsWrapper,
			
 
				-]
			
 
				-for wrapper in wrappers:
			
 
				-    env1 = wrapper(EmptyEnvWithExtraObs(render_mode="rgb_array"))
			
 
				-    env2 = wrapper(gym.make("MiniGrid-Empty-5x5-v0", render_mode="rgb_array"))
			
 
				-
			
 
				-    obs1 = env1.reset(seed=0)
			
 
				-    obs2 = env2.reset(seed=0)
			
 
				-    assert "size" in obs1
			
 
				-    assert obs1["size"].shape == (2,)
			
 
				-    assert (obs1["size"] == [5, 5]).all()
			
 
				-    for key in obs2:
			
 
				-        assert np.array_equal(obs1[key], obs2[key])
			
 
				-
			
 
				-    obs1, reward1, done1, _ = env1.step(0)
			
 
				-    obs2, reward2, done2, _ = env2.step(0)
			
 
				-    assert "size" in obs1
			
 
				-    assert obs1["size"].shape == (2,)
			
 
				-    assert (obs1["size"] == [5, 5]).all()
			
 
				-    for key in obs2:
			
 
				-        assert np.array_equal(obs1[key], obs2[key])
			
 
				-
			
 
				-##############################################################################
			
 
				-
			
 
				-print("testing agent_sees method")
			
 
				-env = gym.make("MiniGrid-DoorKey-6x6-v0")
			
 
				-goal_pos = (env.grid.width - 2, env.grid.height - 2)
			
 
				-
			
 
				-# Test the "in" operator on grid objects
			
 
				-assert ("green", "goal") in env.grid
			
 
				-assert ("blue", "key") not in env.grid
			
 
				-
			
 
				-# Test the env.agent_sees() function
			
 
				-env.reset()
			
 
				-for i in range(0, 500):
			
 
				-    action = random.randint(0, env.action_space.n - 1)
			
 
				-    obs, reward, done, info = env.step(action)
			
 
				-
			
 
				-    grid, _ = Grid.decode(obs["image"])
			
 
				-    goal_visible = ("green", "goal") in grid
			
 
				-
			
 
				-    agent_sees_goal = env.agent_sees(*goal_pos)
			
 
				-    assert agent_sees_goal == goal_visible
			
 
				-    if done:
			
 
				-        env.reset()
			
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,6 @@ setup(
 
				     classifiers=[
			
 
				         "Development Status :: 5 - Production/Stable",
			
 
				         "Programming Language :: Python :: 3",
			
 
				-        "Programming Language :: Python :: 3.6",
			
 
				         "Programming Language :: Python :: 3.7",
			
 
				         "Programming Language :: Python :: 3.8",
			
 
				         "Programming Language :: Python :: 3.9",
			
@@ -44,6 +43,6 @@ setup(
 
				         "numpy>=1.18.0",
			
 
				         "matplotlib>=3.0",
			
 
				     ],
			
 
				-    python_requires=">=3.6",
			
 
				+    python_requires=">=3.7",
			
 
				     tests_require=extras["testing"],
			
 
				 )
			
--- a/test_interactive_mode.py
+++ b/test_interactive_mode.py
@@ -1,25 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-
			
 
				-import random
			
 
				-import time
			
 
				-
			
 
				-import gym
			
 
				-
			
 
				-# Load the gym environment
			
 
				-env = gym.make("MiniGrid-Empty-8x8-v0")
			
 
				-env.reset()
			
 
				-
			
 
				-for i in range(0, 100):
			
 
				-    print(f"step {i}")
			
 
				-
			
 
				-    # Pick a random action
			
 
				-    action = random.randint(0, env.action_space.n - 1)
			
 
				-
			
 
				-    obs, reward, done, info = env.step(action)
			
 
				-
			
 
				-    env.render()
			
 
				-
			
 
				-    time.sleep(0.05)
			
 
				-
			
 
				-# Test the close method
			
 
				-env.close()
			
--- a/tests/__init__.py
+++ b/tests/__init__.py
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -0,0 +1,207 @@
 
				+import gym
			
 
				+import numpy as np
			
 
				+import pytest
			
 
				+from gym.envs.registration import EnvSpec
			
 
				+from gym.utils.env_checker import check_env
			
 
				+
			
 
				+from gym_minigrid.minigrid import Grid
			
 
				+from tests.utils import all_testing_env_specs, assert_equals
			
 
				+
			
 
				+CHECK_ENV_IGNORE_WARNINGS = [
			
 
				+    f"\x1b[33mWARN: {message}\x1b[0m"
			
 
				+    for message in [
			
 
				+        "A Box observation space minimum value is -infinity. This is probably too low.",
			
 
				+        "A Box observation space maximum value is -infinity. This is probably too high.",
			
 
				+        "For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information.",
			
 
				+        "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.",
			
 
				+        "Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.",
			
 
				+        "Core environment is written in old step API which returns one bool instead of two. It is recommended to rewrite the environment with new step API. ",
			
 
				+    ]
			
 
				+]
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize(
			
 
				+    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
			
 
				+)
			
 
				+def test_env(spec):
			
 
				+    # Capture warnings
			
 
				+    env = spec.make(disable_env_checker=True).unwrapped
			
 
				+
			
 
				+    # Test if env adheres to Gym API
			
 
				+    with pytest.warns() as warnings:
			
 
				+        check_env(env)
			
 
				+
			
 
				+    for warning in warnings.list:
			
 
				+        if warning.message.args[0] not in CHECK_ENV_IGNORE_WARNINGS:
			
 
				+            raise gym.error.Error(f"Unexpected warning: {warning.message}")
			
 
				+
			
 
				+
			
 
				+# Note that this precludes running this test in multiple threads.
			
 
				+# However, we probably already can't do multithreading due to some environments.
			
 
				+SEED = 0
			
 
				+NUM_STEPS = 50
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize(
			
 
				+    "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs]
			
 
				+)
			
 
				+def test_env_determinism_rollout(env_spec: EnvSpec):
			
 
				+    """Run a rollout with two environments and assert equality.
			
 
				+
			
 
				+    This test runs a rollout of NUM_STEPS steps with two environments
			
 
				+    initialized with the same seed and asserts that:
			
 
				+
			
 
				+    - observations after the first reset are the same
			
 
				+    - same actions are sampled by the two envs
			
 
				+    - observations are contained in the observation space
			
 
				+    - obs, rew, done and info are equal between the two envs
			
 
				+    """
			
 
				+    # Don't check rollout equality if it's a nondeterministic environment.
			
 
				+    if env_spec.nondeterministic is True:
			
 
				+        return
			
 
				+
			
 
				+    env_1 = env_spec.make(disable_env_checker=True)
			
 
				+    env_2 = env_spec.make(disable_env_checker=True)
			
 
				+
			
 
				+    initial_obs_1 = env_1.reset(seed=SEED)
			
 
				+    initial_obs_2 = env_2.reset(seed=SEED)
			
 
				+    assert_equals(initial_obs_1, initial_obs_2)
			
 
				+
			
 
				+    env_1.action_space.seed(SEED)
			
 
				+
			
 
				+    for time_step in range(NUM_STEPS):
			
 
				+        # We don't evaluate the determinism of actions
			
 
				+        action = env_1.action_space.sample()
			
 
				+
			
 
				+        obs_1, rew_1, done_1, info_1 = env_1.step(action)
			
 
				+        obs_2, rew_2, done_2, info_2 = env_2.step(action)
			
 
				+
			
 
				+        assert_equals(obs_1, obs_2, f"[{time_step}] ")
			
 
				+        assert env_1.observation_space.contains(
			
 
				+            obs_1
			
 
				+        )  # obs_2 verified by previous assertion
			
 
				+
			
 
				+        assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}"
			
 
				+        assert done_1 == done_2, f"[{time_step}] done 1={done_1}, done 2={done_2}"
			
 
				+        assert_equals(info_1, info_2, f"[{time_step}] ")
			
 
				+
			
 
				+        if done_1:  # done_2 verified by previous assertion
			
 
				+            env_1.reset(seed=SEED)
			
 
				+            env_2.reset(seed=SEED)
			
 
				+
			
 
				+    env_1.close()
			
 
				+    env_2.close()
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize(
			
 
				+    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
			
 
				+)
			
 
				+def test_render_modes(spec):
			
 
				+    env = spec.make()
			
 
				+
			
 
				+    for mode in env.metadata.get("render_modes", []):
			
 
				+        if mode != "human":
			
 
				+            new_env = spec.make(render_mode=mode)
			
 
				+
			
 
				+            new_env.reset()
			
 
				+            new_env.step(new_env.action_space.sample())
			
 
				+            new_env.render()
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize("env_id", ["MiniGrid-DoorKey-6x6-v0"])
			
 
				+def test_agent_sees_method(env_id):
			
 
				+    env = gym.make(env_id)
			
 
				+    goal_pos = (env.grid.width - 2, env.grid.height - 2)
			
 
				+
			
 
				+    # Test the "in" operator on grid objects
			
 
				+    assert ("green", "goal") in env.grid
			
 
				+    assert ("blue", "key") not in env.grid
			
 
				+
			
 
				+    # Test the env.agent_sees() function
			
 
				+    env.reset()
			
 
				+    for i in range(0, 500):
			
 
				+        action = env.action_space.sample()
			
 
				+        obs, reward, done, info = env.step(action)
			
 
				+
			
 
				+        grid, _ = Grid.decode(obs["image"])
			
 
				+        goal_visible = ("green", "goal") in grid
			
 
				+
			
 
				+        agent_sees_goal = env.agent_sees(*goal_pos)
			
 
				+        assert agent_sees_goal == goal_visible
			
 
				+        if done:
			
 
				+            env.reset()
			
 
				+
			
 
				+    env.close()
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize(
			
 
				+    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
			
 
				+)
			
 
				+def old_run_test(env_spec):
			
 
				+    # Load the gym environment
			
 
				+    env = env_spec.make()
			
 
				+    env.max_steps = min(env.max_steps, 200)
			
 
				+    env.reset()
			
 
				+    env.render()
			
 
				+
			
 
				+    # Verify that the same seed always produces the same environment
			
 
				+    for i in range(0, 5):
			
 
				+        seed = 1337 + i
			
 
				+        _ = env.reset(seed=seed)
			
 
				+        grid1 = env.grid
			
 
				+        _ = env.reset(seed=seed)
			
 
				+        grid2 = env.grid
			
 
				+        assert grid1 == grid2
			
 
				+
			
 
				+    env.reset()
			
 
				+
			
 
				+    # Run for a few episodes
			
 
				+    num_episodes = 0
			
 
				+    while num_episodes < 5:
			
 
				+        # Pick a random action
			
 
				+        action = env.action_space.sample()
			
 
				+
			
 
				+        obs, reward, done, info = env.step(action)
			
 
				+
			
 
				+        # Validate the agent position
			
 
				+        assert env.agent_pos[0] < env.width
			
 
				+        assert env.agent_pos[1] < env.height
			
 
				+
			
 
				+        # Test observation encode/decode roundtrip
			
 
				+        img = obs["image"]
			
 
				+        grid, vis_mask = Grid.decode(img)
			
 
				+        img2 = grid.encode(vis_mask=vis_mask)
			
 
				+        assert np.array_equal(img, img2)
			
 
				+
			
 
				+        # Test the env to string function
			
 
				+        str(env)
			
 
				+
			
 
				+        # Check that the reward is within the specified range
			
 
				+        assert reward >= env.reward_range[0], reward
			
 
				+        assert reward <= env.reward_range[1], reward
			
 
				+
			
 
				+        if done:
			
 
				+            num_episodes += 1
			
 
				+            env.reset()
			
 
				+
			
 
				+        env.render()
			
 
				+
			
 
				+    # Test the close method
			
 
				+    env.close()
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize("env_id", ["MiniGrid-Empty-8x8-v0"])
			
 
				+def test_interactive_mode(env_id):
			
 
				+    env = gym.make(env_id, render_mode="human")
			
 
				+    env.reset()
			
 
				+
			
 
				+    for i in range(0, 100):
			
 
				+        print(f"step {i}")
			
 
				+
			
 
				+        # Pick a random action
			
 
				+        action = env.action_space.sample()
			
 
				+
			
 
				+        obs, reward, done, info = env.step(action)
			
 
				+
			
 
				+    # Test the close method
			
 
				+    env.close()
			
--- a/tests/test_wrappers.py
+++ b/tests/test_wrappers.py
@@ -0,0 +1,237 @@
 
				+import math
			
 
				+
			
 
				+import gym
			
 
				+import numpy as np
			
 
				+import pytest
			
 
				+
			
 
				+from gym_minigrid.envs import EmptyEnv
			
 
				+from gym_minigrid.minigrid import MiniGridEnv
			
 
				+from gym_minigrid.wrappers import (
			
 
				+    ActionBonus,
			
 
				+    DictObservationSpaceWrapper,
			
 
				+    FlatObsWrapper,
			
 
				+    FullyObsWrapper,
			
 
				+    ImgObsWrapper,
			
 
				+    OneHotPartialObsWrapper,
			
 
				+    ReseedWrapper,
			
 
				+    RGBImgObsWrapper,
			
 
				+    RGBImgPartialObsWrapper,
			
 
				+    StateBonus,
			
 
				+    ViewSizeWrapper,
			
 
				+)
			
 
				+from tests.utils import all_testing_env_specs, assert_equals
			
 
				+
			
 
				+SEEDS = [100, 243, 500]
			
 
				+NUM_STEPS = 100
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize(
			
 
				+    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
			
 
				+)
			
 
				+def test_reseed_wrapper(env_spec):
			
 
				+    """
			
 
				+    Test the ReseedWrapper with a list of SEEDS.
			
 
				+    """
			
 
				+    unwrapped_env = env_spec.make()
			
 
				+    env = env_spec.make()
			
 
				+    env = ReseedWrapper(env, seeds=SEEDS)
			
 
				+    env.action_space.seed(0)
			
 
				+
			
 
				+    for seed in SEEDS:
			
 
				+        env.reset()
			
 
				+        unwrapped_env.reset(seed=seed)
			
 
				+        for time_step in range(NUM_STEPS):
			
 
				+            action = env.action_space.sample()
			
 
				+
			
 
				+            obs, rew, done, info = env.step(action)
			
 
				+            (
			
 
				+                unwrapped_obs,
			
 
				+                unwrapped_rew,
			
 
				+                unwrapped_done,
			
 
				+                unwrapped_info,
			
 
				+            ) = unwrapped_env.step(action)
			
 
				+
			
 
				+            assert_equals(obs, unwrapped_obs, f"[{time_step}] ")
			
 
				+            assert unwrapped_env.observation_space.contains(obs)
			
 
				+
			
 
				+            assert (
			
 
				+                rew == unwrapped_rew
			
 
				+            ), f"[{time_step}] reward={rew}, unwrapped reward={unwrapped_rew}"
			
 
				+            assert (
			
 
				+                done == unwrapped_done
			
 
				+            ), f"[{time_step}] done={done}, unwrapped done={unwrapped_done}"
			
 
				+            assert_equals(info, unwrapped_info, f"[{time_step}] ")
			
 
				+
			
 
				+            # Start the next seed
			
 
				+            if done:
			
 
				+                break
			
 
				+
			
 
				+    env.close()
			
 
				+    unwrapped_env.close()
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize("env_id", ["MiniGrid-Empty-16x16-v0"])
			
 
				+def test_state_bonus_wrapper(env_id):
			
 
				+    env = gym.make(env_id)
			
 
				+    wrapped_env = StateBonus(gym.make(env_id))
			
 
				+
			
 
				+    action_forward = MiniGridEnv.Actions.forward
			
 
				+    action_left = MiniGridEnv.Actions.left
			
 
				+    action_right = MiniGridEnv.Actions.right
			
 
				+
			
 
				+    for _ in range(10):
			
 
				+        wrapped_env.reset()
			
 
				+        for _ in range(5):
			
 
				+            wrapped_env.step(action_forward)
			
 
				+
			
 
				+    # Turn left 3 times (check that actions don't influence bonus)
			
 
				+    for _ in range(3):
			
 
				+        _, wrapped_rew, _, _ = wrapped_env.step(action_left)
			
 
				+
			
 
				+    env.reset()
			
 
				+    for _ in range(5):
			
 
				+        env.step(action_forward)
			
 
				+    # Turn right 3 times
			
 
				+    for _ in range(3):
			
 
				+        _, rew, _, _ = env.step(action_right)
			
 
				+
			
 
				+    expected_bonus_reward = rew + 1 / math.sqrt(13)
			
 
				+
			
 
				+    assert expected_bonus_reward == wrapped_rew
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize("env_id", ["MiniGrid-Empty-16x16-v0"])
			
 
				+def test_action_bonus_wrapper(env_id):
			
 
				+    env = gym.make(env_id)
			
 
				+    wrapped_env = ActionBonus(gym.make(env_id))
			
 
				+
			
 
				+    action = MiniGridEnv.Actions.forward
			
 
				+
			
 
				+    for _ in range(10):
			
 
				+        wrapped_env.reset()
			
 
				+        for _ in range(5):
			
 
				+            _, wrapped_rew, _, _ = wrapped_env.step(action)
			
 
				+
			
 
				+    env.reset()
			
 
				+    for _ in range(5):
			
 
				+        _, rew, _, _ = env.step(action)
			
 
				+
			
 
				+    expected_bonus_reward = rew + 1 / math.sqrt(10)
			
 
				+
			
 
				+    assert expected_bonus_reward == wrapped_rew
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize(
			
 
				+    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
			
 
				+)
			
 
				+def test_dict_observation_space_wrapper(env_spec):
			
 
				+    env = env_spec.make()
			
 
				+    env = DictObservationSpaceWrapper(env)
			
 
				+    env.reset()
			
 
				+    mission = env.mission
			
 
				+    obs, _, _, _ = env.step(0)
			
 
				+    assert env.string_to_indices(mission) == [
			
 
				+        value for value in obs["mission"] if value != 0
			
 
				+    ]
			
 
				+    env.close()
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize(
			
 
				+    "wrapper",
			
 
				+    [
			
 
				+        ReseedWrapper,
			
 
				+        ImgObsWrapper,
			
 
				+        FlatObsWrapper,
			
 
				+        ViewSizeWrapper,
			
 
				+        DictObservationSpaceWrapper,
			
 
				+        OneHotPartialObsWrapper,
			
 
				+        RGBImgPartialObsWrapper,
			
 
				+        FullyObsWrapper,
			
 
				+    ],
			
 
				+)
			
 
				+@pytest.mark.parametrize(
			
 
				+    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
			
 
				+)
			
 
				+def test_main_wrappers(wrapper, env_spec):
			
 
				+    env = env_spec.make()
			
 
				+    env = wrapper(env)
			
 
				+    for _ in range(10):
			
 
				+        env.reset()
			
 
				+        env.step(0)
			
 
				+    env.close()
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize(
			
 
				+    "wrapper",
			
 
				+    [
			
 
				+        OneHotPartialObsWrapper,
			
 
				+        RGBImgPartialObsWrapper,
			
 
				+        FullyObsWrapper,
			
 
				+    ],
			
 
				+)
			
 
				+@pytest.mark.parametrize(
			
 
				+    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
			
 
				+)
			
 
				+def test_observation_space_wrappers(wrapper, env_spec):
			
 
				+    env = wrapper(env_spec.make(disable_env_checker=True))
			
 
				+    obs_space, wrapper_name = env.observation_space, wrapper.__name__
			
 
				+    assert isinstance(
			
 
				+        obs_space, gym.spaces.Dict
			
 
				+    ), f"Observation space for {wrapper_name} is not a Dict: {obs_space}."
			
 
				+    # This should not fail either
			
 
				+    ImgObsWrapper(env)
			
 
				+    env.reset()
			
 
				+    env.step(0)
			
 
				+    env.close()
			
 
				+
			
 
				+
			
 
				+class EmptyEnvWithExtraObs(EmptyEnv):
			
 
				+    """
			
 
				+    Custom environment with an extra observation
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self) -> None:
			
 
				+        super().__init__(size=5)
			
 
				+        self.observation_space["size"] = gym.spaces.Box(
			
 
				+            low=0, high=np.iinfo(np.uint).max, shape=(2,), dtype=np.uint
			
 
				+        )
			
 
				+
			
 
				+    def reset(self, **kwargs):
			
 
				+        obs = super().reset(**kwargs)
			
 
				+        obs["size"] = np.array([self.width, self.height])
			
 
				+        return obs
			
 
				+
			
 
				+    def step(self, action):
			
 
				+        obs, reward, done, info = super().step(action)
			
 
				+        obs["size"] = np.array([self.width, self.height])
			
 
				+        return obs, reward, done, info
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize(
			
 
				+    "wrapper",
			
 
				+    [
			
 
				+        OneHotPartialObsWrapper,
			
 
				+        RGBImgObsWrapper,
			
 
				+        RGBImgPartialObsWrapper,
			
 
				+        FullyObsWrapper,
			
 
				+    ],
			
 
				+)
			
 
				+def test_agent_sees_method(wrapper):
			
 
				+    env1 = wrapper(EmptyEnvWithExtraObs())
			
 
				+    env2 = wrapper(gym.make("MiniGrid-Empty-5x5-v0"))
			
 
				+
			
 
				+    obs1 = env1.reset(seed=0)
			
 
				+    obs2 = env2.reset(seed=0)
			
 
				+    assert "size" in obs1
			
 
				+    assert obs1["size"].shape == (2,)
			
 
				+    assert (obs1["size"] == [5, 5]).all()
			
 
				+    for key in obs2:
			
 
				+        assert np.array_equal(obs1[key], obs2[key])
			
 
				+
			
 
				+    obs1, reward1, done1, _ = env1.step(0)
			
 
				+    obs2, reward2, done2, _ = env2.step(0)
			
 
				+    assert "size" in obs1
			
 
				+    assert obs1["size"].shape == (2,)
			
 
				+    assert (obs1["size"] == [5, 5]).all()
			
 
				+    for key in obs2:
			
 
				+        assert np.array_equal(obs1[key], obs2[key])
			
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -0,0 +1,34 @@
 
				+"""Finds all the specs that we can test with"""
			
 
				+import gym
			
 
				+import numpy as np
			
 
				+
			
 
				+all_testing_env_specs = [
			
 
				+    env_spec
			
 
				+    for env_spec in gym.envs.registry.values()
			
 
				+    if env_spec.entry_point.startswith("gym_minigrid.envs")
			
 
				+]
			
 
				+
			
 
				+
			
 
				+def assert_equals(a, b, prefix=None):
			
 
				+    """Assert equality of data structures `a` and `b`.
			
 
				+
			
 
				+    Args:
			
 
				+        a: first data structure
			
 
				+        b: second data structure
			
 
				+        prefix: prefix for failed assertion message for types and dicts
			
 
				+    """
			
 
				+    assert type(a) == type(b), f"{prefix}Differing types: {a} and {b}"
			
 
				+    if isinstance(a, dict):
			
 
				+        assert list(a.keys()) == list(b.keys()), f"{prefix}Key sets differ: {a} and {b}"
			
 
				+
			
 
				+        for k in a.keys():
			
 
				+            v_a = a[k]
			
 
				+            v_b = b[k]
			
 
				+            assert_equals(v_a, v_b)
			
 
				+    elif isinstance(a, np.ndarray):
			
 
				+        np.testing.assert_array_equal(a, b)
			
 
				+    elif isinstance(a, tuple):
			
 
				+        for elem_from_a, elem_from_b in zip(a, b):
			
 
				+            assert_equals(elem_from_a, elem_from_b)
			
 
				+    else:
			
 
				+        assert a == b