
Merge pull request #208 from rodrigodelazcano/pytest

Add Pytest
Mark Towers 2 years ago
parent
commit
20384cfa59

+ 4 - 6
.github/workflows/build.yml

@@ -6,14 +6,12 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.6', '3.7', '3.8', '3.9', '3.10']
+        python-version: ['3.7', '3.8', '3.9', '3.10']
     steps:
       - uses: actions/checkout@v2
       - run: |
           docker build -f py.Dockerfile \
             --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
-             --tag gym-minigrid-docker .
-      
-      # TODO: Add and fix tests for pytest
-      # - name: Run tests
-      #   run: docker run gym-docker pytest
+             --tag gym-minigrid-docker .
+      - name: Run tests
+        run: docker run gym-minigrid-docker pytest

benchmark.py → gym_minigrid/benchmark.py


manual_control.py → gym_minigrid/manual_control.py


+ 31 - 12
gym_minigrid/minigrid.py

@@ -3,10 +3,12 @@ import math
 import string
 from abc import abstractmethod
 from enum import IntEnum
+from functools import partial
 
 import gym
 import numpy as np
 from gym import spaces
+from gym.utils.renderer import Renderer
 
 # Size in pixels of a tile in the full-scale human view
 from gym_minigrid.rendering import (
@@ -638,7 +640,7 @@ class MiniGridEnv(gym.Env):
         # Deprecated: use 'render_modes' instead
         "render.modes": ["human", "rgb_array"],
         "video.frames_per_second": 10,  # Deprecated: use 'render_fps' instead
-        "render_modes": ["human", "rgb_array"],
+        "render_modes": ["human", "rgb_array", "single_rgb_array"],
         "render_fps": 10,
     }
 
@@ -668,6 +670,8 @@
         see_through_walls: bool = False,
         agent_view_size: int = 7,
         render_mode: str = None,
+        highlight: bool = True,
+        tile_size: int = TILE_PIXELS,
         **kwargs
     ):
         # Can't set both grid_size and width/height
@@ -708,6 +712,12 @@
 
         # render mode
         self.render_mode = render_mode
+        render_frame = partial(
+            self._render,
+            highlight=highlight,
+            tile_size=tile_size,
+        )
+        self.renderer = Renderer(self.render_mode, render_frame)
 
         # Range of possible rewards
         self.reward_range = (0, 1)
@@ -753,7 +763,12 @@
         # Return first observation
         obs = self.gen_obs()
 
-        return obs
+        self.renderer.reset()
+        self.renderer.render_step()
+        if not return_info:
+            return obs
+        else:
+            return obs, {}
 
     def hash(self, size=16):
         """Compute a hash that uniquely identifies the current state of the environment.
@@ -1164,6 +1179,7 @@ class MiniGridEnv(gym.Env):
 
         obs = self.gen_obs()
 
+        self.renderer.render_step()
         return obs, reward, done, {}
 
     def gen_obs_grid(self, agent_view_size=None):
@@ -1242,17 +1258,11 @@
 
         return img
 
-    def render(self, mode="human", close=False, highlight=True, tile_size=TILE_PIXELS):
+    def _render(self, mode="human", highlight=True, tile_size=TILE_PIXELS):
+        assert mode in self.metadata["render_modes"]
         """
         Render the whole-grid human view
         """
-        if self.render_mode is not None:
-            mode = self.render_mode
-        if close:
-            if self.window:
-                self.window.close()
-            return
-
         if mode == "human" and not self.window:
             self.window = Window("gym_minigrid")
             self.window.show(block=False)
@@ -1302,10 +1312,19 @@
         if mode == "human":
             self.window.set_caption(self.mission)
             self.window.show_img(img)
+        else:
+            return img
 
-        return img
+    def render(self, mode="human", close=False, highlight=True, tile_size=TILE_PIXELS):
+        if close:
+            raise Exception(
+                "Please close the rendering window using env.close(). Closing the rendering window with the render method is no longer allowed."
+            )
+        if self.render_mode is not None:
+            return self.renderer.get_renders()
+        else:
+            return self._render(mode, highlight=highlight, tile_size=tile_size)
 
     def close(self):
         if self.window:
             self.window.close()
-        return

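For orientation (not part of the diff): with the changes above, passing `render_mode` at construction wires frame generation through `gym.utils.renderer.Renderer`, and `render()` then returns the collected frames instead of drawing on demand. A minimal sketch, assuming the registered `MiniGrid-Empty-8x8-v0` id and an arbitrary random-action loop:

```python
# Illustrative sketch only: exercising the Renderer-backed flow added above.
import gym

import gym_minigrid  # noqa: F401  # importing registers the MiniGrid-* environments

# "single_rgb_array" is one of the modes now listed in metadata["render_modes"]
env = gym.make("MiniGrid-Empty-8x8-v0", render_mode="single_rgb_array")
obs = env.reset()
for _ in range(10):
    obs, reward, done, info = env.step(env.action_space.sample())
    frame = env.render()  # served from self.renderer.get_renders()
    if done:
        obs = env.reset()
env.close()
```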
+ 3 - 3
gym_minigrid/wrappers.py

@@ -160,7 +160,6 @@ class RGBImgObsWrapper(ObservationWrapper):
     """
     Wrapper to use fully observable RGB image as observation,
     This can be used to have the agent to solve the gridworld in pixel space.
-    To use it, make the unwrapped environment with render_mode='rgb_array'.
     """
 
     def __init__(self, env, tile_size=8):
@@ -181,9 +180,10 @@
 
     def observation(self, obs):
         env = self.unwrapped
-        assert env.render_mode == "rgb_array", env.render_mode
 
-        rgb_img = env.render(highlight=False, tile_size=self.tile_size)
+        rgb_img = env._render(
+            mode="rgb_array", highlight=True, tile_size=self.tile_size
+        )
 
         return {**obs, "image": rgb_img}
 

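A short usage sketch (environment id and tile size are illustrative): since the wrapper now calls `env._render(mode="rgb_array", ...)` itself, the wrapped environment no longer needs to be created with `render_mode="rgb_array"`.

```python
# Sketch only: RGBImgObsWrapper after this change renders its own RGB frames.
import gym

from gym_minigrid.wrappers import ImgObsWrapper, RGBImgObsWrapper

env = RGBImgObsWrapper(gym.make("MiniGrid-Empty-8x8-v0"), tile_size=8)
obs = env.reset()
print(obs["image"].shape)  # full-grid RGB rendering alongside the other obs keys

# The dict observation can still be reduced to the image alone if needed
env = ImgObsWrapper(env)
```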
+ 3 - 2
pyproject.toml

@@ -7,8 +7,6 @@ include = [
 exclude = [
     "**/node_modules",
     "**/__pycache__",
-
-   #"gym_minigrid/**",
 ]
 
 strict = [
@@ -33,3 +31,6 @@ reportUntypedFunctionDecorator = "none"
 reportMissingTypeStubs = false
 reportUnboundVariable = "warning"
 reportGeneralTypeIssues ="none"
+
+[tool.pytest.ini_options]
+filterwarnings = ['ignore:.*step API.*:DeprecationWarning'] # TODO: to be removed when old step API is removed

+ 0 - 227
run_tests.py

@@ -1,227 +0,0 @@
-#!/usr/bin/env python3
-
-import random
-
-import gym
-import numpy as np
-from gym import spaces
-from gym.envs.registration import registry
-
-from gym_minigrid.envs.empty import EmptyEnv
-from gym_minigrid.minigrid import Grid
-from gym_minigrid.wrappers import (
-    DictObservationSpaceWrapper,
-    FlatObsWrapper,
-    FullyObsWrapper,
-    ImgObsWrapper,
-    OneHotPartialObsWrapper,
-    ReseedWrapper,
-    RGBImgObsWrapper,
-    RGBImgPartialObsWrapper,
-    ViewSizeWrapper,
-)
-
-env_list = [key for key in registry.keys() if key.startswith("MiniGrid")]
-
-
-print("%d environments registered" % len(env_list))
-
-for env_idx, env_name in enumerate(env_list):
-    print(f"testing {env_name} ({env_idx + 1}/{len(env_list)})")
-
-    # Load the gym environment
-    env = gym.make(env_name, render_mode="rgb_array")
-    env.max_steps = min(env.max_steps, 200)
-    env.reset()
-    env.render()
-
-    # Verify that the same seed always produces the same environment
-    for i in range(0, 5):
-        seed = 1337 + i
-        _ = env.reset(seed=seed)
-        grid1 = env.grid
-        _ = env.reset(seed=seed)
-        grid2 = env.grid
-        assert grid1 == grid2
-
-    env.reset()
-
-    # Run for a few episodes
-    num_episodes = 0
-    while num_episodes < 5:
-        # Pick a random action
-        action = random.randint(0, env.action_space.n - 1)
-
-        obs, reward, done, info = env.step(action)
-
-        # Validate the agent position
-        assert env.agent_pos[0] < env.width
-        assert env.agent_pos[1] < env.height
-
-        # Test observation encode/decode roundtrip
-        img = obs["image"]
-        grid, vis_mask = Grid.decode(img)
-        img2 = grid.encode(vis_mask=vis_mask)
-        assert np.array_equal(img, img2)
-
-        # Test the env to string function
-        str(env)
-
-        # Check that the reward is within the specified range
-        assert reward >= env.reward_range[0], reward
-        assert reward <= env.reward_range[1], reward
-
-        if done:
-            num_episodes += 1
-            env.reset()
-
-        env.render()
-
-    # Test the close method
-    env.close()
-
-    env = gym.make(env_name)
-    env = ReseedWrapper(env)
-    for _ in range(10):
-        env.reset()
-        env.step(0)
-        env.close()
-
-    env = gym.make(env_name)
-    env = ImgObsWrapper(env)
-    env.reset()
-    env.step(0)
-    env.close()
-
-    # Test the fully observable wrapper
-    env = gym.make(env_name)
-    env = FullyObsWrapper(env)
-    env.reset()
-    obs, _, _, _ = env.step(0)
-    assert obs["image"].shape == env.observation_space.spaces["image"].shape
-    env.close()
-
-    # RGB image observation wrapper
-    env = gym.make(env_name)
-    env = RGBImgPartialObsWrapper(env)
-    env.reset()
-    obs, _, _, _ = env.step(0)
-    assert obs["image"].mean() > 0
-    env.close()
-
-    env = gym.make(env_name)
-    env = FlatObsWrapper(env)
-    env.reset()
-    env.step(0)
-    env.close()
-
-    env = gym.make(env_name)
-    env = ViewSizeWrapper(env, 5)
-    env.reset()
-    env.step(0)
-    env.close()
-
-    # Test the DictObservationSpaceWrapper
-    env = gym.make(env_name)
-    env = DictObservationSpaceWrapper(env)
-    env.reset()
-    mission = env.mission
-    obs, _, _, _ = env.step(0)
-    assert env.string_to_indices(mission) == [
-        value for value in obs["mission"] if value != 0
-    ]
-    env.close()
-
-    # Test the wrappers return proper observation spaces.
-    wrappers = [RGBImgObsWrapper, RGBImgPartialObsWrapper, OneHotPartialObsWrapper]
-    for wrapper in wrappers:
-        env = wrapper(gym.make(env_name, render_mode="rgb_array"))
-        obs_space, wrapper_name = env.observation_space, wrapper.__name__
-        assert isinstance(
-            obs_space, spaces.Dict
-        ), f"Observation space for {wrapper_name} is not a Dict: {obs_space}."
-        # This should not fail either
-        ImgObsWrapper(env)
-        env.reset()
-        env.step(0)
-        env.close()
-
-##############################################################################
-
-print("testing extra observations")
-
-
-class EmptyEnvWithExtraObs(EmptyEnv):
-    """
-    Custom environment with an extra observation
-    """
-
-    def __init__(self, **kwargs) -> None:
-        super().__init__(size=5, **kwargs)
-        self.observation_space["size"] = spaces.Box(
-            low=0,
-            high=1000,  # gym does not like np.iinfo(np.uint).max,
-            shape=(2,),
-            dtype=np.uint,
-        )
-
-    def reset(self, **kwargs):
-        obs = super().reset(**kwargs)
-        obs["size"] = np.array([self.width, self.height], dtype=np.uint)
-        return obs
-
-    def step(self, action):
-        obs, reward, done, info = super().step(action)
-        obs["size"] = np.array([self.width, self.height], dtype=np.uint)
-        return obs, reward, done, info
-
-
-wrappers = [
-    OneHotPartialObsWrapper,
-    RGBImgObsWrapper,
-    RGBImgPartialObsWrapper,
-    FullyObsWrapper,
-]
-for wrapper in wrappers:
-    env1 = wrapper(EmptyEnvWithExtraObs(render_mode="rgb_array"))
-    env2 = wrapper(gym.make("MiniGrid-Empty-5x5-v0", render_mode="rgb_array"))
-
-    obs1 = env1.reset(seed=0)
-    obs2 = env2.reset(seed=0)
-    assert "size" in obs1
-    assert obs1["size"].shape == (2,)
-    assert (obs1["size"] == [5, 5]).all()
-    for key in obs2:
-        assert np.array_equal(obs1[key], obs2[key])
-
-    obs1, reward1, done1, _ = env1.step(0)
-    obs2, reward2, done2, _ = env2.step(0)
-    assert "size" in obs1
-    assert obs1["size"].shape == (2,)
-    assert (obs1["size"] == [5, 5]).all()
-    for key in obs2:
-        assert np.array_equal(obs1[key], obs2[key])
-
-##############################################################################
-
-print("testing agent_sees method")
-env = gym.make("MiniGrid-DoorKey-6x6-v0")
-goal_pos = (env.grid.width - 2, env.grid.height - 2)
-
-# Test the "in" operator on grid objects
-assert ("green", "goal") in env.grid
-assert ("blue", "key") not in env.grid
-
-# Test the env.agent_sees() function
-env.reset()
-for i in range(0, 500):
-    action = random.randint(0, env.action_space.n - 1)
-    obs, reward, done, info = env.step(action)
-
-    grid, _ = Grid.decode(obs["image"])
-    goal_visible = ("green", "goal") in grid
-
-    agent_sees_goal = env.agent_sees(*goal_pos)
-    assert agent_sees_goal == goal_visible
-    if done:
-        env.reset()

+ 1 - 2
setup.py

@@ -21,7 +21,6 @@ setup(
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
@@ -44,6 +43,6 @@ setup(
         "numpy>=1.18.0",
         "matplotlib>=3.0",
     ],
-    python_requires=">=3.6",
+    python_requires=">=3.7",
     tests_require=extras["testing"],
 )

+ 0 - 25
test_interactive_mode.py

@@ -1,25 +0,0 @@
-#!/usr/bin/env python3
-
-import random
-import time
-
-import gym
-
-# Load the gym environment
-env = gym.make("MiniGrid-Empty-8x8-v0")
-env.reset()
-
-for i in range(0, 100):
-    print(f"step {i}")
-
-    # Pick a random action
-    action = random.randint(0, env.action_space.n - 1)
-
-    obs, reward, done, info = env.step(action)
-
-    env.render()
-
-    time.sleep(0.05)
-
-# Test the close method
-env.close()

+ 0 - 0
tests/__init__.py


+ 207 - 0
tests/test_envs.py

@@ -0,0 +1,207 @@
+import gym
+import numpy as np
+import pytest
+from gym.envs.registration import EnvSpec
+from gym.utils.env_checker import check_env
+
+from gym_minigrid.minigrid import Grid
+from tests.utils import all_testing_env_specs, assert_equals
+
+CHECK_ENV_IGNORE_WARNINGS = [
+    f"\x1b[33mWARN: {message}\x1b[0m"
+    for message in [
+        "A Box observation space minimum value is -infinity. This is probably too low.",
+        "A Box observation space maximum value is -infinity. This is probably too high.",
+        "For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information.",
+        "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.",
+        "Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.",
+        "Core environment is written in old step API which returns one bool instead of two. It is recommended to rewrite the environment with new step API. ",
+    ]
+]
+
+
+@pytest.mark.parametrize(
+    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
+)
+def test_env(spec):
+    # Capture warnings
+    env = spec.make(disable_env_checker=True).unwrapped
+
+    # Test if env adheres to Gym API
+    with pytest.warns() as warnings:
+        check_env(env)
+
+    for warning in warnings.list:
+        if warning.message.args[0] not in CHECK_ENV_IGNORE_WARNINGS:
+            raise gym.error.Error(f"Unexpected warning: {warning.message}")
+
+
+# Note that this precludes running this test in multiple threads.
+# However, we probably already can't do multithreading due to some environments.
+SEED = 0
+NUM_STEPS = 50
+
+
+@pytest.mark.parametrize(
+    "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs]
+)
+def test_env_determinism_rollout(env_spec: EnvSpec):
+    """Run a rollout with two environments and assert equality.
+
+    This test runs a rollout of NUM_STEPS steps with two environments
+    initialized with the same seed and asserts that:
+
+    - observations after the first reset are the same
+    - the same actions are sampled by the two envs
+    - observations are contained in the observation space
+    - obs, rew, done and info are equal between the two envs
+    """
+    # Don't check rollout equality if it's a nondeterministic environment.
+    if env_spec.nondeterministic is True:
+        return
+
+    env_1 = env_spec.make(disable_env_checker=True)
+    env_2 = env_spec.make(disable_env_checker=True)
+
+    initial_obs_1 = env_1.reset(seed=SEED)
+    initial_obs_2 = env_2.reset(seed=SEED)
+    assert_equals(initial_obs_1, initial_obs_2)
+
+    env_1.action_space.seed(SEED)
+
+    for time_step in range(NUM_STEPS):
+        # We don't evaluate the determinism of actions
+        action = env_1.action_space.sample()
+
+        obs_1, rew_1, done_1, info_1 = env_1.step(action)
+        obs_2, rew_2, done_2, info_2 = env_2.step(action)
+
+        assert_equals(obs_1, obs_2, f"[{time_step}] ")
+        assert env_1.observation_space.contains(
+            obs_1
+        )  # obs_2 verified by previous assertion
+
+        assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}"
+        assert done_1 == done_2, f"[{time_step}] done 1={done_1}, done 2={done_2}"
+        assert_equals(info_1, info_2, f"[{time_step}] ")
+
+        if done_1:  # done_2 verified by previous assertion
+            env_1.reset(seed=SEED)
+            env_2.reset(seed=SEED)
+
+    env_1.close()
+    env_2.close()
+
+
+@pytest.mark.parametrize(
+    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
+)
+def test_render_modes(spec):
+    env = spec.make()
+
+    for mode in env.metadata.get("render_modes", []):
+        if mode != "human":
+            new_env = spec.make(render_mode=mode)
+
+            new_env.reset()
+            new_env.step(new_env.action_space.sample())
+            new_env.render()
+
+
+@pytest.mark.parametrize("env_id", ["MiniGrid-DoorKey-6x6-v0"])
+def test_agent_sees_method(env_id):
+    env = gym.make(env_id)
+    goal_pos = (env.grid.width - 2, env.grid.height - 2)
+
+    # Test the "in" operator on grid objects
+    assert ("green", "goal") in env.grid
+    assert ("blue", "key") not in env.grid
+
+    # Test the env.agent_sees() function
+    env.reset()
+    for i in range(0, 500):
+        action = env.action_space.sample()
+        obs, reward, done, info = env.step(action)
+
+        grid, _ = Grid.decode(obs["image"])
+        goal_visible = ("green", "goal") in grid
+
+        agent_sees_goal = env.agent_sees(*goal_pos)
+        assert agent_sees_goal == goal_visible
+        if done:
+            env.reset()
+
+    env.close()
+
+
+@pytest.mark.parametrize(
+    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
+)
+def old_run_test(env_spec):
+    # Load the gym environment
+    env = env_spec.make()
+    env.max_steps = min(env.max_steps, 200)
+    env.reset()
+    env.render()
+
+    # Verify that the same seed always produces the same environment
+    for i in range(0, 5):
+        seed = 1337 + i
+        _ = env.reset(seed=seed)
+        grid1 = env.grid
+        _ = env.reset(seed=seed)
+        grid2 = env.grid
+        assert grid1 == grid2
+
+    env.reset()
+
+    # Run for a few episodes
+    num_episodes = 0
+    while num_episodes < 5:
+        # Pick a random action
+        action = env.action_space.sample()
+
+        obs, reward, done, info = env.step(action)
+
+        # Validate the agent position
+        assert env.agent_pos[0] < env.width
+        assert env.agent_pos[1] < env.height
+
+        # Test observation encode/decode roundtrip
+        img = obs["image"]
+        grid, vis_mask = Grid.decode(img)
+        img2 = grid.encode(vis_mask=vis_mask)
+        assert np.array_equal(img, img2)
+
+        # Test the env to string function
+        str(env)
+
+        # Check that the reward is within the specified range
+        assert reward >= env.reward_range[0], reward
+        assert reward <= env.reward_range[1], reward
+
+        if done:
+            num_episodes += 1
+            env.reset()
+
+        env.render()
+
+    # Test the close method
+    env.close()
+
+
+@pytest.mark.parametrize("env_id", ["MiniGrid-Empty-8x8-v0"])
+def test_interactive_mode(env_id):
+    env = gym.make(env_id, render_mode="human")
+    env.reset()
+
+    for i in range(0, 100):
+        print(f"step {i}")
+
+        # Pick a random action
+        action = env.action_space.sample()
+
+        obs, reward, done, info = env.step(action)
+
+    # Test the close method
+    env.close()

+ 237 - 0
tests/test_wrappers.py

@@ -0,0 +1,237 @@
+import math
+
+import gym
+import numpy as np
+import pytest
+
+from gym_minigrid.envs import EmptyEnv
+from gym_minigrid.minigrid import MiniGridEnv
+from gym_minigrid.wrappers import (
+    ActionBonus,
+    DictObservationSpaceWrapper,
+    FlatObsWrapper,
+    FullyObsWrapper,
+    ImgObsWrapper,
+    OneHotPartialObsWrapper,
+    ReseedWrapper,
+    RGBImgObsWrapper,
+    RGBImgPartialObsWrapper,
+    StateBonus,
+    ViewSizeWrapper,
+)
+from tests.utils import all_testing_env_specs, assert_equals
+
+SEEDS = [100, 243, 500]
+NUM_STEPS = 100
+
+
+@pytest.mark.parametrize(
+    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
+)
+def test_reseed_wrapper(env_spec):
+    """
+    Test the ReseedWrapper with a list of SEEDS.
+    """
+    unwrapped_env = env_spec.make()
+    env = env_spec.make()
+    env = ReseedWrapper(env, seeds=SEEDS)
+    env.action_space.seed(0)
+
+    for seed in SEEDS:
+        env.reset()
+        unwrapped_env.reset(seed=seed)
+        for time_step in range(NUM_STEPS):
+            action = env.action_space.sample()
+
+            obs, rew, done, info = env.step(action)
+            (
+                unwrapped_obs,
+                unwrapped_rew,
+                unwrapped_done,
+                unwrapped_info,
+            ) = unwrapped_env.step(action)
+
+            assert_equals(obs, unwrapped_obs, f"[{time_step}] ")
+            assert unwrapped_env.observation_space.contains(obs)
+
+            assert (
+                rew == unwrapped_rew
+            ), f"[{time_step}] reward={rew}, unwrapped reward={unwrapped_rew}"
+            assert (
+                done == unwrapped_done
+            ), f"[{time_step}] done={done}, unwrapped done={unwrapped_done}"
+            assert_equals(info, unwrapped_info, f"[{time_step}] ")
+
+            # Start the next seed
+            if done:
+                break
+
+    env.close()
+    unwrapped_env.close()
+
+
+@pytest.mark.parametrize("env_id", ["MiniGrid-Empty-16x16-v0"])
+def test_state_bonus_wrapper(env_id):
+    env = gym.make(env_id)
+    wrapped_env = StateBonus(gym.make(env_id))
+
+    action_forward = MiniGridEnv.Actions.forward
+    action_left = MiniGridEnv.Actions.left
+    action_right = MiniGridEnv.Actions.right
+
+    for _ in range(10):
+        wrapped_env.reset()
+        for _ in range(5):
+            wrapped_env.step(action_forward)
+
+    # Turn left 3 times (check that actions don't influence bonus)
+    for _ in range(3):
+        _, wrapped_rew, _, _ = wrapped_env.step(action_left)
+
+    env.reset()
+    for _ in range(5):
+        env.step(action_forward)
+    # Turn right 3 times
+    for _ in range(3):
+        _, rew, _, _ = env.step(action_right)
+
+    expected_bonus_reward = rew + 1 / math.sqrt(13)
+
+    assert expected_bonus_reward == wrapped_rew
+
+
+@pytest.mark.parametrize("env_id", ["MiniGrid-Empty-16x16-v0"])
+def test_action_bonus_wrapper(env_id):
+    env = gym.make(env_id)
+    wrapped_env = ActionBonus(gym.make(env_id))
+
+    action = MiniGridEnv.Actions.forward
+
+    for _ in range(10):
+        wrapped_env.reset()
+        for _ in range(5):
+            _, wrapped_rew, _, _ = wrapped_env.step(action)
+
+    env.reset()
+    for _ in range(5):
+        _, rew, _, _ = env.step(action)
+
+    expected_bonus_reward = rew + 1 / math.sqrt(10)
+
+    assert expected_bonus_reward == wrapped_rew
+
+
+@pytest.mark.parametrize(
+    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
+)
+def test_dict_observation_space_wrapper(env_spec):
+    env = env_spec.make()
+    env = DictObservationSpaceWrapper(env)
+    env.reset()
+    mission = env.mission
+    obs, _, _, _ = env.step(0)
+    assert env.string_to_indices(mission) == [
+        value for value in obs["mission"] if value != 0
+    ]
+    env.close()
+
+
+@pytest.mark.parametrize(
+    "wrapper",
+    [
+        ReseedWrapper,
+        ImgObsWrapper,
+        FlatObsWrapper,
+        ViewSizeWrapper,
+        DictObservationSpaceWrapper,
+        OneHotPartialObsWrapper,
+        RGBImgPartialObsWrapper,
+        FullyObsWrapper,
+    ],
+)
+@pytest.mark.parametrize(
+    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
+)
+def test_main_wrappers(wrapper, env_spec):
+    env = env_spec.make()
+    env = wrapper(env)
+    for _ in range(10):
+        env.reset()
+        env.step(0)
+    env.close()
+
+
+@pytest.mark.parametrize(
+    "wrapper",
+    [
+        OneHotPartialObsWrapper,
+        RGBImgPartialObsWrapper,
+        FullyObsWrapper,
+    ],
+)
+@pytest.mark.parametrize(
+    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
+)
+def test_observation_space_wrappers(wrapper, env_spec):
+    env = wrapper(env_spec.make(disable_env_checker=True))
+    obs_space, wrapper_name = env.observation_space, wrapper.__name__
+    assert isinstance(
+        obs_space, gym.spaces.Dict
+    ), f"Observation space for {wrapper_name} is not a Dict: {obs_space}."
+    # This should not fail either
+    ImgObsWrapper(env)
+    env.reset()
+    env.step(0)
+    env.close()
+
+
+class EmptyEnvWithExtraObs(EmptyEnv):
+    """
+    Custom environment with an extra observation
+    """
+
+    def __init__(self) -> None:
+        super().__init__(size=5)
+        self.observation_space["size"] = gym.spaces.Box(
+            low=0, high=np.iinfo(np.uint).max, shape=(2,), dtype=np.uint
+        )
+
+    def reset(self, **kwargs):
+        obs = super().reset(**kwargs)
+        obs["size"] = np.array([self.width, self.height])
+        return obs
+
+    def step(self, action):
+        obs, reward, done, info = super().step(action)
+        obs["size"] = np.array([self.width, self.height])
+        return obs, reward, done, info
+
+
+@pytest.mark.parametrize(
+    "wrapper",
+    [
+        OneHotPartialObsWrapper,
+        RGBImgObsWrapper,
+        RGBImgPartialObsWrapper,
+        FullyObsWrapper,
+    ],
+)
+def test_agent_sees_method(wrapper):
+    env1 = wrapper(EmptyEnvWithExtraObs())
+    env2 = wrapper(gym.make("MiniGrid-Empty-5x5-v0"))
+
+    obs1 = env1.reset(seed=0)
+    obs2 = env2.reset(seed=0)
+    assert "size" in obs1
+    assert obs1["size"].shape == (2,)
+    assert (obs1["size"] == [5, 5]).all()
+    for key in obs2:
+        assert np.array_equal(obs1[key], obs2[key])
+
+    obs1, reward1, done1, _ = env1.step(0)
+    obs2, reward2, done2, _ = env2.step(0)
+    assert "size" in obs1
+    assert obs1["size"].shape == (2,)
+    assert (obs1["size"] == [5, 5]).all()
+    for key in obs2:
+        assert np.array_equal(obs1[key], obs2[key])

+ 34 - 0
tests/utils.py

@@ -0,0 +1,34 @@
+"""Finds all the specs that we can test with"""
+import gym
+import numpy as np
+
+all_testing_env_specs = [
+    env_spec
+    for env_spec in gym.envs.registry.values()
+    if env_spec.entry_point.startswith("gym_minigrid.envs")
+]
+
+
+def assert_equals(a, b, prefix=None):
+    """Assert equality of data structures `a` and `b`.
+
+    Args:
+        a: first data structure
+        b: second data structure
+        prefix: prefix for failed assertion message for types and dicts
+    """
+    assert type(a) == type(b), f"{prefix}Differing types: {a} and {b}"
+    if isinstance(a, dict):
+        assert list(a.keys()) == list(b.keys()), f"{prefix}Key sets differ: {a} and {b}"
+
+        for k in a.keys():
+            v_a = a[k]
+            v_b = b[k]
+            assert_equals(v_a, v_b)
+    elif isinstance(a, np.ndarray):
+        np.testing.assert_array_equal(a, b)
+    elif isinstance(a, tuple):
+        for elem_from_a, elem_from_b in zip(a, b):
+            assert_equals(elem_from_a, elem_from_b)
+    else:
+        assert a == b
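For illustration only (the observation values below are made up): `assert_equals` recurses through dicts, arrays, and tuples, which is how the determinism and ReseedWrapper tests above compare full observation dicts at every step.

```python
# Illustrative sketch of the helper defined above comparing nested observations.
import numpy as np

from tests.utils import assert_equals

obs_a = {"direction": 0, "image": np.zeros((7, 7, 3), dtype=np.uint8)}
obs_b = {"direction": 0, "image": np.zeros((7, 7, 3), dtype=np.uint8)}

# Passes: identical key sets, equal arrays, equal scalars
assert_equals(obs_a, obs_b, prefix="[step 0] ")

# Would raise an AssertionError: the "direction" values differ
# assert_equals(obs_a, {**obs_b, "direction": 1}, prefix="[step 0] ")
```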