
Use the new_step API and fix some Pylance errors
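
For reference, a minimal sketch of the API change this commit applies everywhere (gym's `new_step_api=True` flag, available from gym 0.25; env id taken from the README below):

import gym

# Old API: a single `done` flag conflates true termination with time limits.
env = gym.make("MiniGrid-Empty-8x8-v0")
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())

# New API: `done` is split into `terminated` (the task itself ended)
# and `truncated` (the episode was cut off, e.g. by `max_steps`).
env = gym.make("MiniGrid-Empty-8x8-v0", new_step_api=True)
obs = env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
done = terminated or truncated  # old-style flag, if a caller still needs one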

saleml 2 years ago
parent
commit
5c241a8a15

+ 1 - 1
README.md

@@ -136,7 +136,7 @@ use the `RGBImgPartialObsWrapper`. You can use it as follows:
 import gym
 from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper
 
-env = gym.make('MiniGrid-Empty-8x8-v0')
+env = gym.make('MiniGrid-Empty-8x8-v0', new_step_api=True)
 env = RGBImgPartialObsWrapper(env) # Get pixel observations
 env = ImgObsWrapper(env) # Get rid of the 'mission' field
 obs = env.reset() # This now produces an RGB tensor only
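
A hedged sketch of a full rollout loop with these wrappers under the new step API (assumes only the imports shown above):

import gym
from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper

env = gym.make("MiniGrid-Empty-8x8-v0", new_step_api=True)
env = RGBImgPartialObsWrapper(env)  # pixel observations
env = ImgObsWrapper(env)            # image-only observations
obs = env.reset()

while True:
    action = env.action_space.sample()  # random policy, for illustration
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:  # either flag ends the episode
        break
env.close()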

+ 3 - 3
benchmark.py

@@ -18,7 +18,7 @@ parser.add_argument("--num_resets", default=200)
 parser.add_argument("--num_frames", default=5000)
 args = parser.parse_args()
 
-env = gym.make(args.env_name, render_mode="rgb_array")
+env = gym.make(args.env_name, render_mode="rgb_array", new_step_api=True)
 
 # Benchmark env.reset
 t0 = time.time()
@@ -37,14 +37,14 @@ dt = t1 - t0
 frames_per_sec = args.num_frames / dt
 
 # Create an environment with an RGB agent observation
-env = gym.make(args.env_name)
+env = gym.make(args.env_name, new_step_api=True)
 env = RGBImgPartialObsWrapper(env)
 env = ImgObsWrapper(env)
 
 # Benchmark rendering
 t0 = time.time()
 for i in range(args.num_frames):
-    obs, reward, done, info = env.step(0)
+    obs, reward, terminated, truncated, info = env.step(0)
 t1 = time.time()
 dt = t1 - t0
 agent_view_fps = args.num_frames / dt

+ 3 - 3
gym_minigrid/envs/blockedunlockpickup.py

@@ -38,14 +38,14 @@ class BlockedUnlockPickupEnv(RoomGrid):
         self.mission = f"pick up the {obj.color} {obj.type}"
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if action == self.actions.pickup:
             if self.carrying and self.carrying == self.obj:
                 reward = self._reward()
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 register(

+ 7 - 6
gym_minigrid/envs/dynamicobstacles.py

@@ -1,6 +1,7 @@
 from operator import add
 
 import gym
+from gym.spaces import Discrete
 
 from gym_minigrid.minigrid import Ball, Goal, Grid, MiniGridEnv
 from gym_minigrid.register import register
@@ -30,7 +31,7 @@ class DynamicObstaclesEnv(MiniGridEnv):
             **kwargs
         )
         # Allow only 3 actions permitted: left, right, forward
-        self.action_space = gym.spaces.Discrete(self.actions.forward + 1)
+        self.action_space = Discrete(self.actions.forward + 1)
         self.reward_range = (-1, 1)
 
     def _gen_grid(self, width, height):
@@ -76,20 +77,20 @@ class DynamicObstaclesEnv(MiniGridEnv):
                 self.place_obj(
                     self.obstacles[i_obst], top=top, size=(3, 3), max_tries=100
                 )
-                self.grid.set(*old_pos, None)
+                self.grid.set(old_pos[0], old_pos[1], None)
             except Exception:
                 pass
 
         # Update the agent's position/direction
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         # If the agent tried to walk over an obstacle or wall
         if action == self.actions.forward and not_clear:
             reward = -1
-            done = True
-            return obs, reward, done, info
+            terminated = True
+            return obs, reward, terminated, truncated, info
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 register(

+ 6 - 5
gym_minigrid/envs/fetch.py

@@ -70,7 +70,7 @@ class FetchEnv(MiniGridEnv):
         assert hasattr(self, "mission")
 
     def step(self, action):
-        obs, reward, done, info = MiniGridEnv.step(self, action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if self.carrying:
             if (
@@ -78,12 +78,12 @@ class FetchEnv(MiniGridEnv):
                 and self.carrying.type == self.targetType
             ):
                 reward = self._reward()
-                done = True
+                terminated = True
             else:
                 reward = 0
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 register(
@@ -100,4 +100,5 @@ register(
     numObjs=2,
 )
 
-register(id="MiniGrid-Fetch-8x8-N3-v0", entry_point="gym_minigrid.envs.fetch:FetchEnv")
+register(id="MiniGrid-Fetch-8x8-N3-v0",
+         entry_point="gym_minigrid.envs.fetch:FetchEnv")

+ 2 - 6
gym_minigrid/envs/fourrooms.py

@@ -53,7 +53,8 @@ class FourRoomsEnv(MiniGridEnv):
         if self._agent_default_pos is not None:
             self.agent_pos = self._agent_default_pos
             self.grid.set(*self._agent_default_pos, None)
-            self.agent_dir = self._rand_int(0, 4)  # assuming random start direction
+            # assuming random start direction
+            self.agent_dir = self._rand_int(0, 4)
         else:
             self.place_agent()
 
@@ -65,11 +66,6 @@ class FourRoomsEnv(MiniGridEnv):
             self.place_obj(Goal())
 
         self.mission = "reach the goal"
-        self.mission = "Reach the goal"
-
-    def step(self, action):
-        obs, reward, done, info = MiniGridEnv.step(self, action)
-        return obs, reward, done, info
 
 
 register(

+ 4 - 4
gym_minigrid/envs/gotodoor.py

@@ -62,22 +62,22 @@ class GoToDoorEnv(MiniGridEnv):
         self.mission = "go to the %s door" % self.target_color
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         ax, ay = self.agent_pos
         tx, ty = self.target_pos
 
         # Don't let the agent open any of the doors
         if action == self.actions.toggle:
-            done = True
+            terminated = True
 
         # Reward performing done action in front of the target door
         if action == self.actions.done:
             if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
                 reward = self._reward()
-            done = True
+            terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 register(

+ 4 - 4
gym_minigrid/envs/gotoobject.py

@@ -64,22 +64,22 @@ class GoToObjectEnv(MiniGridEnv):
         # print(self.mission)
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         ax, ay = self.agent_pos
         tx, ty = self.target_pos
 
         # Toggle/pickup action terminates the episode
         if action == self.actions.toggle:
-            done = True
+            terminated = True
 
         # Reward performing the done action next to the target object
         if action == self.actions.done:
             if abs(ax - tx) <= 1 and abs(ay - ty) <= 1:
                 reward = self._reward()
-            done = True
+            terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 register(

+ 3 - 3
gym_minigrid/envs/keycorridor.py

@@ -44,14 +44,14 @@ class KeyCorridorEnv(RoomGrid):
         self.mission = f"pick up the {obj.color} {obj.type}"
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if action == self.actions.pickup:
             if self.carrying and self.carrying == self.obj:
                 reward = self._reward()
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 register(

+ 2 - 5
gym_minigrid/envs/lockedroom.py

@@ -56,7 +56,8 @@ class LockedRoomEnv(MiniGridEnv):
 
             roomW = lWallIdx + 1
             roomH = height // 3 + 1
-            self.rooms.append(LockedRoom((0, j), (roomW, roomH), (lWallIdx, j + 3)))
+            self.rooms.append(LockedRoom(
+                (0, j), (roomW, roomH), (lWallIdx, j + 3)))
             self.rooms.append(
                 LockedRoom((rWallIdx, j), (roomW, roomH), (rWallIdx, j + 3))
             )
@@ -98,10 +99,6 @@ class LockedRoomEnv(MiniGridEnv):
             "go to the goal"
         ) % (lockedRoom.color, keyRoom.color, lockedRoom.color)
 
-    def step(self, action):
-        obs, reward, done, info = MiniGridEnv.step(self, action)
-        return obs, reward, done, info
-
 
 register(
     id="MiniGrid-LockedRoom-v0",

+ 6 - 6
gym_minigrid/envs/memory.py

@@ -82,18 +82,18 @@ class MemoryEnv(MiniGridEnv):
         self.mission = "go to the matching object at the end of the hallway"
 
     def step(self, action):
-        if action == MiniGridEnv.Actions.pickup:
-            action = MiniGridEnv.Actions.toggle
-        obs, reward, done, info = MiniGridEnv.step(self, action)
+        if action == self.Actions.pickup:
+            action = self.Actions.toggle
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if tuple(self.agent_pos) == self.success_pos:
             reward = self._reward()
-            done = True
+            terminated = True
         if tuple(self.agent_pos) == self.failure_pos:
             reward = 0
-            done = True
+            terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 register(

+ 12 - 6
gym_minigrid/envs/multiroom.py

@@ -37,7 +37,8 @@ class MultiRoomEnv(MiniGridEnv):
         while len(roomList) < numRooms:
             curRoomList = []
 
-            entryDoorPos = (self._rand_int(0, width - 2), self._rand_int(0, width - 2))
+            entryDoorPos = (self._rand_int(0, width - 2),
+                            self._rand_int(0, width - 2))
 
             # Recursively place the rooms
             self._placeRoom(
@@ -89,7 +90,8 @@ class MultiRoomEnv(MiniGridEnv):
                 doorColor = self._rand_elem(sorted(doorColors))
 
                 entryDoor = Door(doorColor)
-                self.grid.set(*room.entryDoorPos, entryDoor)
+                self.grid.set(
+                    room.entryDoorPos[0], room.entryDoorPos[1], entryDoor)
                 prevDoorColor = doorColor
 
                 prevRoom = roomList[idx - 1]
@@ -99,7 +101,8 @@ class MultiRoomEnv(MiniGridEnv):
         self.place_agent(roomList[0].top, roomList[0].size)
 
         # Place the final goal in the last room
-        self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)
+        self.goal_pos = self.place_obj(
+            Goal(), roomList[-1].top, roomList[-1].size)
 
         self.mission = "traverse the rooms to get to the goal"
 
@@ -153,7 +156,8 @@ class MultiRoomEnv(MiniGridEnv):
                 return False
 
         # Add this room to the list
-        roomList.append(MultiRoom((topX, topY), (sizeX, sizeY), entryDoorPos, None))
+        roomList.append(
+            MultiRoom((topX, topY), (sizeX, sizeY), entryDoorPos, None))
 
         # If this was the last room, stop
         if numLeft == 1:
@@ -171,10 +175,12 @@ class MultiRoomEnv(MiniGridEnv):
             # Pick the exit door position
             # Exit on right wall
             if exitDoorWall == 0:
-                exitDoorPos = (topX + sizeX - 1, topY + self._rand_int(1, sizeY - 1))
+                exitDoorPos = (topX + sizeX - 1, topY +
+                               self._rand_int(1, sizeY - 1))
             # Exit on south wall
             elif exitDoorWall == 1:
-                exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY + sizeY - 1)
+                exitDoorPos = (
+                    topX + self._rand_int(1, sizeX - 1), topY + sizeY - 1)
             # Exit on left wall
             elif exitDoorWall == 2:
                 exitDoorPos = (topX, topY + self._rand_int(1, sizeY - 1))

+ 12 - 8
gym_minigrid/envs/obstructedmaze.py

@@ -20,6 +20,7 @@ class ObstructedMazeEnv(RoomGrid):
             max_steps=max_steps,
             **kwargs
         )
+        self.obj = Ball()  # initialize the obj attribute; it is overwritten later on
 
     def _gen_grid(self, width, height):
         super()._gen_grid(width, height)
@@ -36,14 +37,14 @@ class ObstructedMazeEnv(RoomGrid):
         self.mission = "pick up the %s ball" % self.ball_to_find_color
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if action == self.actions.pickup:
             if self.carrying and self.carrying == self.obj:
                 reward = self._reward()
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
     def add_door(
         self,
@@ -66,13 +67,14 @@ class ObstructedMazeEnv(RoomGrid):
         if blocked:
             vec = DIR_TO_VEC[door_idx]
             blocking_ball = Ball(self.blocking_ball_color) if blocked else None
-            self.grid.set(door_pos[0] - vec[0], door_pos[1] - vec[1], blocking_ball)
+            self.grid.set(door_pos[0] - vec[0],
+                          door_pos[1] - vec[1], blocking_ball)
 
         if locked:
             obj = Key(door.color)
             if key_in_box:
-                box = Box(self.box_color) if key_in_box else None
-                box.contains = obj
+                box = Box(self.box_color)
+                box.set_contains(obj)
                 obj = box
             self.place_in_room(i, j, obj)
 
@@ -104,7 +106,8 @@ class ObstructedMaze_1Dlhb(ObstructedMazeEnv):
             blocked=self.blocked,
         )
 
-        self.obj, _ = self.add_object(1, 0, "ball", color=self.ball_to_find_color)
+        self.obj, _ = self.add_object(
+            1, 0, "ball", color=self.ball_to_find_color)
         self.place_agent(0, 0)
 
 
@@ -162,7 +165,8 @@ class ObstructedMaze_Full(ObstructedMazeEnv):
         corners = [(2, 0), (2, 2), (0, 2), (0, 0)][: self.num_quarters]
         ball_room = self._rand_elem(corners)
 
-        self.obj, _ = self.add_object(*ball_room, "ball", color=self.ball_to_find_color)
+        self.obj, _ = self.add_object(
+            ball_room[0], ball_room[1], "ball", color=self.ball_to_find_color)
         self.place_agent(*self.agent_room)
 
 

+ 0 - 4
gym_minigrid/envs/playground.py

@@ -67,10 +67,6 @@ class PlaygroundEnv(MiniGridEnv):
         # No explicit mission in this environment
         self.mission = ""
 
-    def step(self, action):
-        obs, reward, done, info = super().step(action)
-        return obs, reward, done, info
-
 
 register(
     id="MiniGrid-Playground-v0",

+ 4 - 4
gym_minigrid/envs/putnear.py

@@ -89,7 +89,7 @@ class PutNearEnv(MiniGridEnv):
     def step(self, action):
         preCarrying = self.carrying
 
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         u, v = self.dir_vec
         ox, oy = (self.agent_pos[0] + u, self.agent_pos[1] + v)
@@ -101,16 +101,16 @@ class PutNearEnv(MiniGridEnv):
                 self.carrying.type != self.move_type
                 or self.carrying.color != self.moveColor
             ):
-                done = True
+                terminated = True
 
         # If successfully dropping an object near the target
         if action == self.actions.drop and preCarrying:
             if self.grid.get(ox, oy) is preCarrying:
                 if abs(ox - tx) <= 1 and abs(oy - ty) <= 1:
                     reward = self._reward()
-            done = True
+            terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 register(

+ 5 - 5
gym_minigrid/envs/redbluedoors.py

@@ -44,7 +44,7 @@ class RedBlueDoorEnv(MiniGridEnv):
         red_door_opened_before = self.red_door.is_open
         blue_door_opened_before = self.blue_door.is_open
 
-        obs, reward, done, info = MiniGridEnv.step(self, action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         red_door_opened_after = self.red_door.is_open
         blue_door_opened_after = self.blue_door.is_open
@@ -52,17 +52,17 @@ class RedBlueDoorEnv(MiniGridEnv):
         if blue_door_opened_after:
             if red_door_opened_before:
                 reward = self._reward()
-                done = True
+                terminated = True
             else:
                 reward = 0
-                done = True
+                terminated = True
 
         elif red_door_opened_after:
             if blue_door_opened_before:
                 reward = 0
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 register(

+ 5 - 4
gym_minigrid/envs/unlock.py

@@ -31,14 +31,15 @@ class UnlockEnv(RoomGrid):
         self.mission = "open the door"
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if action == self.actions.toggle:
             if self.door.is_open:
                 reward = self._reward()
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
-register(id="MiniGrid-Unlock-v0", entry_point="gym_minigrid.envs.unlock:UnlockEnv")
+register(id="MiniGrid-Unlock-v0",
+         entry_point="gym_minigrid.envs.unlock:UnlockEnv")

+ 3 - 3
gym_minigrid/envs/unlockpickup.py

@@ -33,14 +33,14 @@ class UnlockPickupEnv(RoomGrid):
         self.mission = f"pick up the {obj.color} {obj.type}"
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if action == self.actions.pickup:
             if self.carrying and self.carrying == self.obj:
                 reward = self._reward()
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 register(

+ 72 - 49
gym_minigrid/minigrid.py

@@ -6,7 +6,7 @@ from enum import IntEnum
 import gym
 import numpy as np
 from gym import spaces
-
+from abc import abstractmethod
 # Size in pixels of a tile in the full-scale human view
 from gym_minigrid.rendering import (
     downsample,
@@ -34,7 +34,8 @@ COLORS = {
 COLOR_NAMES = sorted(list(COLORS.keys()))
 
 # Used to map colors to integers
-COLOR_TO_IDX = {"red": 0, "green": 1, "blue": 2, "purple": 3, "yellow": 4, "grey": 5}
+COLOR_TO_IDX = {"red": 0, "green": 1, "blue": 2,
+                "purple": 3, "yellow": 4, "grey": 5}
 
 IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys()))
 
@@ -202,10 +203,14 @@ class Lava(WorldObj):
         for i in range(3):
             ylo = 0.3 + 0.2 * i
             yhi = 0.4 + 0.2 * i
-            fill_coords(img, point_in_line(0.1, ylo, 0.3, yhi, r=0.03), (0, 0, 0))
-            fill_coords(img, point_in_line(0.3, yhi, 0.5, ylo, r=0.03), (0, 0, 0))
-            fill_coords(img, point_in_line(0.5, ylo, 0.7, yhi, r=0.03), (0, 0, 0))
-            fill_coords(img, point_in_line(0.7, yhi, 0.9, ylo, r=0.03), (0, 0, 0))
+            fill_coords(img, point_in_line(
+                0.1, ylo, 0.3, yhi, r=0.03), (0, 0, 0))
+            fill_coords(img, point_in_line(
+                0.3, yhi, 0.5, ylo, r=0.03), (0, 0, 0))
+            fill_coords(img, point_in_line(
+                0.5, ylo, 0.7, yhi, r=0.03), (0, 0, 0))
+            fill_coords(img, point_in_line(
+                0.7, yhi, 0.9, ylo, r=0.03), (0, 0, 0))
 
 
 class Wall(WorldObj):
@@ -252,7 +257,7 @@ class Door(WorldObj):
             state = 0
         elif self.is_locked:
             state = 2
-        elif not self.is_open:
+        else:
             state = 1
 
         return (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state)
@@ -268,7 +273,8 @@ class Door(WorldObj):
         # Door frame and door
         if self.is_locked:
             fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c)
-            fill_coords(img, point_in_rect(0.06, 0.94, 0.06, 0.94), 0.45 * np.array(c))
+            fill_coords(img, point_in_rect(
+                0.06, 0.94, 0.06, 0.94), 0.45 * np.array(c))
 
             # Draw key slot
             fill_coords(img, point_in_rect(0.52, 0.75, 0.50, 0.56), c)
@@ -323,6 +329,9 @@ class Box(WorldObj):
     def can_pickup(self):
         return True
 
+    def set_contains(self, contains):
+        self.contains = contains
+
     def render(self, img):
         c = COLORS[self.color]
 
@@ -335,7 +344,7 @@ class Box(WorldObj):
 
     def toggle(self, env, pos):
         # Replace the box by its contents
-        env.grid.set(*pos, self.contains)
+        env.grid.set(pos[0], pos[1], self.contains)
         return True
 
 
@@ -482,7 +491,8 @@ class Grid:
             )
 
             # Rotate the agent based on its direction
-            tri_fn = rotate_fn(tri_fn, cx=0.5, cy=0.5, theta=0.5 * math.pi * agent_dir)
+            tri_fn = rotate_fn(tri_fn, cx=0.5, cy=0.5,
+                               theta=0.5 * math.pi * agent_dir)
             fill_coords(img, tri_fn, (255, 0, 0))
 
         # Highlight the cell if needed
@@ -497,7 +507,7 @@ class Grid:
 
         return img
 
-    def render(self, tile_size, agent_pos=None, agent_dir=None, highlight_mask=None):
+    def render(self, tile_size, agent_pos, agent_dir=None, highlight_mask=None):
         """
         Render this grid at a given scale
         :param r: target renderer object
@@ -505,7 +515,8 @@ class Grid:
         """
 
         if highlight_mask is None:
-            highlight_mask = np.zeros(shape=(self.width, self.height), dtype=bool)
+            highlight_mask = np.zeros(
+                shape=(self.width, self.height), dtype=bool)
 
         # Compute the total grid size
         width_px = self.width * tile_size
@@ -580,17 +591,17 @@ class Grid:
 
         return grid, vis_mask
 
-    def process_vis(grid, agent_pos):
-        mask = np.zeros(shape=(grid.width, grid.height), dtype=bool)
+    def process_vis(self, agent_pos):
+        mask = np.zeros(shape=(self.width, self.height), dtype=bool)
 
         mask[agent_pos[0], agent_pos[1]] = True
 
-        for j in reversed(range(0, grid.height)):
-            for i in range(0, grid.width - 1):
+        for j in reversed(range(0, self.height)):
+            for i in range(0, self.width - 1):
                 if not mask[i, j]:
                     continue
 
-                cell = grid.get(i, j)
+                cell = self.get(i, j)
                 if cell and not cell.see_behind():
                     continue
 
@@ -599,11 +610,11 @@ class Grid:
                     mask[i + 1, j - 1] = True
                     mask[i, j - 1] = True
 
-            for i in reversed(range(1, grid.width)):
+            for i in reversed(range(1, self.width)):
                 if not mask[i, j]:
                     continue
 
-                cell = grid.get(i, j)
+                cell = self.get(i, j)
                 if cell and not cell.see_behind():
                     continue
 
@@ -612,10 +623,10 @@ class Grid:
                     mask[i - 1, j - 1] = True
                     mask[i, j - 1] = True
 
-        for j in range(0, grid.height):
-            for i in range(0, grid.width):
+        for j in range(0, self.height):
+            for i in range(0, self.width):
                 if not mask[i, j]:
-                    grid.set(i, j, None)
+                    self.set(i, j, None)
 
         return mask
 
@@ -713,24 +724,29 @@ class MiniGridEnv(gym.Env):
         self.see_through_walls = see_through_walls
 
         # Current position and direction of the agent
-        self.agent_pos = None
-        self.agent_dir = None
+        self.agent_pos = (-1, -1)
+        self.agent_dir = -1
+
+        # Current grid, mission, and carrying object
+        self.grid = Grid(width, height)
+        self.mission = ""
+        self.carrying = None
 
         # Initialize the state
         self.reset()
 
     def reset(self, *, seed=None, return_info=False, options=None):
         super().reset(seed=seed)
-        # Current position and direction of the agent
-        self.agent_pos = None
-        self.agent_dir = None
+
+        # Reinitialize episode-specific variables
+        self.agent_pos = (-1, -1)
+        self.agent_dir = -1
 
         # Generate a new random grid at the start of each episode
         self._gen_grid(self.width, self.height)
 
         # These fields should be defined by _gen_grid
-        assert self.agent_pos is not None
-        assert self.agent_dir is not None
+        assert self.agent_pos >= (0, 0) and self.agent_dir >= 0
 
         # Check that the agent doesn't overlap with an object
         start_cell = self.grid.get(*self.agent_pos)
@@ -752,7 +768,8 @@ class MiniGridEnv(gym.Env):
         """
         sample_hash = hashlib.sha256()
 
-        to_encode = [self.grid.encode().tolist(), self.agent_pos, self.agent_dir]
+        to_encode = [self.grid.encode().tolist(), self.agent_pos,
+                     self.agent_dir]
         for item in to_encode:
             sample_hash.update(str(item).encode("utf8"))
 
@@ -815,8 +832,9 @@ class MiniGridEnv(gym.Env):
 
         return str
 
+    @abstractmethod
     def _gen_grid(self, width, height):
-        assert False, "_gen_grid needs to be implemented by each environment"
+        pass
 
     def _reward(self):
         """
@@ -918,11 +936,15 @@ class MiniGridEnv(gym.Env):
 
             pos = np.array(
                 (
-                    self._rand_int(top[0], min(top[0] + size[0], self.grid.width)),
-                    self._rand_int(top[1], min(top[1] + size[1], self.grid.height)),
+                    self._rand_int(top[0], min(
+                        top[0] + size[0], self.grid.width)),
+                    self._rand_int(top[1], min(
+                        top[1] + size[1], self.grid.height)),
                 )
             )
 
+            pos = tuple(pos)
+
             # Don't place the object on top of another object
             if self.grid.get(*pos) is not None:
                 continue
@@ -937,7 +959,7 @@ class MiniGridEnv(gym.Env):
 
             break
 
-        self.grid.set(*pos, obj)
+        self.grid.set(pos[0], pos[1], obj)
 
         if obj is not None:
             obj.init_pos = pos
@@ -959,7 +981,7 @@ class MiniGridEnv(gym.Env):
         Set the agent's starting point at an empty position in the grid
         """
 
-        self.agent_pos = None
+        self.agent_pos = (-1, -1)
         pos = self.place_obj(None, top, size, max_tries=max_tries)
         self.agent_pos = pos
 
@@ -1089,13 +1111,16 @@ class MiniGridEnv(gym.Env):
         obs_cell = obs_grid.get(vx, vy)
         world_cell = self.grid.get(x, y)
 
+        assert world_cell is not None
+
         return obs_cell is not None and obs_cell.type == world_cell.type
 
     def step(self, action):
         self.step_count += 1
 
         reward = 0
-        done = False
+        terminated = False
+        truncated = False
 
         # Get the position in front of the agent
         fwd_pos = self.front_pos
@@ -1116,12 +1141,12 @@ class MiniGridEnv(gym.Env):
         # Move forward
         elif action == self.actions.forward:
             if fwd_cell is None or fwd_cell.can_overlap():
-                self.agent_pos = fwd_pos
+                self.agent_pos = tuple(fwd_pos)
             if fwd_cell is not None and fwd_cell.type == "goal":
-                done = True
+                terminated = True
                 reward = self._reward()
             if fwd_cell is not None and fwd_cell.type == "lava":
-                done = True
+                terminated = True
 
         # Pick up an object
         elif action == self.actions.pickup:
@@ -1129,12 +1154,12 @@ class MiniGridEnv(gym.Env):
                 if self.carrying is None:
                     self.carrying = fwd_cell
                     self.carrying.cur_pos = np.array([-1, -1])
-                    self.grid.set(*fwd_pos, None)
+                    self.grid.set(fwd_pos[0], fwd_pos[1], None)
 
         # Drop an object
         elif action == self.actions.drop:
             if not fwd_cell and self.carrying:
-                self.grid.set(*fwd_pos, self.carrying)
+                self.grid.set(fwd_pos[0], fwd_pos[1], self.carrying)
                 self.carrying.cur_pos = fwd_pos
                 self.carrying = None
 
@@ -1148,14 +1173,14 @@ class MiniGridEnv(gym.Env):
             pass
 
         else:
-            assert False, "unknown action"
+            raise ValueError('Unknown action: {}'.format(action))
 
         if self.step_count >= self.max_steps:
-            done = True
+            truncated = True
 
         obs = self.gen_obs()
 
-        return obs, reward, done, {}
+        return obs, reward, terminated, truncated, {}
 
     def gen_obs_grid(self, agent_view_size=None):
         """
@@ -1204,15 +1229,12 @@ class MiniGridEnv(gym.Env):
         # Encode the partially observable view into a numpy array
         image = grid.encode(vis_mask)
 
-        assert hasattr(
-            self, "mission"
-        ), "environments must define a textual mission string"
-
         # Observations are dictionaries containing:
         # - an image (partially observable view of the environment)
         # - the agent's direction/orientation (acting as a compass)
         # - a textual mission string (instructions for the agent)
-        obs = {"image": image, "direction": self.agent_dir, "mission": self.mission}
+        obs = {"image": image, "direction": self.agent_dir,
+               "mission": self.mission}
 
         return obs
 
@@ -1293,6 +1315,7 @@ class MiniGridEnv(gym.Env):
         )
 
         if mode == "human":
+            assert self.window is not None
             self.window.set_caption(self.mission)
             self.window.show_img(img)
 

+ 5 - 5
gym_minigrid/rendering.py

@@ -49,8 +49,8 @@ def rotate_fn(fin, cx, cy, theta):
 
 
 def point_in_line(x0, y0, x1, y1, r):
-    p0 = np.array([x0, y0])
-    p1 = np.array([x1, y1])
+    p0 = np.array([x0, y0], dtype=np.float32)
+    p1 = np.array([x1, y1], dtype=np.float32)
     dir = p1 - p0
     dist = np.linalg.norm(dir)
     dir = dir / dist
@@ -94,9 +94,9 @@ def point_in_rect(xmin, xmax, ymin, ymax):
 
 
 def point_in_triangle(a, b, c):
-    a = np.array(a)
-    b = np.array(b)
-    c = np.array(c)
+    a = np.array(a, dtype=np.float32)
+    b = np.array(b, dtype=np.float32)
+    c = np.array(c, dtype=np.float32)
 
     def fn(x, y):
         v0 = c - a

+ 2 - 2
gym_minigrid/roomgrid.py

@@ -201,7 +201,7 @@ class RoomGrid(MiniGridEnv):
             obj = Key(color)
         elif kind == "ball":
             obj = Ball(color)
-        elif kind == "box":
+        else:  # kind == "box"
             obj = Box(color)
 
         return self.place_in_room(i, j, obj)
@@ -233,7 +233,7 @@ class RoomGrid(MiniGridEnv):
         door = Door(color, is_locked=locked)
 
         pos = room.door_pos[door_idx]
-        self.grid.set(*pos, door)
+        self.grid.set(pos[0], pos[1], door)
         door.cur_pos = pos
 
         neighbor = room.neighbors[door_idx]

+ 4 - 8
gym_minigrid/wrappers.py

@@ -26,10 +26,6 @@ class ReseedWrapper(gym.Wrapper):
         self.seed_idx = (self.seed_idx + 1) % len(self.seeds)
         return self.env.reset(seed=seed, **kwargs)
 
-    def step(self, action):
-        obs, reward, done, info = self.env.step(action)
-        return obs, reward, done, info
-
 
 class ActionBonus(gym.Wrapper):
     """
@@ -43,7 +39,7 @@ class ActionBonus(gym.Wrapper):
         self.counts = {}
 
     def step(self, action):
-        obs, reward, done, info = self.env.step(action)
+        obs, reward, terminated, truncated, info = self.env.step(action)
 
         env = self.unwrapped
         tup = (tuple(env.agent_pos), env.agent_dir, action)
@@ -60,7 +56,7 @@ class ActionBonus(gym.Wrapper):
         bonus = 1 / math.sqrt(new_count)
         reward += bonus
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
     def reset(self, **kwargs):
         return self.env.reset(**kwargs)
@@ -77,7 +73,7 @@ class StateBonus(gym.Wrapper):
         self.counts = {}
 
     def step(self, action):
-        obs, reward, done, info = self.env.step(action)
+        obs, reward, terminated, truncated, info = self.env.step(action)
 
         # Tuple based on which we index the counts
         # We use the position after an update
@@ -96,7 +92,7 @@ class StateBonus(gym.Wrapper):
         bonus = 1 / math.sqrt(new_count)
         reward += bonus
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
     def reset(self, **kwargs):
         return self.env.reset(**kwargs)
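
With the redundant pass-through `step` removed from `ReseedWrapper`, any wrapper that still overrides `step` must forward the full 5-tuple, as `ActionBonus` and `StateBonus` now do. A minimal sketch of a custom wrapper under the new API (hypothetical `RewardScaleWrapper`, for illustration only):

import gym

class RewardScaleWrapper(gym.Wrapper):
    """Scale rewards by a constant factor; pass everything else through."""

    def __init__(self, env, scale=10.0):
        super().__init__(env)
        self.scale = scale

    def step(self, action):
        obs, reward, terminated, truncated, info = self.env.step(action)
        return obs, reward * self.scale, terminated, truncated, info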

+ 7 - 4
manual_control.py

@@ -27,11 +27,14 @@ def reset():
 
 
 def step(action):
-    obs, reward, done, info = env.step(action)
+    obs, reward, terminated, truncated, info = env.step(action)
     print(f"step={env.step_count}, reward={reward:.2f}")
 
-    if done:
-        print("done!")
+    if terminated:
+        print("terminated!")
+        reset()
+    elif truncated:
+        print("truncated!")
         reset()
     else:
         redraw(obs)
@@ -93,7 +96,7 @@ parser.add_argument(
 
 args = parser.parse_args()
 
-env = gym.make(args.env, render_mode="rgb_array")
+env = gym.make(args.env, render_mode="rgb_array", new_step_api=True)
 
 if args.agent_view:
     env = RGBImgPartialObsWrapper(env)

+ 29 - 26
run_tests.py

@@ -6,7 +6,7 @@ import gym
 import numpy as np
 from gym import spaces
 
-from gym_minigrid.envs.empty import EmptyEnv5x5
+from gym_minigrid.envs.empty import EmptyEnv
 from gym_minigrid.minigrid import Grid
 from gym_minigrid.register import env_list
 from gym_minigrid.wrappers import (
@@ -30,7 +30,7 @@ for env_idx, env_name in enumerate(env_list):
     print(f"testing {env_name} ({env_idx + 1}/{len(env_list)})")
 
     # Load the gym environment
-    env = gym.make(env_name, render_mode="rgb_array")
+    env = gym.make(env_name, render_mode="rgb_array", new_step_api=True)
     env.max_steps = min(env.max_steps, 200)
     env.reset()
     env.render()
@@ -52,7 +52,7 @@ for env_idx, env_name in enumerate(env_list):
         # Pick a random action
         action = random.randint(0, env.action_space.n - 1)
 
-        obs, reward, done, info = env.step(action)
+        obs, reward, terminated, truncated, info = env.step(action)
 
         # Validate the agent position
         assert env.agent_pos[0] < env.width
@@ -71,7 +71,7 @@ for env_idx, env_name in enumerate(env_list):
         assert reward >= env.reward_range[0], reward
         assert reward <= env.reward_range[1], reward
 
-        if done:
+        if terminated or truncated:
             num_episodes += 1
             env.reset()
 
@@ -80,62 +80,64 @@ for env_idx, env_name in enumerate(env_list):
     # Test the close method
     env.close()
 
-    env = gym.make(env_name)
+    env = gym.make(env_name, new_step_api=True)
     env = ReseedWrapper(env)
     for _ in range(10):
         env.reset()
         env.step(0)
         env.close()
 
-    env = gym.make(env_name)
+    env = gym.make(env_name, new_step_api=True)
     env = ImgObsWrapper(env)
     env.reset()
     env.step(0)
     env.close()
 
     # Test the fully observable wrapper
-    env = gym.make(env_name)
+    env = gym.make(env_name, new_step_api=True)
     env = FullyObsWrapper(env)
     env.reset()
-    obs, _, _, _ = env.step(0)
+    obs, _, _, _, _ = env.step(0)
     assert obs["image"].shape == env.observation_space.spaces["image"].shape
     env.close()
 
     # RGB image observation wrapper
-    env = gym.make(env_name)
+    env = gym.make(env_name, new_step_api=True)
     env = RGBImgPartialObsWrapper(env)
     env.reset()
-    obs, _, _, _ = env.step(0)
+    obs, _, _, _, _ = env.step(0)
     assert obs["image"].mean() > 0
     env.close()
 
-    env = gym.make(env_name)
+    env = gym.make(env_name, new_step_api=True)
     env = FlatObsWrapper(env)
     env.reset()
     env.step(0)
     env.close()
 
-    env = gym.make(env_name)
+    env = gym.make(env_name, new_step_api=True)
     env = ViewSizeWrapper(env, 5)
     env.reset()
     env.step(0)
     env.close()
 
     # Test the DictObservationSpaceWrapper
-    env = gym.make(env_name)
+    env = gym.make(env_name, new_step_api=True)
     env = DictObservationSpaceWrapper(env)
     env.reset()
     mission = env.mission
-    obs, _, _, _ = env.step(0)
+    obs, _, _, _, _ = env.step(0)
     assert env.string_to_indices(mission) == [
         value for value in obs["mission"] if value != 0
     ]
     env.close()
 
     # Test the wrappers return proper observation spaces.
-    wrappers = [RGBImgObsWrapper, RGBImgPartialObsWrapper, OneHotPartialObsWrapper]
+    wrappers = [RGBImgObsWrapper,
+                RGBImgPartialObsWrapper, OneHotPartialObsWrapper]
     for wrapper in wrappers:
-        env = wrapper(gym.make(env_name, render_mode="rgb_array"))
+        env = wrapper(
+            gym.make(env_name, render_mode="rgb_array", new_step_api=True))
         obs_space, wrapper_name = env.observation_space, wrapper.__name__
         assert isinstance(
             obs_space, spaces.Dict
@@ -151,13 +153,13 @@ for env_idx, env_name in enumerate(env_list):
 print("testing extra observations")
 
 
-class EmptyEnvWithExtraObs(EmptyEnv5x5):
+class EmptyEnvWithExtraObs(EmptyEnv):
     """
     Custom environment with an extra observation
     """
 
     def __init__(self, **kwargs) -> None:
-        super().__init__(**kwargs)
+        super().__init__(size=5, **kwargs)
         self.observation_space["size"] = spaces.Box(
             low=0,
             high=1000,  # gym does not like np.iinfo(np.uint).max,
@@ -171,9 +173,9 @@ class EmptyEnvWithExtraObs(EmptyEnv5x5):
         return obs
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
         obs["size"] = np.array([self.width, self.height], dtype=np.uint)
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 wrappers = [
@@ -184,7 +186,8 @@ wrappers = [
 ]
 for wrapper in wrappers:
     env1 = wrapper(EmptyEnvWithExtraObs(render_mode="rgb_array"))
-    env2 = wrapper(gym.make("MiniGrid-Empty-5x5-v0", render_mode="rgb_array"))
+    env2 = wrapper(gym.make("MiniGrid-Empty-5x5-v0",
+                   render_mode="rgb_array", new_step_api=True))
 
     obs1 = env1.reset(seed=0)
     obs2 = env2.reset(seed=0)
@@ -194,8 +197,8 @@ for wrapper in wrappers:
     for key in obs2:
         assert np.array_equal(obs1[key], obs2[key])
 
-    obs1, reward1, done1, _ = env1.step(0)
-    obs2, reward2, done2, _ = env2.step(0)
+    obs1, reward1, terminated1, truncated1, _ = env1.step(0)
+    obs2, reward2, terminated2, truncated2, _ = env2.step(0)
     assert "size" in obs1
     assert obs1["size"].shape == (2,)
     assert (obs1["size"] == [5, 5]).all()
@@ -205,7 +208,7 @@ for wrapper in wrappers:
 ##############################################################################
 
 print("testing agent_sees method")
-env = gym.make("MiniGrid-DoorKey-6x6-v0")
+env = gym.make("MiniGrid-DoorKey-6x6-v0", new_step_api=True)
 goal_pos = (env.grid.width - 2, env.grid.height - 2)
 
 # Test the "in" operator on grid objects
@@ -216,12 +219,12 @@ assert ("blue", "key") not in env.grid
 env.reset()
 for i in range(0, 500):
     action = random.randint(0, env.action_space.n - 1)
-    obs, reward, done, info = env.step(action)
+    obs, reward, terminated, truncated, info = env.step(action)
 
     grid, _ = Grid.decode(obs["image"])
     goal_visible = ("green", "goal") in grid
 
     agent_sees_goal = env.agent_sees(*goal_pos)
     assert agent_sees_goal == goal_visible
-    if done:
+    if terminated or truncated:
         env.reset()

+ 2 - 2
test_interactive_mode.py

@@ -6,7 +6,7 @@ import time
 import gym
 
 # Load the gym environment
-env = gym.make("MiniGrid-Empty-8x8-v0")
+env = gym.make("MiniGrid-Empty-8x8-v0", new_step_api=True)
 env.reset()
 
 for i in range(0, 100):
@@ -15,7 +15,7 @@ for i in range(0, 100):
     # Pick a random action
     action = random.randint(0, env.action_space.n - 1)
 
-    obs, reward, done, info = env.step(action)
+    obs, reward, terminated, truncated, info = env.step(action)
 
     env.render()