
Merge pull request #204 from saleml/newstep

Use the new step API and fix some Pylance errors
Mark Towers, 2 years ago
parent
commit
59dba0e1f5
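
In short, env.step() now returns a 5-tuple (obs, reward, terminated, truncated, info) instead of the old 4-tuple (obs, reward, done, info): terminated means the task itself ended (goal reached, lava stepped on, explicit done action), truncated means the max_steps limit was hit. A minimal before/after sketch, assuming a gym version that accepts new_step_api and that importing the package registers the MiniGrid-* ids:

import gym
import gym_minigrid  # noqa: F401  (assumed to register the MiniGrid-* environment ids)

env = gym.make("MiniGrid-Empty-8x8-v0", new_step_api=True)
obs = env.reset()
action = env.action_space.sample()

# Old API (single flag):
#   obs, reward, done, info = env.step(action)
# New API: termination and truncation are reported separately
obs, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated  # equivalent of the old combined flag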

+ 1 - 1
README.md

@@ -137,7 +137,7 @@ use the `RGBImgPartialObsWrapper`. You can use it as follows:
 import gym
 from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper
 
-env = gym.make('MiniGrid-Empty-8x8-v0')
+env = gym.make('MiniGrid-Empty-8x8-v0', new_step_api=True)
 env = RGBImgPartialObsWrapper(env) # Get pixel observations
 env = ImgObsWrapper(env) # Get rid of the 'mission' field
 obs = env.reset() # This now produces an RGB tensor only
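
A possible continuation of the README snippet under the new step API, stepping the wrapped environment and resetting when an episode ends (the random policy is purely illustrative):

for _ in range(100):
    action = env.action_space.sample()  # placeholder policy
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs = env.reset()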

+ 3 - 3
gym_minigrid/benchmark.py

@@ -18,7 +18,7 @@ parser.add_argument("--num_resets", default=200)
 parser.add_argument("--num_frames", default=5000)
 args = parser.parse_args()
 
-env = gym.make(args.env_name)
+env = gym.make(args.env_name, new_step_api=True)
 
 # Benchmark env.reset
 t0 = time.time()
@@ -37,7 +37,7 @@ dt = t1 - t0
 frames_per_sec = args.num_frames / dt
 
 # Create an environment with an RGB agent observation
-env = gym.make(args.env_name)
+env = gym.make(args.env_name, new_step_api=True)
 env = RGBImgPartialObsWrapper(env)
 env = ImgObsWrapper(env)
 
@@ -45,7 +45,7 @@ env.reset()
 # Benchmark rendering
 t0 = time.time()
 for i in range(args.num_frames):
-    obs, reward, done, info = env.step(0)
+    obs, reward, terminated, truncated, info = env.step(0)
 t1 = time.time()
 dt = t1 - t0
 agent_view_fps = args.num_frames / dt

+ 3 - 3
gym_minigrid/envs/blockedunlockpickup.py

@@ -42,11 +42,11 @@ class BlockedUnlockPickupEnv(RoomGrid):
         self.mission = f"pick up the {obj.color} {obj.type}"
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if action == self.actions.pickup:
             if self.carrying and self.carrying == self.obj:
                 reward = self._reward()
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info

+ 7 - 7
gym_minigrid/envs/dynamicobstacles.py

@@ -1,6 +1,6 @@
 from operator import add
 
-import gym
+from gym.spaces import Discrete
 
 from gym_minigrid.minigrid import Ball, Goal, Grid, MiniGridEnv, MissionSpace
 
@@ -35,7 +35,7 @@ class DynamicObstaclesEnv(MiniGridEnv):
             **kwargs
         )
         # Allow only 3 actions: left, right, forward
-        self.action_space = gym.spaces.Discrete(self.actions.forward + 1)
+        self.action_space = Discrete(self.actions.forward + 1)
         self.reward_range = (-1, 1)
 
     def _gen_grid(self, width, height):
@@ -81,17 +81,17 @@ class DynamicObstaclesEnv(MiniGridEnv):
                 self.place_obj(
                     self.obstacles[i_obst], top=top, size=(3, 3), max_tries=100
                 )
-                self.grid.set(*old_pos, None)
+                self.grid.set(old_pos[0], old_pos[1], None)
             except Exception:
                 pass
 
         # Update the agent's position/direction
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         # If the agent tried to walk over an obstacle or wall
         if action == self.actions.forward and not_clear:
             reward = -1
-            done = True
-            return obs, reward, done, info
+            terminated = True
+            return obs, reward, terminated, truncated, info
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
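
Aside from the step-API changes, several hunks in this and later files (grid.set, add_object) replace star-unpacking of a position with explicit indexing. This is presumably the Pylance-related part of the PR: a static checker cannot always verify the arity of a call like f(*pos, extra) when pos is an ndarray or a loosely typed tuple. A contrived illustration (set_cell is a hypothetical stand-in for Grid.set, not code from this repository):

import numpy as np

def set_cell(i: int, j: int, v: object) -> None:
    """Stand-in for Grid.set(i, j, v)."""

pos = np.array([3, 4])
# set_cell(*pos, None)                      # arity of the expansion is opaque to the checker
set_cell(int(pos[0]), int(pos[1]), None)    # explicit indexing keeps the call checkable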

+ 4 - 4
gym_minigrid/envs/fetch.py

@@ -95,7 +95,7 @@ class FetchEnv(MiniGridEnv):
         assert hasattr(self, "mission")
 
     def step(self, action):
-        obs, reward, done, info = MiniGridEnv.step(self, action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if self.carrying:
             if (
@@ -103,9 +103,9 @@ class FetchEnv(MiniGridEnv):
                 and self.carrying.type == self.targetType
             ):
                 reward = self._reward()
-                done = True
+                terminated = True
             else:
                 reward = 0
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info

+ 2 - 5
gym_minigrid/envs/fourrooms.py

@@ -61,7 +61,8 @@ class FourRoomsEnv(MiniGridEnv):
         if self._agent_default_pos is not None:
             self.agent_pos = self._agent_default_pos
             self.grid.set(*self._agent_default_pos, None)
-            self.agent_dir = self._rand_int(0, 4)  # assuming random start direction
+            # assuming random start direction
+            self.agent_dir = self._rand_int(0, 4)
         else:
             self.place_agent()
 
@@ -71,7 +72,3 @@ class FourRoomsEnv(MiniGridEnv):
             goal.init_pos, goal.cur_pos = self._goal_default_pos
         else:
             self.place_obj(Goal())
-
-    def step(self, action):
-        obs, reward, done, info = MiniGridEnv.step(self, action)
-        return obs, reward, done, info

+ 4 - 4
gym_minigrid/envs/gotodoor.py

@@ -67,19 +67,19 @@ class GoToDoorEnv(MiniGridEnv):
         self.mission = "go to the %s door" % self.target_color
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         ax, ay = self.agent_pos
         tx, ty = self.target_pos
 
         # Don't let the agent open any of the doors
         if action == self.actions.toggle:
-            done = True
+            terminated = True
 
         # Reward performing the done action in front of the target door
         if action == self.actions.done:
             if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
                 reward = self._reward()
-            done = True
+            terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info

+ 4 - 4
gym_minigrid/envs/gotoobject.py

@@ -86,19 +86,19 @@ class GoToObjectEnv(MiniGridEnv):
         # print(self.mission)
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         ax, ay = self.agent_pos
         tx, ty = self.target_pos
 
         # Toggle/pickup action terminates the episode
         if action == self.actions.toggle:
-            done = True
+            terminated = True
 
         # Reward performing the done action next to the target object
         if action == self.actions.done:
             if abs(ax - tx) <= 1 and abs(ay - ty) <= 1:
                 reward = self._reward()
-            done = True
+            terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info

+ 3 - 3
gym_minigrid/envs/keycorridor.py

@@ -48,11 +48,11 @@ class KeyCorridorEnv(RoomGrid):
         self.mission = f"pick up the {obj.color} {obj.type}"
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if action == self.actions.pickup:
             if self.carrying and self.carrying == self.obj:
                 reward = self._reward()
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info

+ 0 - 4
gym_minigrid/envs/lockedroom.py

@@ -116,7 +116,3 @@ class LockedRoomEnv(MiniGridEnv):
             "unlock the %s door and "
             "go to the goal"
         ) % (lockedRoom.color, keyRoom.color, lockedRoom.color)
-
-    def step(self, action):
-        obs, reward, done, info = MiniGridEnv.step(self, action)
-        return obs, reward, done, info

+ 6 - 6
gym_minigrid/envs/memory.py

@@ -89,15 +89,15 @@ class MemoryEnv(MiniGridEnv):
         self.mission = "go to the matching object at the end of the hallway"
 
     def step(self, action):
-        if action == MiniGridEnv.Actions.pickup:
-            action = MiniGridEnv.Actions.toggle
-        obs, reward, done, info = MiniGridEnv.step(self, action)
+        if action == self.Actions.pickup:
+            action = self.Actions.toggle
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if tuple(self.agent_pos) == self.success_pos:
             reward = self._reward()
-            done = True
+            terminated = True
         if tuple(self.agent_pos) == self.failure_pos:
             reward = 0
-            done = True
+            terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info

+ 1 - 1
gym_minigrid/envs/multiroom.py

@@ -107,7 +107,7 @@ class MultiRoomEnv(MiniGridEnv):
                 doorColor = self._rand_elem(sorted(doorColors))
 
                 entryDoor = Door(doorColor)
-                self.grid.set(*room.entryDoorPos, entryDoor)
+                self.grid.set(room.entryDoorPos[0], room.entryDoorPos[1], entryDoor)
                 prevDoorColor = doorColor
 
                 prevRoom = roomList[idx - 1]

+ 7 - 4
gym_minigrid/envs/obstructedmaze.py

@@ -23,6 +23,7 @@ class ObstructedMazeEnv(RoomGrid):
             max_steps=max_steps,
             **kwargs,
         )
+        self.obj = Ball()  # initialize the obj attribute; it will be reassigned later on
 
     def _gen_grid(self, width, height):
         super()._gen_grid(width, height)
@@ -39,14 +40,14 @@ class ObstructedMazeEnv(RoomGrid):
         self.mission = "pick up the %s ball" % self.ball_to_find_color
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if action == self.actions.pickup:
             if self.carrying and self.carrying == self.obj:
                 reward = self._reward()
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
     def add_door(
         self,
@@ -165,7 +166,9 @@ class ObstructedMaze_Full(ObstructedMazeEnv):
         corners = [(2, 0), (2, 2), (0, 2), (0, 0)][: self.num_quarters]
         ball_room = self._rand_elem(corners)
 
-        self.obj, _ = self.add_object(*ball_room, "ball", color=self.ball_to_find_color)
+        self.obj, _ = self.add_object(
+            ball_room[0], ball_room[1], "ball", color=self.ball_to_find_color
+        )
         self.place_agent(*self.agent_room)
 
 

+ 0 - 4
gym_minigrid/envs/playground.py

@@ -88,7 +88,3 @@ class PlaygroundEnv(MiniGridEnv):
 
         # No explicit mission in this environment
         self.mission = ""
-
-    def step(self, action):
-        obs, reward, done, info = super().step(action)
-        return obs, reward, done, info

+ 4 - 4
gym_minigrid/envs/putnear.py

@@ -113,7 +113,7 @@ class PutNearEnv(MiniGridEnv):
     def step(self, action):
         preCarrying = self.carrying
 
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         u, v = self.dir_vec
         ox, oy = (self.agent_pos[0] + u, self.agent_pos[1] + v)
@@ -125,13 +125,13 @@ class PutNearEnv(MiniGridEnv):
                 self.carrying.type != self.move_type
                 or self.carrying.color != self.moveColor
             ):
-                done = True
+                terminated = True
 
         # If successfully dropping an object near the target
         if action == self.actions.drop and preCarrying:
             if self.grid.get(ox, oy) is preCarrying:
                 if abs(ox - tx) <= 1 and abs(oy - ty) <= 1:
                     reward = self._reward()
-            done = True
+            terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info

+ 5 - 5
gym_minigrid/envs/redbluedoors.py

@@ -49,7 +49,7 @@ class RedBlueDoorEnv(MiniGridEnv):
         red_door_opened_before = self.red_door.is_open
         blue_door_opened_before = self.blue_door.is_open
 
-        obs, reward, done, info = MiniGridEnv.step(self, action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         red_door_opened_after = self.red_door.is_open
         blue_door_opened_after = self.blue_door.is_open
@@ -57,14 +57,14 @@ class RedBlueDoorEnv(MiniGridEnv):
         if blue_door_opened_after:
             if red_door_opened_before:
                 reward = self._reward()
-                done = True
+                terminated = True
             else:
                 reward = 0
-                done = True
+                terminated = True
 
         elif red_door_opened_after:
             if blue_door_opened_before:
                 reward = 0
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info

+ 3 - 3
gym_minigrid/envs/unlock.py

@@ -33,11 +33,11 @@ class UnlockEnv(RoomGrid):
         self.mission = "open the door"
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if action == self.actions.toggle:
             if self.door.is_open:
                 reward = self._reward()
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info

+ 3 - 3
gym_minigrid/envs/unlockpickup.py

@@ -38,11 +38,11 @@ class UnlockPickupEnv(RoomGrid):
         self.mission = f"pick up the {obj.color} {obj.type}"
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
 
         if action == self.actions.pickup:
             if self.carrying and self.carrying == self.obj:
                 reward = self._reward()
-                done = True
+                terminated = True
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info

+ 7 - 4
gym_minigrid/manual_control.py

@@ -25,11 +25,14 @@ def reset():
 
 
 def step(action):
-    obs, reward, done, info = env.step(action)
+    obs, reward, terminated, truncated, info = env.step(action)
     print(f"step={env.step_count}, reward={reward:.2f}")
 
-    if done:
-        print("done!")
+    if terminated:
+        print("terminated!")
+        reset()
+    elif truncated:
+        print("truncated!")
         reset()
     else:
         redraw(obs)
@@ -92,7 +95,7 @@ parser.add_argument(
 args = parser.parse_args()
 
 seed = None if args.seed == -1 else args.seed
-env = gym.make(args.env, seed=seed)
+env = gym.make(args.env, seed=seed, new_step_api=True)
 
 if args.agent_view:
     env = RGBImgPartialObsWrapper(env)

+ 33 - 22
gym_minigrid/minigrid.py

@@ -534,7 +534,7 @@ class Box(WorldObj):
 
     def toggle(self, env, pos):
         # Replace the box by its contents
-        env.grid.set(*pos, self.contains)
+        env.grid.set(pos[0], pos[1], self.contains)
         return True
 
 
@@ -696,7 +696,7 @@ class Grid:
 
         return img
 
-    def render(self, tile_size, agent_pos=None, agent_dir=None, highlight_mask=None):
+    def render(self, tile_size, agent_pos, agent_dir=None, highlight_mask=None):
         """
         Render this grid at a given scale
         :param r: target renderer object
@@ -914,21 +914,29 @@ class MiniGridEnv(gym.Env):
         self.agent_pos: np.ndarray = None
         self.agent_dir: int = None
 
+        # Current grid and carried object
+        self.grid = Grid(width, height)
+        self.carrying = None
+
         # Initialize the state
         self.reset()
 
     def reset(self, *, seed=None, return_info=False, options=None):
         super().reset(seed=seed)
-        # Current position and direction of the agent
-        self.agent_pos = None
-        self.agent_dir = None
+
+        # Reinitialize episode-specific variables
+        self.agent_pos = (-1, -1)
+        self.agent_dir = -1
 
         # Generate a new random grid at the start of each episode
         self._gen_grid(self.width, self.height)
 
         # These fields should be defined by _gen_grid
-        assert self.agent_pos is not None
-        assert self.agent_dir is not None
+        assert (
+            self.agent_pos >= (0, 0)
+            if isinstance(self.agent_pos, tuple)
+            else all(self.agent_pos >= 0)
+        ) and self.agent_dir >= 0
 
         # Check that the agent doesn't overlap with an object
         start_cell = self.grid.get(*self.agent_pos)
@@ -1126,6 +1134,8 @@ class MiniGridEnv(gym.Env):
                 )
             )
 
+            pos = tuple(pos)
+
             # Don't place the object on top of another object
             if self.grid.get(*pos) is not None:
                 continue
@@ -1140,7 +1150,7 @@ class MiniGridEnv(gym.Env):
 
             break
 
-        self.grid.set(*pos, obj)
+        self.grid.set(pos[0], pos[1], obj)
 
         if obj is not None:
             obj.init_pos = pos
@@ -1162,7 +1172,7 @@ class MiniGridEnv(gym.Env):
         Set the agent's starting point at an empty position in the grid
         """
 
-        self.agent_pos = None
+        self.agent_pos = (-1, -1)
         pos = self.place_obj(None, top, size, max_tries=max_tries)
         self.agent_pos = pos
 
@@ -1292,13 +1302,16 @@ class MiniGridEnv(gym.Env):
         obs_cell = obs_grid.get(vx, vy)
         world_cell = self.grid.get(x, y)
 
+        assert world_cell is not None
+
         return obs_cell is not None and obs_cell.type == world_cell.type
 
     def step(self, action):
         self.step_count += 1
 
         reward = 0
-        done = False
+        terminated = False
+        truncated = False
 
         # Get the position in front of the agent
         fwd_pos = self.front_pos
@@ -1319,24 +1332,25 @@ class MiniGridEnv(gym.Env):
         # Move forward
         elif action == self.actions.forward:
             if fwd_cell is None or fwd_cell.can_overlap():
-                self.agent_pos = fwd_pos
+                self.agent_pos = tuple(fwd_pos)
             if fwd_cell is not None and fwd_cell.type == "goal":
-                done = True
+                terminated = True
                 reward = self._reward()
             if fwd_cell is not None and fwd_cell.type == "lava":
-                done = True
+                terminated = True
+
         # Pick up an object
         elif action == self.actions.pickup:
             if fwd_cell and fwd_cell.can_pickup():
                 if self.carrying is None:
                     self.carrying = fwd_cell
                     self.carrying.cur_pos = np.array([-1, -1])
-                    self.grid.set(*fwd_pos, None)
+                    self.grid.set(fwd_pos[0], fwd_pos[1], None)
 
         # Drop an object
         elif action == self.actions.drop:
             if not fwd_cell and self.carrying:
-                self.grid.set(*fwd_pos, self.carrying)
+                self.grid.set(fwd_pos[0], fwd_pos[1], self.carrying)
                 self.carrying.cur_pos = fwd_pos
                 self.carrying = None
 
@@ -1350,14 +1364,14 @@ class MiniGridEnv(gym.Env):
             pass
 
         else:
-            assert False, "unknown action"
+            raise ValueError(f"Unknown action: {action}")
 
         if self.step_count >= self.max_steps:
-            done = True
+            truncated = True
 
         obs = self.gen_obs()
 
-        return obs, reward, done, {}
+        return obs, reward, terminated, truncated, {}
 
     def gen_obs_grid(self, agent_view_size=None):
         """
@@ -1406,10 +1420,6 @@ class MiniGridEnv(gym.Env):
         # Encode the partially observable view into a numpy array
         image = grid.encode(vis_mask)
 
-        assert hasattr(
-            self, "mission"
-        ), "environments must define a textual mission string"
-
         # Observations are dictionaries containing:
         # - an image (partially observable view of the environment)
         # - the agent's direction/orientation (acting as a compass)
@@ -1487,6 +1497,7 @@ class MiniGridEnv(gym.Env):
         )
 
         if mode == "human":
+            assert self.window is not None
             self.window.set_caption(self.mission)
             self.window.show_img(img)
         else:
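
For context on the MiniGridEnv.step change above: separating truncated (step limit reached) from terminated (the task itself ended) matters to value-based agents, which should still bootstrap from the next state on truncation but not on termination. A minimal sketch of that convention (gamma and next_value are placeholders, not symbols from this codebase):

def td_target(reward, next_value, terminated, gamma=0.99):
    # On true termination there is no future return to bootstrap from;
    # on truncation the caller may still bootstrap from next_value.
    return reward + (0.0 if terminated else gamma * next_value)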

+ 5 - 5
gym_minigrid/rendering.py

@@ -49,8 +49,8 @@ def rotate_fn(fin, cx, cy, theta):
 
 
 def point_in_line(x0, y0, x1, y1, r):
-    p0 = np.array([x0, y0])
-    p1 = np.array([x1, y1])
+    p0 = np.array([x0, y0], dtype=np.float32)
+    p1 = np.array([x1, y1], dtype=np.float32)
     dir = p1 - p0
     dist = np.linalg.norm(dir)
     dir = dir / dist
@@ -94,9 +94,9 @@ def point_in_rect(xmin, xmax, ymin, ymax):
 
 
 def point_in_triangle(a, b, c):
-    a = np.array(a)
-    b = np.array(b)
-    c = np.array(c)
+    a = np.array(a, dtype=np.float32)
+    b = np.array(b, dtype=np.float32)
+    c = np.array(c, dtype=np.float32)
 
     def fn(x, y):
         v0 = c - a

+ 1 - 1
gym_minigrid/roomgrid.py

@@ -241,7 +241,7 @@ class RoomGrid(MiniGridEnv):
         door = Door(color, is_locked=locked)
 
         pos = room.door_pos[door_idx]
-        self.grid.set(*pos, door)
+        self.grid.set(pos[0], pos[1], door)
         door.cur_pos = pos
 
         neighbor = room.neighbors[door_idx]

+ 6 - 7
gym_minigrid/wrappers.py

@@ -28,11 +28,10 @@ class ReseedWrapper(Wrapper):
         return self.env.reset(seed=seed, **kwargs)
 
     def step(self, action):
-        obs, reward, done, info = self.env.step(action)
-        return obs, reward, done, info
+        return self.env.step(action)
 
 
-class ActionBonus(Wrapper):
+class ActionBonus(gym.Wrapper):
     """
     Wrapper which adds an exploration bonus.
     This is a reward to encourage exploration of less
@@ -44,7 +43,7 @@ class ActionBonus(Wrapper):
         self.counts = {}
 
     def step(self, action):
-        obs, reward, done, info = self.env.step(action)
+        obs, reward, terminated, truncated, info = self.env.step(action)
 
         env = self.unwrapped
         tup = (tuple(env.agent_pos), env.agent_dir, action)
@@ -61,7 +60,7 @@ class ActionBonus(Wrapper):
         bonus = 1 / math.sqrt(new_count)
         reward += bonus
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
     def reset(self, **kwargs):
         return self.env.reset(**kwargs)
@@ -78,7 +77,7 @@ class StateBonus(Wrapper):
         self.counts = {}
 
     def step(self, action):
-        obs, reward, done, info = self.env.step(action)
+        obs, reward, terminated, truncated, info = self.env.step(action)
 
         # Tuple based on which we index the counts
         # We use the position after an update
@@ -97,7 +96,7 @@ class StateBonus(Wrapper):
         bonus = 1 / math.sqrt(new_count)
         reward += bonus
 
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
     def reset(self, **kwargs):
         return self.env.reset(**kwargs)

+ 25 - 18
tests/test_envs.py

@@ -17,7 +17,7 @@ CHECK_ENV_IGNORE_WARNINGS = [
         "For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information.",
         "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.",
         "Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.",
-        "Core environment is written in old step API which returns one bool instead of two. It is recommended to rewrite the environment with new step API. ",
+        "Core environment is written in old step API which returns one bool instead of two. It is recommended to rewrite the environment with new step API.",
     ]
 ]
 
@@ -55,14 +55,14 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
     - observation after first reset are the same
     - same actions are sampled by the two envs
     - observations are contained in the observation space
-    - obs, rew, done and info are equals between the two envs
+    - obs, rew, terminated, truncated and info are equals between the two envs
     """
     # Don't check rollout equality if it's a nondeterministic environment.
     if env_spec.nondeterministic is True:
         return
 
-    env_1 = env_spec.make(disable_env_checker=True)
-    env_2 = env_spec.make(disable_env_checker=True)
+    env_1 = env_spec.make(disable_env_checker=True, new_step_api=True)
+    env_2 = env_spec.make(disable_env_checker=True, new_step_api=True)
 
     initial_obs_1 = env_1.reset(seed=SEED)
     initial_obs_2 = env_2.reset(seed=SEED)
@@ -74,8 +74,8 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
         # We don't evaluate the determinism of actions
         action = env_1.action_space.sample()
 
-        obs_1, rew_1, done_1, info_1 = env_1.step(action)
-        obs_2, rew_2, done_2, info_2 = env_2.step(action)
+        obs_1, rew_1, terminated_1, truncated_1, info_1 = env_1.step(action)
+        obs_2, rew_2, terminated_2, truncated_2, info_2 = env_2.step(action)
 
         assert_equals(obs_1, obs_2, f"[{time_step}] ")
         assert env_1.observation_space.contains(
@@ -83,10 +83,17 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
         )  # obs_2 verified by previous assertion
 
         assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}"
-        assert done_1 == done_2, f"[{time_step}] done 1={done_1}, done 2={done_2}"
+        assert (
+            terminated_1 == terminated_2
+        ), f"[{time_step}] terminated 1={terminated_1}, terminated 2={terminated_2}"
+        assert (
+            truncated_1 == truncated_2
+        ), f"[{time_step}] truncated 1={truncated_1}, truncated 2={truncated_2}"
         assert_equals(info_1, info_2, f"[{time_step}] ")
 
-        if done_1:  # done_2 verified by previous assertion
+        if (
+            terminated_1 or truncated_1
+        ):  # terminated_2 and truncated_2 verified by previous assertion
             env_1.reset(seed=SEED)
             env_2.reset(seed=SEED)
 
@@ -98,11 +105,11 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
     "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
 )
 def test_render_modes(spec):
-    env = spec.make()
+    env = spec.make(new_step_api=True)
 
     for mode in env.metadata.get("render_modes", []):
         if mode != "human":
-            new_env = spec.make()
+            new_env = spec.make(new_step_api=True)
 
             new_env.reset()
             new_env.step(new_env.action_space.sample())
@@ -111,7 +118,7 @@ def test_render_modes(spec):
 
 @pytest.mark.parametrize("env_id", ["MiniGrid-DoorKey-6x6-v0"])
 def test_agent_sees_method(env_id):
-    env = gym.make(env_id)
+    env = gym.make(env_id, new_step_api=True)
     goal_pos = (env.grid.width - 2, env.grid.height - 2)
 
     # Test the "in" operator on grid objects
@@ -122,14 +129,14 @@ def test_agent_sees_method(env_id):
     env.reset()
     for i in range(0, 500):
         action = env.action_space.sample()
-        obs, reward, done, info = env.step(action)
+        obs, reward, terminated, truncated, info = env.step(action)
 
         grid, _ = Grid.decode(obs["image"])
         goal_visible = ("green", "goal") in grid
 
         agent_sees_goal = env.agent_sees(*goal_pos)
         assert agent_sees_goal == goal_visible
-        if done:
+        if terminated or truncated:
             env.reset()
 
     env.close()
@@ -140,7 +147,7 @@ def test_agent_sees_method(env_id):
 )
 def old_run_test(env_spec):
     # Load the gym environment
-    env = env_spec.make()
+    env = env_spec.make(new_step_api=True)
     env.max_steps = min(env.max_steps, 200)
     env.reset()
     env.render()
@@ -162,7 +169,7 @@ def old_run_test(env_spec):
         # Pick a random action
         action = env.action_space.sample()
 
-        obs, reward, done, info = env.step(action)
+        obs, reward, terminated, truncated, info = env.step(action)
 
         # Validate the agent position
         assert env.agent_pos[0] < env.width
@@ -181,7 +188,7 @@ def old_run_test(env_spec):
         assert reward >= env.reward_range[0], reward
         assert reward <= env.reward_range[1], reward
 
-        if done:
+        if terminated or truncated:
             num_episodes += 1
             env.reset()
 
@@ -193,7 +200,7 @@ def old_run_test(env_spec):
 
 @pytest.mark.parametrize("env_id", ["MiniGrid-Empty-8x8-v0"])
 def test_interactive_mode(env_id):
-    env = gym.make(env_id)
+    env = gym.make(env_id, new_step_api=True)
     env.reset()
 
     for i in range(0, 100):
@@ -202,7 +209,7 @@ def test_interactive_mode(env_id):
         # Pick a random action
         action = env.action_space.sample()
 
-        obs, reward, done, info = env.step(action)
+        obs, reward, terminated, truncated, info = env.step(action)
 
     # Test the close method
     env.close()

+ 28 - 24
tests/test_wrappers.py

@@ -32,8 +32,8 @@ def test_reseed_wrapper(env_spec):
     """
     Test the ReseedWrapper with a list of SEEDS.
     """
-    unwrapped_env = env_spec.make()
-    env = env_spec.make()
+    unwrapped_env = env_spec.make(new_step_api=True)
+    env = env_spec.make(new_step_api=True)
     env = ReseedWrapper(env, seeds=SEEDS)
     env.action_space.seed(0)
 
@@ -43,11 +43,12 @@ def test_reseed_wrapper(env_spec):
         for time_step in range(NUM_STEPS):
             action = env.action_space.sample()
 
-            obs, rew, done, info = env.step(action)
+            obs, rew, terminated, truncated, info = env.step(action)
             (
                 unwrapped_obs,
                 unwrapped_rew,
-                unwrapped_done,
+                unwrapped_terminated,
+                unwrapped_truncated,
                 unwrapped_info,
             ) = unwrapped_env.step(action)
 
@@ -58,12 +59,15 @@ def test_reseed_wrapper(env_spec):
                 rew == unwrapped_rew
             ), f"[{time_step}] reward={rew}, unwrapped reward={unwrapped_rew}"
             assert (
-                done == unwrapped_done
-            ), f"[{time_step}] done={done}, unwrapped done={unwrapped_done}"
+                terminated == unwrapped_terminated
+            ), f"[{time_step}] terminated={terminated}, unwrapped terminated={unwrapped_terminated}"
+            assert (
+                truncated == unwrapped_truncated
+            ), f"[{time_step}] truncated={truncated}, unwrapped truncated={unwrapped_truncated}"
             assert_equals(info, unwrapped_info, f"[{time_step}] ")
 
             # Start the next seed
-            if done:
+            if terminated or truncated:
                 break
 
     env.close()
@@ -72,8 +76,8 @@ def test_reseed_wrapper(env_spec):
 
 @pytest.mark.parametrize("env_id", ["MiniGrid-Empty-16x16-v0"])
 def test_state_bonus_wrapper(env_id):
-    env = gym.make(env_id)
-    wrapped_env = StateBonus(gym.make(env_id))
+    env = gym.make(env_id, new_step_api=True)
+    wrapped_env = StateBonus(gym.make(env_id, new_step_api=True))
 
     action_forward = MiniGridEnv.Actions.forward
     action_left = MiniGridEnv.Actions.left
@@ -86,14 +90,14 @@ def test_state_bonus_wrapper(env_id):
 
     # Turn left 3 times (check that actions don't influence bonus)
     for _ in range(3):
-        _, wrapped_rew, _, _ = wrapped_env.step(action_left)
+        _, wrapped_rew, _, _, _ = wrapped_env.step(action_left)
 
     env.reset()
     for _ in range(5):
         env.step(action_forward)
     # Turn right 3 times
     for _ in range(3):
-        _, rew, _, _ = env.step(action_right)
+        _, rew, _, _, _ = env.step(action_right)
 
     expected_bonus_reward = rew + 1 / math.sqrt(13)
 
@@ -102,19 +106,19 @@ def test_state_bonus_wrapper(env_id):
 
 @pytest.mark.parametrize("env_id", ["MiniGrid-Empty-16x16-v0"])
 def test_action_bonus_wrapper(env_id):
-    env = gym.make(env_id)
-    wrapped_env = ActionBonus(gym.make(env_id))
+    env = gym.make(env_id, new_step_api=True)
+    wrapped_env = ActionBonus(gym.make(env_id, new_step_api=True))
 
     action = MiniGridEnv.Actions.forward
 
     for _ in range(10):
         wrapped_env.reset()
         for _ in range(5):
-            _, wrapped_rew, _, _ = wrapped_env.step(action)
+            _, wrapped_rew, _, _, _ = wrapped_env.step(action)
 
     env.reset()
     for _ in range(5):
-        _, rew, _, _ = env.step(action)
+        _, rew, _, _, _ = env.step(action)
 
     expected_bonus_reward = rew + 1 / math.sqrt(10)
 
@@ -125,11 +129,11 @@ def test_action_bonus_wrapper(env_id):
     "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
 )
 def test_dict_observation_space_wrapper(env_spec):
-    env = env_spec.make()
+    env = env_spec.make(new_step_api=True)
     env = DictObservationSpaceWrapper(env)
     env.reset()
     mission = env.mission
-    obs, _, _, _ = env.step(0)
+    obs, _, _, _, _ = env.step(0)
     assert env.string_to_indices(mission) == [
         value for value in obs["mission"] if value != 0
     ]
@@ -153,7 +157,7 @@ def test_dict_observation_space_wrapper(env_spec):
     "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
 )
 def test_main_wrappers(wrapper, env_spec):
-    env = env_spec.make()
+    env = env_spec.make(new_step_api=True)
     env = wrapper(env)
     for _ in range(10):
         env.reset()
@@ -173,7 +177,7 @@ def test_main_wrappers(wrapper, env_spec):
     "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
 )
 def test_observation_space_wrappers(wrapper, env_spec):
-    env = wrapper(env_spec.make(disable_env_checker=True))
+    env = wrapper(env_spec.make(disable_env_checker=True, new_step_api=True))
     obs_space, wrapper_name = env.observation_space, wrapper.__name__
     assert isinstance(
         obs_space, gym.spaces.Dict
@@ -202,9 +206,9 @@ class EmptyEnvWithExtraObs(EmptyEnv):
         return obs
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, terminated, truncated, info = super().step(action)
         obs["size"] = np.array([self.width, self.height])
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 @pytest.mark.parametrize(
@@ -218,7 +222,7 @@ class EmptyEnvWithExtraObs(EmptyEnv):
 )
 def test_agent_sees_method(wrapper):
     env1 = wrapper(EmptyEnvWithExtraObs())
-    env2 = wrapper(gym.make("MiniGrid-Empty-5x5-v0"))
+    env2 = wrapper(gym.make("MiniGrid-Empty-5x5-v0", new_step_api=True))
 
     obs1 = env1.reset(seed=0)
     obs2 = env2.reset(seed=0)
@@ -228,8 +232,8 @@ def test_agent_sees_method(wrapper):
     for key in obs2:
         assert np.array_equal(obs1[key], obs2[key])
 
-    obs1, reward1, done1, _ = env1.step(0)
-    obs2, reward2, done2, _ = env2.step(0)
+    obs1, reward1, terminated1, truncated1, _ = env1.step(0)
+    obs2, reward2, terminated2, truncated2, _ = env2.step(0)
     assert "size" in obs1
     assert obs1["size"].shape == (2,)
     assert (obs1["size"] == [5, 5]).all()