Переглянути джерело

Adapt to the new gym API where the seed is given to the reset function of the env (by calling super().reset())

saleml 2 роки тому
батько
коміт
cac13fa8ef

+ 1 - 2
gym_minigrid/envs/blockedunlockpickup.py

@@ -8,14 +8,13 @@ class BlockedUnlockPickup(RoomGrid):
     in another room
     """
 
-    def __init__(self, seed=None, **kwargs):
+    def __init__(self, **kwargs):
         room_size = 6
         super().__init__(
             num_rows=1,
             num_cols=2,
             room_size=room_size,
             max_steps=16*room_size**2,
-            seed=seed,
             **kwargs
         )
 

+ 1 - 2
gym_minigrid/envs/crossing.py

@@ -9,7 +9,7 @@ class CrossingEnv(MiniGridEnv):
     Environment with wall or lava obstacles, sparse reward.
     """
 
-    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None, **kwargs):
+    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, **kwargs):
         self.num_crossings = num_crossings
         self.obstacle_type = obstacle_type
         super().__init__(
@@ -17,7 +17,6 @@ class CrossingEnv(MiniGridEnv):
             max_steps=4*size*size,
             # Set this to True for maximum speed
             see_through_walls=False,
-            seed=None,
             **kwargs
         )
 

+ 6 - 14
gym_minigrid/envs/keycorridor.py

@@ -12,7 +12,6 @@ class KeyCorridor(RoomGrid):
         num_rows=3,
         obj_type="ball",
         room_size=6,
-        seed=None,
         **kwargs
     ):
         self.obj_type = obj_type
@@ -21,7 +20,6 @@ class KeyCorridor(RoomGrid):
             room_size=room_size,
             num_rows=num_rows,
             max_steps=30*room_size**2,
-            seed=seed,
             **kwargs
         )
 
@@ -61,56 +59,50 @@ class KeyCorridor(RoomGrid):
         return obs, reward, done, info
 
 class KeyCorridorS3R1(KeyCorridor):
-    def __init__(self, seed=None, **kwargs):
+    def __init__(self, **kwargs):
         super().__init__(
             room_size=3,
             num_rows=1,
-            seed=seed,
             **kwargs
         )
 
 class KeyCorridorS3R2(KeyCorridor):
-    def __init__(self, seed=None, **kwargs):
+    def __init__(self, **kwargs):
         super().__init__(
             room_size=3,
             num_rows=2,
-            seed=seed,
             **kwargs
         )
 
 class KeyCorridorS3R3(KeyCorridor):
-    def __init__(self, seed=None, **kwargs):
+    def __init__(self, **kwargs):
         super().__init__(
             room_size=3,
             num_rows=3,
-            seed=seed, 
             **kwargs
         )
 
 class KeyCorridorS4R3(KeyCorridor):
-    def __init__(self, seed=None, **kwargs):
+    def __init__(self,  **kwargs):
         super().__init__(
             room_size=4,
             num_rows=3,
-            seed=seed, 
             **kwargs
         )
 
 class KeyCorridorS5R3(KeyCorridor):
-    def __init__(self, seed=None, **kwargs):
+    def __init__(self, **kwargs):
         super().__init__(
             room_size=5,
             num_rows=3,
-            seed=seed, 
             **kwargs
         )
 
 class KeyCorridorS6R3(KeyCorridor):
-    def __init__(self, seed=None, **kwargs):
+    def __init__(self,  **kwargs):
         super().__init__(
             room_size=6,
             num_rows=3,
-            seed=seed, 
             **kwargs
         )
 

+ 1 - 2
gym_minigrid/envs/lavagap.py

@@ -7,14 +7,13 @@ class LavaGapEnv(MiniGridEnv):
     This environment is similar to LavaCrossing but simpler in structure.
     """
 
-    def __init__(self, size, obstacle_type=Lava, seed=None, **kwargs):
+    def __init__(self, size, obstacle_type=Lava, **kwargs):
         self.obstacle_type = obstacle_type
         super().__init__(
             grid_size=size,
             max_steps=4*size*size,
             # Set this to True for maximum speed
             see_through_walls=False,
-            seed=None,
             **kwargs
         )
 

+ 12 - 14
gym_minigrid/envs/memory.py

@@ -13,14 +13,12 @@ class MemoryEnv(MiniGridEnv):
 
     def __init__(
         self,
-        seed,
         size=8,
         random_length=False, 
         **kwargs
     ):
         self.random_length = random_length
         super().__init__(
-            seed=seed,
             grid_size=size,
             max_steps=5*size**2,
             # Set this to True for maximum speed
@@ -102,8 +100,8 @@ class MemoryEnv(MiniGridEnv):
         return obs, reward, done, info
 
 class MemoryS17Random(MemoryEnv):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__(seed=seed, size=17, random_length=True, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(size=17, random_length=True, **kwargs)
 
 register(
     id='MiniGrid-MemoryS17Random-v0',
@@ -111,8 +109,8 @@ register(
 )
 
 class MemoryS13Random(MemoryEnv):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__(seed=seed, size=13, random_length=True, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(size=13, random_length=True, **kwargs)
 
 register(
     id='MiniGrid-MemoryS13Random-v0',
@@ -120,8 +118,8 @@ register(
 )
 
 class MemoryS13(MemoryEnv):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__(seed=seed, size=13, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(size=13, **kwargs)
 
 register(
     id='MiniGrid-MemoryS13-v0',
@@ -129,8 +127,8 @@ register(
 )
 
 class MemoryS11(MemoryEnv):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__(seed=seed, size=11, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(size=11, **kwargs)
 
 register(
     id='MiniGrid-MemoryS11-v0',
@@ -138,8 +136,8 @@ register(
 )
 
 class MemoryS9(MemoryEnv):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__(seed=seed, size=9, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(size=9, **kwargs)
 
 register(
     id='MiniGrid-MemoryS9-v0',
@@ -147,8 +145,8 @@ register(
 )
 
 class MemoryS7(MemoryEnv):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__(seed=seed, size=7, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(size=7, **kwargs)
 
 register(
     id='MiniGrid-MemoryS7-v0',

+ 17 - 21
gym_minigrid/envs/obstructedmaze.py

@@ -12,7 +12,6 @@ class ObstructedMazeEnv(RoomGrid):
         num_rows,
         num_cols,
         num_rooms_visited,
-        seed=None, 
         **kwargs
     ):
         room_size = 6
@@ -22,8 +21,7 @@ class ObstructedMazeEnv(RoomGrid):
             room_size=room_size,
             num_rows=num_rows,
             num_cols=num_cols,
-            max_steps=max_steps,
-            seed=seed, 
+            max_steps=max_steps,        
             **kwargs
         )
 
@@ -81,7 +79,7 @@ class ObstructedMaze_1Dlhb(ObstructedMazeEnv):
     rooms. Doors are obstructed by a ball and keys are hidden in boxes.
     """
 
-    def __init__(self, key_in_box=True, blocked=True, seed=None, **kwargs):
+    def __init__(self, key_in_box=True, blocked=True, **kwargs):
         self.key_in_box = key_in_box
         self.blocked = blocked
 
@@ -89,7 +87,6 @@ class ObstructedMaze_1Dlhb(ObstructedMazeEnv):
             num_rows=1,
             num_cols=2,
             num_rooms_visited=2,
-            seed=seed, 
             **kwargs
         )
 
@@ -105,12 +102,12 @@ class ObstructedMaze_1Dlhb(ObstructedMazeEnv):
         self.place_agent(0, 0)
 
 class ObstructedMaze_1Dl(ObstructedMaze_1Dlhb):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__(False, False, seed, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(False, False, **kwargs)
 
 class ObstructedMaze_1Dlh(ObstructedMaze_1Dlhb):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__(True, False, seed, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(True, False, **kwargs)
 
 class ObstructedMaze_Full(ObstructedMazeEnv):
     """
@@ -120,7 +117,7 @@ class ObstructedMaze_Full(ObstructedMazeEnv):
     """
 
     def __init__(self, agent_room=(1, 1), key_in_box=True, blocked=True,
-                 num_quarters=4, num_rooms_visited=25, seed=None, **kwargs):
+                 num_quarters=4, num_rooms_visited=25, **kwargs):
         self.agent_room = agent_room
         self.key_in_box = key_in_box
         self.blocked = blocked
@@ -130,7 +127,6 @@ class ObstructedMaze_Full(ObstructedMazeEnv):
             num_rows=3,
             num_cols=3,
             num_rooms_visited=num_rooms_visited,
-            seed=seed, 
             **kwargs
         )
 
@@ -162,25 +158,25 @@ class ObstructedMaze_Full(ObstructedMazeEnv):
         self.place_agent(*self.agent_room)
 
 class ObstructedMaze_2Dl(ObstructedMaze_Full):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__((2, 1), False, False, 1, 4, seed, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__((2, 1), False, False, 1, 4, **kwargs)
 
 class ObstructedMaze_2Dlh(ObstructedMaze_Full):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__((2, 1), True, False, 1, 4, seed, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__((2, 1), True, False, 1, 4, **kwargs)
 
 
 class ObstructedMaze_2Dlhb(ObstructedMaze_Full):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__((2, 1), True, True, 1, 4, seed, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__((2, 1), True, True, 1, 4, **kwargs)
 
 class ObstructedMaze_1Q(ObstructedMaze_Full):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__((1, 1), True, True, 1, 5, seed, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__((1, 1), True, True, 1, 5, **kwargs)
 
 class ObstructedMaze_2Q(ObstructedMaze_Full):
-    def __init__(self, seed=None, **kwargs):
-        super().__init__((1, 1), True, True, 2, 11, seed, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__((1, 1), True, True, 2, 11, **kwargs)
 
 register(
     id="MiniGrid-ObstructedMaze-1Dl-v0",

+ 1 - 2
gym_minigrid/envs/unlock.py

@@ -7,14 +7,13 @@ class Unlock(RoomGrid):
     Unlock a door
     """
 
-    def __init__(self, seed=None, **kwargs):
+    def __init__(self, **kwargs):
         room_size = 6
         super().__init__(
             num_rows=1,
             num_cols=2,
             room_size=room_size,
             max_steps=8*room_size**2,
-            seed=seed, 
             **kwargs
         )
 

+ 1 - 2
gym_minigrid/envs/unlockpickup.py

@@ -7,14 +7,13 @@ class UnlockPickup(RoomGrid):
     Unlock a door, then pick up a box in another room
     """
 
-    def __init__(self, seed=None, **kwargs):
+    def __init__(self, **kwargs):
         room_size = 6
         super().__init__(
             num_rows=1,
             num_cols=2,
             room_size=room_size,
             max_steps=8*room_size**2,
-            seed=seed, 
             **kwargs
         )
 

+ 2 - 11
gym_minigrid/minigrid.py

@@ -5,7 +5,6 @@ import gym
 from enum import IntEnum
 import numpy as np
 from gym import error, spaces, utils
-from gym.utils import seeding
 from .rendering import *
 
 # Size in pixels of a tile in the full-scale human view
@@ -672,8 +671,8 @@ class MiniGridEnv(gym.Env):
         height=None,
         max_steps=100,
         see_through_walls=False,
-        seed=1337,
-        agent_view_size=7
+        agent_view_size=7,
+        render_mode=None
     ):
         # Can't set both grid_size and width/height
         if grid_size:
@@ -722,8 +721,6 @@ class MiniGridEnv(gym.Env):
         self.agent_pos = None
         self.agent_dir = None
 
-        # Initialize the RNG
-        self.seed(seed=seed)
 
         # Initialize the state
         self.reset()
@@ -734,8 +731,6 @@ class MiniGridEnv(gym.Env):
         self.agent_dir = None
 
         # Generate a new random grid at the start of each episode
-        # To keep the same grid for each episode, call env.seed() with
-        # the same seed before calling env.reset()
         self._gen_grid(self.width, self.height)
 
         # These fields should be defined by _gen_grid
@@ -756,10 +751,6 @@ class MiniGridEnv(gym.Env):
         obs = self.gen_obs()
         return obs
 
-    def seed(self, seed=1337):
-        # Seed the random number generator
-        self.np_random, _ = seeding.np_random(seed)
-        return [seed]
 
     def hash(self, size=16):
         """Compute a hash that uniquely identifies the current state of the environment.

+ 0 - 2
gym_minigrid/roomgrid.py

@@ -72,7 +72,6 @@ class RoomGrid(MiniGridEnv):
         num_rows=3,
         num_cols=3,
         max_steps=100,
-        seed=0,
         agent_view_size=7,
         **kwargs
     ):
@@ -95,7 +94,6 @@ class RoomGrid(MiniGridEnv):
             height=height,
             max_steps=max_steps,
             see_through_walls=False,
-            seed=seed,
             agent_view_size=agent_view_size,
             **kwargs
         )

+ 1 - 2
gym_minigrid/wrappers.py

@@ -22,8 +22,7 @@ class ReseedWrapper(gym.core.Wrapper):
     def reset(self, **kwargs):
         seed = self.seeds[self.seed_idx]
         self.seed_idx = (self.seed_idx + 1) % len(self.seeds)
-        self.env.seed(seed)
-        return self.env.reset(**kwargs)
+        return self.env.reset(seed=seed, **kwargs)
 
     def step(self, action):
         obs, reward, done, info = self.env.step(action)

+ 3 - 4
manual_control.py

@@ -15,10 +15,9 @@ def redraw(img):
     window.show_img(img)
 
 def reset():
-    if args.seed != -1:
-        env.seed(args.seed)
-
-    obs = env.reset()
+    if args.seed == -1:
+        seed = None
+    obs = env.reset(seed=seed)
 
     if hasattr(env, 'mission'):
         print('Mission: %s' % env.mission)

+ 2 - 2
run_tests.py

@@ -29,9 +29,9 @@ for env_idx, env_name in enumerate(env_list):
     # Verify that the same seed always produces the same environment
     for i in range(0, 5):
         seed = 1337 + i
-        env.seed(seed)
+        _ = env.reset(seed=seed)
         grid1 = env.grid
-        env.seed(seed)
+        _ = env.reset(seed=seed)
         grid2 = env.grid
         assert grid1 == grid2