3 年之前 · 0832a47bc1
--- a/gym_minigrid/minigrid.py
+++ b/gym_minigrid/minigrid.py
@@ -12,48 +12,48 @@ TILE_PIXELS = 32
 
				 
			
 
				 # Map of color names to RGB values
			
 
				 COLORS = {
			
 
				-    'red'   : np.array([255, 0, 0]),
			
 
				-    'green' : np.array([0, 255, 0]),
			
 
				-    'blue'  : np.array([0, 0, 255]),
			
 
				+    'red': np.array([255, 0, 0]),
			
 
				+    'green': np.array([0, 255, 0]),
			
 
				+    'blue': np.array([0, 0, 255]),
			
 
				     'purple': np.array([112, 39, 195]),
			
 
				     'yellow': np.array([255, 255, 0]),
			
 
				-    'grey'  : np.array([100, 100, 100])
			
 
				+    'grey': np.array([100, 100, 100])
			
 
				 }
			
 
				 
			
 
				 COLOR_NAMES = sorted(list(COLORS.keys()))
			
 
				 
			
 
				 # Used to map colors to integers
			
 
				 COLOR_TO_IDX = {
			
 
				-    'red'   : 0,
			
 
				-    'green' : 1,
			
 
				-    'blue'  : 2,
			
 
				+    'red': 0,
			
 
				+    'green': 1,
			
 
				+    'blue': 2,
			
 
				     'purple': 3,
			
 
				     'yellow': 4,
			
 
				-    'grey'  : 5
			
 
				+    'grey': 5
			
 
				 }
			
 
				 
			
 
				 IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys()))
			
 
				 
			
 
				 # Map of object type to integers
			
 
				 OBJECT_TO_IDX = {
			
 
				-    'unseen'        : 0,
			
 
				-    'empty'         : 1,
			
 
				-    'wall'          : 2,
			
 
				-    'floor'         : 3,
			
 
				-    'door'          : 4,
			
 
				-    'key'           : 5,
			
 
				-    'ball'          : 6,
			
 
				-    'box'           : 7,
			
 
				-    'goal'          : 8,
			
 
				-    'lava'          : 9,
			
 
				-    'agent'         : 10,
			
 
				+    'unseen': 0,
			
 
				+    'empty': 1,
			
 
				+    'wall': 2,
			
 
				+    'floor': 3,
			
 
				+    'door': 4,
			
 
				+    'key': 5,
			
 
				+    'ball': 6,
			
 
				+    'box': 7,
			
 
				+    'goal': 8,
			
 
				+    'lava': 9,
			
 
				+    'agent': 10,
			
 
				 }
			
 
				 
			
 
				 IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys()))
			
 
				 
			
 
				 # Map of state names to integers
			
 
				 STATE_TO_IDX = {
			
 
				-    'open'  : 0,
			
 
				+    'open': 0,
			
 
				     'closed': 1,
			
 
				     'locked': 2,
			
 
				 }
			
@@ -70,6 +70,7 @@ DIR_TO_VEC = [
 
				     np.array((0, -1)),
			
 
				 ]
			
 
				 
			
 
				+
			
 
				 class WorldObj:
			
 
				     """
			
 
				     Base class for grid world objects
			
@@ -151,6 +152,7 @@ class WorldObj:
 
				         """Draw this object with the given renderer"""
			
 
				         raise NotImplementedError
			
 
				 
			
 
				+
			
 
				 class Goal(WorldObj):
			
 
				     def __init__(self):
			
 
				         super().__init__('goal', 'green')
			
@@ -161,6 +163,7 @@ class Goal(WorldObj):
 
				     def render(self, img):
			
 
				         fill_coords(img, point_in_rect(0, 1, 0, 1), COLORS[self.color])
			
 
				 
			
 
				+
			
 
				 class Floor(WorldObj):
			
 
				     """
			
 
				     Colored floor tile the agent can walk over
			
@@ -195,10 +198,15 @@ class Lava(WorldObj):
 
				         for i in range(3):
			
 
				             ylo = 0.3 + 0.2 * i
			
 
				             yhi = 0.4 + 0.2 * i
			
 
				-            fill_coords(img, point_in_line(0.1, ylo, 0.3, yhi, r=0.03), (0,0,0))
			
 
				-            fill_coords(img, point_in_line(0.3, yhi, 0.5, ylo, r=0.03), (0,0,0))
			
 
				-            fill_coords(img, point_in_line(0.5, ylo, 0.7, yhi, r=0.03), (0,0,0))
			
 
				-            fill_coords(img, point_in_line(0.7, yhi, 0.9, ylo, r=0.03), (0,0,0))
			
 
				+            fill_coords(img, point_in_line(
			
 
				+                0.1, ylo, 0.3, yhi, r=0.03), (0, 0, 0))
			
 
				+            fill_coords(img, point_in_line(
			
 
				+                0.3, yhi, 0.5, ylo, r=0.03), (0, 0, 0))
			
 
				+            fill_coords(img, point_in_line(
			
 
				+                0.5, ylo, 0.7, yhi, r=0.03), (0, 0, 0))
			
 
				+            fill_coords(img, point_in_line(
			
 
				+                0.7, yhi, 0.9, ylo, r=0.03), (0, 0, 0))
			
 
				+
			
 
				 
			
 
				 class Wall(WorldObj):
			
 
				     def __init__(self, color='grey'):
			
@@ -210,6 +218,7 @@ class Wall(WorldObj):
 
				     def render(self, img):
			
 
				         fill_coords(img, point_in_rect(0, 1, 0, 1), COLORS[self.color])
			
 
				 
			
 
				+
			
 
				 class Door(WorldObj):
			
 
				     def __init__(self, color, is_open=False, is_locked=False):
			
 
				         super().__init__('door', color)
			
@@ -253,25 +262,27 @@ class Door(WorldObj):
 
				 
			
 
				         if self.is_open:
			
 
				             fill_coords(img, point_in_rect(0.88, 1.00, 0.00, 1.00), c)
			
 
				-            fill_coords(img, point_in_rect(0.92, 0.96, 0.04, 0.96), (0,0,0))
			
 
				+            fill_coords(img, point_in_rect(0.92, 0.96, 0.04, 0.96), (0, 0, 0))
			
 
				             return
			
 
				 
			
 
				         # Door frame and door
			
 
				         if self.is_locked:
			
 
				             fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c)
			
 
				-            fill_coords(img, point_in_rect(0.06, 0.94, 0.06, 0.94), 0.45 * np.array(c))
			
 
				+            fill_coords(img, point_in_rect(
			
 
				+                0.06, 0.94, 0.06, 0.94), 0.45 * np.array(c))
			
 
				 
			
 
				             # Draw key slot
			
 
				             fill_coords(img, point_in_rect(0.52, 0.75, 0.50, 0.56), c)
			
 
				         else:
			
 
				             fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c)
			
 
				-            fill_coords(img, point_in_rect(0.04, 0.96, 0.04, 0.96), (0,0,0))
			
 
				+            fill_coords(img, point_in_rect(0.04, 0.96, 0.04, 0.96), (0, 0, 0))
			
 
				             fill_coords(img, point_in_rect(0.08, 0.92, 0.08, 0.92), c)
			
 
				-            fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), (0,0,0))
			
 
				+            fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), (0, 0, 0))
			
 
				 
			
 
				             # Draw door handle
			
 
				             fill_coords(img, point_in_circle(cx=0.75, cy=0.50, r=0.08), c)
			
 
				 
			
 
				+
			
 
				 class Key(WorldObj):
			
 
				     def __init__(self, color='blue'):
			
 
				         super(Key, self).__init__('key', color)
			
@@ -291,7 +302,8 @@ class Key(WorldObj):
 
				 
			
 
				         # Ring
			
 
				         fill_coords(img, point_in_circle(cx=0.56, cy=0.28, r=0.190), c)
			
 
				-        fill_coords(img, point_in_circle(cx=0.56, cy=0.28, r=0.064), (0,0,0))
			
 
				+        fill_coords(img, point_in_circle(cx=0.56, cy=0.28, r=0.064), (0, 0, 0))
			
 
				+
			
 
				 
			
 
				 class Ball(WorldObj):
			
 
				     def __init__(self, color='blue'):
			
@@ -303,6 +315,7 @@ class Ball(WorldObj):
 
				     def render(self, img):
			
 
				         fill_coords(img, point_in_circle(0.5, 0.5, 0.31), COLORS[self.color])
			
 
				 
			
 
				+
			
 
				 class Box(WorldObj):
			
 
				     def __init__(self, color, contains=None):
			
 
				         super(Box, self).__init__('box', color)
			
@@ -316,7 +329,7 @@ class Box(WorldObj):
 
				 
			
 
				         # Outline
			
 
				         fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), c)
			
 
				-        fill_coords(img, point_in_rect(0.18, 0.82, 0.18, 0.82), (0,0,0))
			
 
				+        fill_coords(img, point_in_rect(0.18, 0.82, 0.18, 0.82), (0, 0, 0))
			
 
				 
			
 
				         # Horizontal slit
			
 
				         fill_coords(img, point_in_rect(0.16, 0.84, 0.47, 0.53), c)
			
@@ -326,6 +339,7 @@ class Box(WorldObj):
 
				         env.grid.set(*pos, self.contains)
			
 
				         return True
			
 
				 
			
 
				+
			
 
				 class Grid:
			
 
				     """
			
 
				     Represent a grid and operations on it
			
@@ -359,7 +373,7 @@ class Grid:
 
				         return False
			
 
				 
			
 
				     def __eq__(self, other):
			
 
				-        grid1  = self.encode()
			
 
				+        grid1 = self.encode()
			
 
				         grid2 = other.encode()
			
 
				         return np.array_equal(grid2, grid1)
			
 
				 
			
@@ -454,7 +468,8 @@ class Grid:
 
				         if key in cls.tile_cache:
			
 
				             return cls.tile_cache[key]
			
 
				 
			
 
				-        img = np.zeros(shape=(tile_size * subdivs, tile_size * subdivs, 3), dtype=np.uint8)
			
 
				+        img = np.zeros(shape=(tile_size * subdivs,
			
 
				+                       tile_size * subdivs, 3), dtype=np.uint8)
			
 
				 
			
 
				         # Draw the grid lines (top and left edges)
			
 
				         fill_coords(img, point_in_rect(0, 0.031, 0, 1), (100, 100, 100))
			
@@ -472,7 +487,8 @@ class Grid:
 
				             )
			
 
				 
			
 
				             # Rotate the agent based on its direction
			
 
				-            tri_fn = rotate_fn(tri_fn, cx=0.5, cy=0.5, theta=0.5*math.pi*agent_dir)
			
 
				+            tri_fn = rotate_fn(tri_fn, cx=0.5, cy=0.5,
			
 
				+                               theta=0.5*math.pi*agent_dir)
			
 
				             fill_coords(img, tri_fn, (255, 0, 0))
			
 
				 
			
 
				         # Highlight the cell if needed
			
@@ -501,7 +517,8 @@ class Grid:
 
				         """
			
 
				 
			
 
				         if highlight_mask is None:
			
 
				-            highlight_mask = np.zeros(shape=(self.width, self.height), dtype=bool)
			
 
				+            highlight_mask = np.zeros(
			
 
				+                shape=(self.width, self.height), dtype=bool)
			
 
				 
			
 
				         # Compute the total grid size
			
 
				         width_px = self.width * tile_size
			
@@ -615,14 +632,18 @@ class Grid:
 
				 
			
 
				         return mask
			
 
				 
			
 
				+
			
 
				 class MiniGridEnv(gym.Env):
			
 
				     """
			
 
				     2D grid world game environment
			
 
				     """
			
 
				 
			
 
				     metadata = {
			
 
				+        # Deprecated: use 'render_modes' instead
			
 
				+        'render.modes': ['human', 'rgb_array'],
			
 
				+        'video.frames_per_second': 10,  # Deprecated: use 'render_fps' instead
			
 
				         'render_modes': ['human', 'rgb_array'],
			
 
				-        'render_fps' : 10
			
 
				+        'render_fps': 10
			
 
				     }
			
 
				 
			
 
				     # Enumeration of possible actions
			
@@ -682,7 +703,7 @@ class MiniGridEnv(gym.Env):
 
				             'direction': spaces.Discrete(4),
			
 
				             'mission': spaces.Text(max_length=200,
			
 
				                                    charset=string.ascii_letters + string.digits + ' .,!- '
			
 
				-                                  )
			
 
				+                                   )
			
 
				         })
			
 
				 
			
 
				         # render mode
			
@@ -704,7 +725,6 @@ class MiniGridEnv(gym.Env):
 
				         self.agent_pos = None
			
 
				         self.agent_dir = None
			
 
				 
			
 
				-
			
 
				         # Initialize the state
			
 
				         self.reset()
			
 
				 
			
@@ -735,14 +755,14 @@ class MiniGridEnv(gym.Env):
 
				         obs = self.gen_obs()
			
 
				         return obs
			
 
				 
			
 
				-
			
 
				     def hash(self, size=16):
			
 
				         """Compute a hash that uniquely identifies the current state of the environment.
			
 
				         :param size: Size of the hashing
			
 
				         """
			
 
				         sample_hash = hashlib.sha256()
			
 
				 
			
 
				-        to_encode = [self.grid.encode().tolist(), self.agent_pos, self.agent_dir]
			
 
				+        to_encode = [self.grid.encode().tolist(), self.agent_pos,
			
 
				+                     self.agent_dir]
			
 
				         for item in to_encode:
			
 
				             sample_hash.update(str(item).encode('utf8'))
			
 
				 
			
@@ -761,14 +781,14 @@ class MiniGridEnv(gym.Env):
 
				 
			
 
				         # Map of object types to short string
			
 
				         OBJECT_TO_STR = {
			
 
				-            'wall'          : 'W',
			
 
				-            'floor'         : 'F',
			
 
				-            'door'          : 'D',
			
 
				-            'key'           : 'K',
			
 
				-            'ball'          : 'A',
			
 
				-            'box'           : 'B',
			
 
				-            'goal'          : 'G',
			
 
				-            'lava'          : 'V',
			
 
				+            'wall': 'W',
			
 
				+            'floor': 'F',
			
 
				+            'door': 'D',
			
 
				+            'key': 'K',
			
 
				+            'ball': 'A',
			
 
				+            'box': 'B',
			
 
				+            'goal': 'G',
			
 
				+            'lava': 'V',
			
 
				         }
			
 
				 
			
 
				         # Short string for opened door
			
@@ -888,12 +908,12 @@ class MiniGridEnv(gym.Env):
 
				         )
			
 
				 
			
 
				     def place_obj(self,
			
 
				-        obj,
			
 
				-        top=None,
			
 
				-        size=None,
			
 
				-        reject_fn=None,
			
 
				-        max_tries=math.inf
			
 
				-    ):
			
 
				+                  obj,
			
 
				+                  top=None,
			
 
				+                  size=None,
			
 
				+                  reject_fn=None,
			
 
				+                  max_tries=math.inf
			
 
				+                  ):
			
 
				         """
			
 
				         Place an object at an empty position in the grid
			
 
				 
			
@@ -1174,7 +1194,7 @@ class MiniGridEnv(gym.Env):
 
				         """
			
 
				 
			
 
				         topX, topY, botX, botY = self.get_view_exts(agent_view_size)
			
 
				-        
			
 
				+
			
 
				         agent_view_size = agent_view_size or self.agent_view_size
			
 
				 
			
 
				         grid = self.grid.slice(topX, topY, agent_view_size, agent_view_size)
			
@@ -1185,7 +1205,8 @@ class MiniGridEnv(gym.Env):
 
				         # Process occluders and visibility
			
 
				         # Note that this incurs some performance cost
			
 
				         if not self.see_through_walls:
			
 
				-            vis_mask = grid.process_vis(agent_pos=(agent_view_size // 2 , agent_view_size - 1))
			
 
				+            vis_mask = grid.process_vis(agent_pos=(
			
 
				+                agent_view_size // 2, agent_view_size - 1))
			
 
				         else:
			
 
				             vis_mask = np.ones(shape=(grid.width, grid.height), dtype=bool)
			
 
				 
			
@@ -1210,7 +1231,8 @@ class MiniGridEnv(gym.Env):
 
				         # Encode the partially observable view into a numpy array
			
 
				         image = grid.encode(vis_mask)
			
 
				 
			
 
				-        assert hasattr(self, 'mission'), "environments must define a textual mission string"
			
 
				+        assert hasattr(
			
 
				+            self, 'mission'), "environments must define a textual mission string"
			
 
				 
			
 
				         # Observations are dictionaries containing:
			
 
				         # - an image (partially observable view of the environment)
			
@@ -1264,7 +1286,8 @@ class MiniGridEnv(gym.Env):
 
				         # of the agent's view area
			
 
				         f_vec = self.dir_vec
			
 
				         r_vec = self.right_vec
			
 
				-        top_left = self.agent_pos + f_vec * (self.agent_view_size-1) - r_vec * (self.agent_view_size // 2)
			
 
				+        top_left = self.agent_pos + f_vec * \
			
 
				+            (self.agent_view_size-1) - r_vec * (self.agent_view_size // 2)
			
 
				 
			
 
				         # Mask of which cells to highlight
			
 
				         highlight_mask = np.zeros(shape=(self.width, self.height), dtype=bool)