
Faster visibility algorithm. Method renamings.

Maxime Chevalier-Boisvert 7 years ago
commit bfd0f76513

+ 8 - 7
README.md

@@ -84,17 +84,18 @@ python3 pytorch_rl/enjoy.py --env-name MiniGrid-Empty-6x6-v0 --load-dir ./traine
 
 MiniGrid is built to support tasks involving natural language and sparse rewards.
 The observations are dictionaries, with an 'image' field, partially observable
-view of the environment, and a 'mission' field which is a textual string
-describing the objective the agent should reach to get a reward. Using
-dictionaries makes it easy for you to add additional information to observations
+view of the environment, a 'mission' field which is a textual string
+describing the objective the agent should reach to get a reward, and a 'direction'
+field which can be used as an optional compass. Using dictionaries makes it
+easy for you to add additional information to observations
 if you need to, without having to force everything into a single tensor.
-If your RL code expects a tensor for observations, please take a look at
+If your RL code expects a single tensor for observations, please take a look at
 `FlatObsWrapper` in
 [gym_minigrid/wrappers.py](/gym_minigrid/wrappers.py).
 
 The partially observable view of the environment uses a compact and efficient
-encoding, with just 3 input values per visible grid cell, 147 values total.
-If you want to obtain an array of RGB pixels instead, see the `getObsRender` method in
+encoding, with just 3 input values per visible grid cell, 7x7x3 values total.
+If you want to obtain an array of RGB pixels instead, see the `get_obs_render` method in
 [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py).
 
 Structure of the world:
@@ -117,7 +118,7 @@ Actions in the basic environment:
 
 By default, sparse rewards for reaching a goal square are provided, but you can
 define your own reward function by creating a class derived from MiniGridEnv. Extending
-the environment with new object types or action should be very easy very easy.
+the environment with new object types or actions should be very easy.
 If you wish to do this, you should take a look at the
 [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py) source file.
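
For context, here is a minimal sketch (not part of this commit) of how the observation dictionary and `FlatObsWrapper` described above might be used. The environment id comes from the README; the sketch assumes that importing `gym_minigrid` registers the environments.

```python
import gym
import gym_minigrid  # assumed to register the MiniGrid-* environments
from gym_minigrid.wrappers import FlatObsWrapper

env = gym.make('MiniGrid-Empty-6x6-v0')
obs = env.reset()

print(obs['image'].shape)  # compact 7x7x3 encoding of the agent's view
print(obs['direction'])    # optional compass: the direction the agent faces
print(obs['mission'])      # textual description of the objective

# If your RL code expects a single tensor rather than a dictionary:
flat_env = FlatObsWrapper(gym.make('MiniGrid-Empty-6x6-v0'))
print(flat_env.reset().shape)  # one flat numpy array
```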
 

+ 1 - 1
gym_minigrid/envs/doorkey.py

@@ -9,7 +9,7 @@ class DoorKeyEnv(MiniGridEnv):
     def __init__(self, size=8):
         super().__init__(grid_size=size, max_steps=4 * size)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create an empty grid
         self.grid = Grid(width, height)
 

+ 9 - 7
gym_minigrid/envs/empty.py

@@ -7,17 +7,19 @@ class EmptyEnv(MiniGridEnv):
     """
 
     def __init__(self, size=8):
-        super().__init__(grid_size=size, max_steps=3*size)
-
-    def _genGrid(self, width, height):
+        super().__init__(
+            grid_size=size,
+            max_steps=3*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
+    def _gen_grid(self, width, height):
         # Create an empty grid
         self.grid = Grid(width, height)
 
         # Generate the surrounding walls
-        self.grid.horzWall(0, 0)
-        self.grid.horzWall(0, height-1)
-        self.grid.vertWall(0, 0)
-        self.grid.vertWall(width-1, 0)
+        self.grid.wallRect(0, 0, width, height)
 
         # Place the agent in the top-left corner
         self.start_pos = (1, 1)
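
The constructor pattern above (opting into `see_through_walls` for speed) together with the renamed `_gen_grid` hook is what downstream environments now implement. Below is a hypothetical subclass sketched against the API visible in this diff; the class name, goal placement, and mission string are illustrative and not taken from this commit.

```python
from gym_minigrid.minigrid import MiniGridEnv, Grid, Goal

class WalledRoomEnv(MiniGridEnv):
    """Hypothetical example: an empty walled room with a goal square"""

    def __init__(self, size=8):
        super().__init__(
            grid_size=size,
            max_steps=4 * size,
            # Skip the per-step visibility computation for maximum speed
            see_through_walls=True
        )

    def _gen_grid(self, width, height):
        # Build the grid and its surrounding walls
        self.grid = Grid(width, height)
        self.grid.wallRect(0, 0, width, height)

        # _gen_grid must define the agent's starting position and direction
        self.start_pos = (1, 1)
        self.start_dir = 0

        # Place a goal square in the bottom-right corner
        self.grid.set(width - 2, height - 2, Goal())

        self.mission = 'get to the green goal square'
```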

+ 9 - 2
gym_minigrid/envs/fetch.py

@@ -13,10 +13,17 @@ class FetchEnv(MiniGridEnv):
         numObjs=3
     ):
         self.numObjs = numObjs
-        super().__init__(grid_size=size, max_steps=5*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
 
         # Generate the surrounding walls

+ 9 - 2
gym_minigrid/envs/gotodoor.py

@@ -12,10 +12,17 @@ class GoToDoorEnv(MiniGridEnv):
         size=5
     ):
         assert size >= 5
-        super().__init__(grid_size=size, max_steps=10*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         self.grid = Grid(width, height)
 

+ 9 - 2
gym_minigrid/envs/gotoobject.py

@@ -13,10 +13,17 @@ class GoToObjectEnv(MiniGridEnv):
         numObjs=2
     ):
         self.numObjs = numObjs
-        super().__init__(grid_size=size, max_steps=5*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
 
         # Generate the surrounding walls

+ 1 - 1
gym_minigrid/envs/lockedroom.py

@@ -38,7 +38,7 @@ class LockedRoom(MiniGridEnv):
             'image': self.observation_space
         })
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         self.grid = Grid(width, height)
 

+ 1 - 1
gym_minigrid/envs/multiroom.py

@@ -38,7 +38,7 @@ class MultiRoomEnv(MiniGridEnv):
             max_steps=self.maxNumRooms * 20
         )
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         roomList = []
 
         # Choose a random number of rooms to generate

+ 1 - 1
gym_minigrid/envs/playground_v0.py

@@ -11,7 +11,7 @@ class PlaygroundV0(MiniGridEnv):
         super().__init__(grid_size=19, max_steps=100)
         self.reward_range = (0, 1)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         self.grid = Grid(width, height)
 

+ 9 - 2
gym_minigrid/envs/putnear.py

@@ -13,10 +13,17 @@ class PutNearEnv(MiniGridEnv):
         numObjs=2
     ):
         self.numObjs = numObjs
-        super().__init__(grid_size=size, max_steps=5*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
 
         # Generate the surrounding walls

+ 1 - 1
gym_minigrid/envs/roomgrid.py

@@ -77,7 +77,7 @@ class RoomGrid(MiniGridEnv):
         assert j < self.num_rows
         return self.room_grid[j][i]
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         self.grid = Grid(width, height)
 

+ 65 - 13
gym_minigrid/minigrid.py

@@ -69,11 +69,11 @@ class WorldObj:
         """Can the agent overlap with this?"""
         return False
 
-    def canPickup(self):
+    def can_pickup(self):
         """Can the agent pick this up?"""
         return False
 
-    def canContain(self):
+    def can_contain(self):
         """Can this contain another object?"""
         return False
 
@@ -86,9 +86,11 @@ class WorldObj:
         return False
 
     def render(self, r):
-        assert False
+        """Draw this object with the given renderer"""
+        raise NotImplementedError
 
     def _set_color(self, r):
+        """Set the color of this object as the active drawing color"""
         c = COLORS[self.color]
         r.setLineColor(c[0], c[1], c[2])
         r.setColor(c[0], c[1], c[2])
@@ -189,6 +191,9 @@ class LockedDoor(WorldObj):
         """The agent can only walk over this cell when the door is open"""
         return self.is_open
 
+    def see_behind(self):
+        return self.is_open
+
     def render(self, r):
         c = COLORS[self.color]
         r.setLineColor(c[0], c[1], c[2])
@@ -226,7 +231,7 @@ class Key(WorldObj):
     def __init__(self, color='blue'):
         super(Key, self).__init__('key', color)
 
-    def canPickup(self):
+    def can_pickup(self):
         return True
 
     def render(self, r):
@@ -263,7 +268,7 @@ class Ball(WorldObj):
     def __init__(self, color='blue'):
         super(Ball, self).__init__('ball', color)
 
-    def canPickup(self):
+    def can_pickup(self):
         return True
 
     def render(self, r):
@@ -275,7 +280,7 @@ class Box(WorldObj):
         super(Box, self).__init__('box', color)
         self.contains = contains
 
-    def canPickup(self):
+    def can_pickup(self):
         return True
 
     def render(self, r):
@@ -596,6 +601,45 @@ class Grid:
 
         return mask
 
+    def process_vis_prop(
+        grid,
+        agent_pos
+    ):
+        mask = np.zeros(shape=(grid.width, grid.height), dtype=np.bool)
+
+        mask[agent_pos[0], agent_pos[1]] = True
+
+        for j in reversed(range(1, grid.height)):
+            for i in range(0, grid.width-1):
+                if not mask[i, j]:
+                    continue
+
+                cell = grid.get(i, j)
+                if cell and not cell.see_behind():
+                    continue
+
+                mask[i+1, j] = True
+                mask[i+1, j-1] = True
+                mask[i, j-1] = True
+
+            for i in reversed(range(1, grid.width)):
+                if not mask[i, j]:
+                    continue
+
+                cell = grid.get(i, j)
+                if cell and not cell.see_behind():
+                    continue
+
+                mask[i-1, j-1] = True
+                mask[i-1, j] = True
+                mask[i, j-1] = True
+
+        for j in range(0, grid.height):
+            for i in range(0, grid.width):
+                if not mask[i, j]:
+                    grid.set(i, j, None)
+                    #grid.set(i, j, Wall('red'))
+
 class MiniGridEnv(gym.Env):
     """
     2D grid world game environment
@@ -623,7 +667,12 @@ class MiniGridEnv(gym.Env):
         # Wait/stay put/do nothing
         wait = 6
 
-    def __init__(self, grid_size=16, max_steps=100):
+    def __init__(
+        self,
+        grid_size=16,
+        max_steps=100,
+        see_through_walls=False
+    ):
         # Action enumeration for this environment
         self.actions = MiniGridEnv.Actions
 
@@ -654,6 +703,7 @@ class MiniGridEnv(gym.Env):
         # Environment configuration
         self.grid_size = grid_size
         self.max_steps = max_steps
+        self.see_through_walls = see_through_walls
 
         # Starting position and direction for the agent
         self.start_pos = None
@@ -667,9 +717,9 @@ class MiniGridEnv(gym.Env):
         # Generate a new random grid at the start of each episode
         # To keep the same grid for each episode, call env.seed() with
         # the same seed before calling env.reset()
-        self._genGrid(self.grid_size, self.grid_size)
+        self._gen_grid(self.grid_size, self.grid_size)
 
-        # These fields should be defined by _genGrid
+        # These fields should be defined by _gen_grid
         assert self.start_pos != None
         assert self.start_dir != None
 
@@ -788,8 +838,8 @@ class MiniGridEnv(gym.Env):
 
         return "\n".join([" ".join(line) for line in new_array])
 
-    def _genGrid(self, width, height):
-        assert False, "_genGrid needs to be implemented by each environment"
+    def _gen_grid(self, width, height):
+        assert False, "_gen_grid needs to be implemented by each environment"
 
     def _randInt(self, low, high):
         """
@@ -1005,7 +1055,7 @@ class MiniGridEnv(gym.Env):
 
         # Pick up an object
         elif action == self.actions.pickup:
-            if fwdCell and fwdCell.canPickup():
+            if fwdCell and fwdCell.can_pickup():
                 if self.carrying is None:
                     self.carrying = fwdCell
                     self.grid.set(*fwdPos, None)
@@ -1057,7 +1107,9 @@ class MiniGridEnv(gym.Env):
             grid.set(*agent_pos, None)
 
         # Process occluders and visibility
-        grid.process_vis(agent_pos=(3, 6))
+        # Note that this incurs some performance cost
+        if not self.see_through_walls:
+            grid.process_vis_prop(agent_pos=(3, 6))
 
         # Encode the partially observable view into a numpy array
         image = grid.encode()
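
A note on the commit title: `gen_obs` now calls the new `process_vis_prop` only when `see_through_walls` is `False`. The pass propagates a boolean visibility mask outward from the agent's cell, row by row, and stops at cells whose contents do not allow seeing behind them (`see_behind()` returning `False`, e.g. a closed `LockedDoor` per the diff above). The snippet below is a standalone illustration of that propagation idea, reduced to a plain boolean `blocked` array; the function name and array layout are mine, not library code.

```python
import numpy as np

def propagate_visibility(blocked, agent_pos):
    """Illustration of the propagation idea in process_vis_prop (not library code).

    blocked[i, j] is True where the cell occludes sight (see_behind() is False);
    the returned mask[i, j] is True for cells the agent can see.
    """
    width, height = blocked.shape
    mask = np.zeros((width, height), dtype=bool)
    mask[agent_pos[0], agent_pos[1]] = True

    # Sweep row by row, from the agent's row towards the far edge (j decreasing)
    for j in reversed(range(1, height)):
        # Left-to-right: a visible, non-blocking cell reveals its right and
        # forward neighbours
        for i in range(0, width - 1):
            if mask[i, j] and not blocked[i, j]:
                mask[i + 1, j] = True
                mask[i + 1, j - 1] = True
                mask[i, j - 1] = True
        # Right-to-left: same propagation towards the left
        for i in reversed(range(1, width)):
            if mask[i, j] and not blocked[i, j]:
                mask[i - 1, j] = True
                mask[i - 1, j - 1] = True
                mask[i, j - 1] = True

    return mask

# Example: the agent sits at the bottom centre of its 7x7 view, as in gen_obs
blocked = np.zeros((7, 7), dtype=bool)
blocked[:, 4] = True   # a wall crossing the view two rows ahead...
blocked[3, 4] = False  # ...with one open gap directly in front of the agent
visible = propagate_visibility(blocked, agent_pos=(3, 6))
```

Environments constructed with `see_through_walls=True` skip this pass entirely, per the "Set this to True for maximum speed" comments in the constructors above.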