8 years ago · bfd0f76513
--- a/README.md
+++ b/README.md
@@ -84,17 +84,18 @@ python3 pytorch_rl/enjoy.py --env-name MiniGrid-Empty-6x6-v0 --load-dir ./traine
 
																 MiniGrid is built to support tasks involving natural language and sparse rewards.
															
 
																 The observations are dictionaries, with an 'image' field, partially observable
															
 
																-view of the environment, and a 'mission' field which is a textual string
															
 
																-describing the objective the agent should reach to get a reward. Using
															
 
																-dictionaries makes it easy for you to add additional information to observations
															
 
																+view of the environment, a 'mission' field which is a textual string
															
 
																+describing the objective the agent should reach to get a reward, and a 'direction'
															
 
																+field which can be used as an optional compass. Using dictionaries makes it
															
 
																+easy for you to add additional information to observations
															
 
																 if you need to, without having to force everything into a single tensor.
															
 
																-If your RL code expects a tensor for observations, please take a look at
															
 
																+If your RL code expects one single tensor for observations, please take a look at
															
 
																 `FlatObsWrapper` in
															
 
																 [gym_minigrid/wrappers.py](/gym_minigrid/wrappers.py).
															
 
																 The partially observable view of the environment uses a compact and efficient
															
 
																-encoding, with just 3 input values per visible grid cell, 147 values total.
															
 
																-If you want to obtain an array of RGB pixels instead, see the `getObsRender` method in
															
 
																+encoding, with just 3 input values per visible grid cell, 7x7x3 values total.
															
 
																+If you want to obtain an array of RGB pixels instead, see the `get_obs_render` method in
															
 
																 [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py).
															
 
																 Structure of the world:
															
@@ -117,7 +118,7 @@ Actions in the basic environment:
 
																 By default, sparse rewards for reaching a goal square are provided, but you can
															
 
																 define your own reward function by creating a class derived from MiniGridEnv. Extending
															
 
																-the environment with new object types or action should be very easy very easy.
															
 
																+the environment with new object types or actions should be very easy.
															
 
																 If you wish to do this, you should take a look at the
															
 
																 [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py) source file.
															
--- a/gym_minigrid/envs/doorkey.py
+++ b/gym_minigrid/envs/doorkey.py
@@ -9,7 +9,7 @@ class DoorKeyEnv(MiniGridEnv):
 
																     def __init__(self, size=8):
															
 
																         super().__init__(grid_size=size, max_steps=4 * size)
															
 
																-    def _genGrid(self, width, height):
															
 
																+    def _gen_grid(self, width, height):
															
 
																         # Create an empty grid
															
 
																         self.grid = Grid(width, height)
															
--- a/gym_minigrid/envs/empty.py
+++ b/gym_minigrid/envs/empty.py
@@ -7,17 +7,19 @@ class EmptyEnv(MiniGridEnv):
 
																     """
															
 
																     def __init__(self, size=8):
															
 
																-        super().__init__(grid_size=size, max_steps=3*size)
															
 
																-
															
 
																-    def _genGrid(self, width, height):
															
 
																+        super().__init__(
															
 
																+            grid_size=size,
															
 
																+            max_steps=3*size,
															
 
																+            # Set this to True for maximum speed
															
 
																+            see_through_walls=True
															
 
																+        )
															
 
																+
															
 
																+    def _gen_grid(self, width, height):
															
 
																         # Create an empty grid
															
 
																         self.grid = Grid(width, height)
															
 
																         # Generate the surrounding walls
															
 
																-        self.grid.horzWall(0, 0)
															
 
																-        self.grid.horzWall(0, height-1)
															
 
																-        self.grid.vertWall(0, 0)
															
 
																-        self.grid.vertWall(width-1, 0)
															
 
																+        self.grid.wallRect(0, 0, width, height)
															
 
																         # Place the agent in the top-left corner
															
 
																         self.start_pos = (1, 1)
															
--- a/gym_minigrid/envs/fetch.py
+++ b/gym_minigrid/envs/fetch.py
@@ -13,10 +13,17 @@ class FetchEnv(MiniGridEnv):
 
																         numObjs=3
															
 
																     ):
															
 
																         self.numObjs = numObjs
															
 
																-        super().__init__(grid_size=size, max_steps=5*size)
															
 
																+
															
 
																+        super().__init__(
															
 
																+            grid_size=size,
															
 
																+            max_steps=5*size,
															
 
																+            # Set this to True for maximum speed
															
 
																+            see_through_walls=True
															
 
																+        )
															
 
																+
															
 
																         self.reward_range = (0, 1)
															
 
																-    def _genGrid(self, width, height):
															
 
																+    def _gen_grid(self, width, height):
															
 
																         self.grid = Grid(width, height)
															
 
																         # Generate the surrounding walls
															
--- a/gym_minigrid/envs/gotodoor.py
+++ b/gym_minigrid/envs/gotodoor.py
@@ -12,10 +12,17 @@ class GoToDoorEnv(MiniGridEnv):
 
																         size=5
															
 
																     ):
															
 
																         assert size >= 5
															
 
																-        super().__init__(grid_size=size, max_steps=10*size)
															
 
																+
															
 
																+        super().__init__(
															
 
																+            grid_size=size,
															
 
																+            max_steps=5*size,
															
 
																+            # Set this to True for maximum speed
															
 
																+            see_through_walls=True
															
 
																+        )
															
 
																+
															
 
																         self.reward_range = (0, 1)
															
 
																-    def _genGrid(self, width, height):
															
 
																+    def _gen_grid(self, width, height):
															
 
																         # Create the grid
															
 
																         self.grid = Grid(width, height)
															
--- a/gym_minigrid/envs/gotoobject.py
+++ b/gym_minigrid/envs/gotoobject.py
@@ -13,10 +13,17 @@ class GoToObjectEnv(MiniGridEnv):
 
																         numObjs=2
															
 
																     ):
															
 
																         self.numObjs = numObjs
															
 
																-        super().__init__(grid_size=size, max_steps=5*size)
															
 
																+
															
 
																+        super().__init__(
															
 
																+            grid_size=size,
															
 
																+            max_steps=5*size,
															
 
																+            # Set this to True for maximum speed
															
 
																+            see_through_walls=True
															
 
																+        )
															
 
																+
															
 
																         self.reward_range = (0, 1)
															
 
																-    def _genGrid(self, width, height):
															
 
																+    def _gen_grid(self, width, height):
															
 
																         self.grid = Grid(width, height)
															
 
																         # Generate the surrounding walls
															
--- a/gym_minigrid/envs/lockedroom.py
+++ b/gym_minigrid/envs/lockedroom.py
@@ -38,7 +38,7 @@ class LockedRoom(MiniGridEnv):
 
																             'image': self.observation_space
															
 
																         })
															
 
																-    def _genGrid(self, width, height):
															
 
																+    def _gen_grid(self, width, height):
															
 
																         # Create the grid
															
 
																         self.grid = Grid(width, height)
															
--- a/gym_minigrid/envs/multiroom.py
+++ b/gym_minigrid/envs/multiroom.py
@@ -38,7 +38,7 @@ class MultiRoomEnv(MiniGridEnv):
 
																             max_steps=self.maxNumRooms * 20
															
 
																         )
															
 
																-    def _genGrid(self, width, height):
															
 
																+    def _gen_grid(self, width, height):
															
 
																         roomList = []
															
 
																         # Choose a random number of rooms to generate
															
--- a/gym_minigrid/envs/playground_v0.py
+++ b/gym_minigrid/envs/playground_v0.py
@@ -11,7 +11,7 @@ class PlaygroundV0(MiniGridEnv):
 
																         super().__init__(grid_size=19, max_steps=100)
															
 
																         self.reward_range = (0, 1)
															
 
																-    def _genGrid(self, width, height):
															
 
																+    def _gen_grid(self, width, height):
															
 
																         # Create the grid
															
 
																         self.grid = Grid(width, height)
															
--- a/gym_minigrid/envs/putnear.py
+++ b/gym_minigrid/envs/putnear.py
@@ -13,10 +13,17 @@ class PutNearEnv(MiniGridEnv):
 
																         numObjs=2
															
 
																     ):
															
 
																         self.numObjs = numObjs
															
 
																-        super().__init__(grid_size=size, max_steps=5*size)
															
 
																+
															
 
																+        super().__init__(
															
 
																+            grid_size=size,
															
 
																+            max_steps=5*size,
															
 
																+            # Set this to True for maximum speed
															
 
																+            see_through_walls=True
															
 
																+        )
															
 
																+
															
 
																         self.reward_range = (0, 1)
															
 
																-    def _genGrid(self, width, height):
															
 
																+    def _gen_grid(self, width, height):
															
 
																         self.grid = Grid(width, height)
															
 
																         # Generate the surrounding walls
															
--- a/gym_minigrid/envs/roomgrid.py
+++ b/gym_minigrid/envs/roomgrid.py
@@ -77,7 +77,7 @@ class RoomGrid(MiniGridEnv):
 
																         assert j < self.num_rows
															
 
																         return self.room_grid[j][i]
															
 
																-    def _genGrid(self, width, height):
															
 
																+    def _gen_grid(self, width, height):
															
 
																         # Create the grid
															
 
																         self.grid = Grid(width, height)
															
--- a/gym_minigrid/minigrid.py
+++ b/gym_minigrid/minigrid.py
@@ -69,11 +69,11 @@ class WorldObj:
 
																         """Can the agent overlap with this?"""
															
 
																         return False
															
 
																-    def canPickup(self):
															
 
																+    def can_pickup(self):
															
 
																         """Can the agent pick this up?"""
															
 
																         return False
															
 
																-    def canContain(self):
															
 
																+    def can_contain(self):
															
 
																         """Can this contain another object?"""
															
 
																         return False
															
@@ -86,9 +86,11 @@ class WorldObj:
 
																         return False
															
 
																     def render(self, r):
															
 
																-        assert False
															
 
																+        """Draw this object with the given renderer"""
															
 
																+        raise NotImplementedError
															
 
																     def _set_color(self, r):
															
 
																+        """Set the color of this object as the active drawing color"""
															
 
																         c = COLORS[self.color]
															
 
																         r.setLineColor(c[0], c[1], c[2])
															
 
																         r.setColor(c[0], c[1], c[2])
															
@@ -189,6 +191,9 @@ class LockedDoor(WorldObj):
 
																         """The agent can only walk over this cell when the door is open"""
															
 
																         return self.is_open
															
 
																+    def see_behind(self):
															
 
																+        return self.is_open
															
 
																+
															
 
																     def render(self, r):
															
 
																         c = COLORS[self.color]
															
 
																         r.setLineColor(c[0], c[1], c[2])
															
@@ -226,7 +231,7 @@ class Key(WorldObj):
 
																     def __init__(self, color='blue'):
															
 
																         super(Key, self).__init__('key', color)
															
 
																-    def canPickup(self):
															
 
																+    def can_pickup(self):
															
 
																         return True
															
 
																     def render(self, r):
															
@@ -263,7 +268,7 @@ class Ball(WorldObj):
 
																     def __init__(self, color='blue'):
															
 
																         super(Ball, self).__init__('ball', color)
															
 
																-    def canPickup(self):
															
 
																+    def can_pickup(self):
															
 
																         return True
															
 
																     def render(self, r):
															
@@ -275,7 +280,7 @@ class Box(WorldObj):
 
																         super(Box, self).__init__('box', color)
															
 
																         self.contains = contains
															
 
																-    def canPickup(self):
															
 
																+    def can_pickup(self):
															
 
																         return True
															
 
																     def render(self, r):
															
@@ -596,6 +601,45 @@ class Grid:
 
																         return mask
															
 
																+    def process_vis_prop(  # Propagate visibility outward from agent_pos, then clear unseen cells
															
 
																+        grid,  # the grid itself; takes the place of self (called as grid.process_vis_prop(...))
															
 
																+        agent_pos  # (i, j) index of the agent's cell within this grid
															
 
																+    ):
															
 
																+        mask = np.zeros(shape=(grid.width, grid.height), dtype=bool)  # builtin bool: the np.bool alias was removed in NumPy 1.24
															
 
																+
															
 
																+        mask[agent_pos[0], agent_pos[1]] = True  # the agent's own cell is always visible
															
 
																+
															
 
																+        for j in reversed(range(1, grid.height)):  # rows from the highest j down to 1; row 0 only receives
															
 
																+            for i in range(0, grid.width-1):  # increasing i: spread to column i+1 and to row j-1
															
 
																+                if not mask[i, j]:
															
 
																+                    continue
															
 
																+
															
 
																+                cell = grid.get(i, j)
															
 
																+                if cell and not cell.see_behind():  # occluder: stays visible itself but does not propagate
															
 
																+                    continue
															
 
																+
															
 
																+                mask[i+1, j] = True
															
 
																+                mask[i+1, j-1] = True
															
 
																+                mask[i, j-1] = True
															
 
																+
															
 
																+            for i in reversed(range(1, grid.width)):  # decreasing i: spread to column i-1 and to row j-1
															
 
																+                if not mask[i, j]:
															
 
																+                    continue
															
 
																+
															
 
																+                cell = grid.get(i, j)
															
 
																+                if cell and not cell.see_behind():  # occluder: stays visible itself but does not propagate
															
 
																+                    continue
															
 
																+
															
 
																+                mask[i-1, j-1] = True
															
 
																+                mask[i-1, j] = True
															
 
																+                mask[i, j-1] = True
															
 
																+
															
 
																+        for j in range(0, grid.height):  # final pass over every cell of the grid
															
 
																+            for i in range(0, grid.width):
															
 
																+                if not mask[i, j]:
															
 
																+                    grid.set(i, j, None)  # never marked visible: remove its contents
															
 
																+                    # Debugging alternative: set Wall('red') here instead of None to mark the hidden cells
															
 
																+
															
 
																 class MiniGridEnv(gym.Env):
															
 
																     """
															
 
																     2D grid world game environment
															
@@ -623,7 +667,12 @@ class MiniGridEnv(gym.Env):
 
																         # Wait/stay put/do nothing
															
 
																         wait = 6
															
 
																-    def __init__(self, grid_size=16, max_steps=100):
															
 
																+    def __init__(
															
 
																+        self,
															
 
																+        grid_size=16,
															
 
																+        max_steps=100,
															
 
																+        see_through_walls=False
															
 
																+    ):
															
 
																         # Action enumeration for this environment
															
 
																         self.actions = MiniGridEnv.Actions
															
@@ -654,6 +703,7 @@ class MiniGridEnv(gym.Env):
 
																         # Environment configuration
															
 
																         self.grid_size = grid_size
															
 
																         self.max_steps = max_steps
															
 
																+        self.see_through_walls = see_through_walls
															
 
																         # Starting position and direction for the agent
															
 
																         self.start_pos = None
															
@@ -667,9 +717,9 @@ class MiniGridEnv(gym.Env):
 
																         # Generate a new random grid at the start of each episode
															
 
																         # To keep the same grid for each episode, call env.seed() with
															
 
																         # the same seed before calling env.reset()
															
 
																-        self._genGrid(self.grid_size, self.grid_size)
															
 
																+        self._gen_grid(self.grid_size, self.grid_size)
															
 
																-        # These fields should be defined by _genGrid
															
 
																+        # These fields should be defined by _gen_grid
															
 
																         assert self.start_pos != None
															
 
																         assert self.start_dir != None
															
@@ -788,8 +838,8 @@ class MiniGridEnv(gym.Env):
 
																         return "\n".join([" ".join(line) for line in new_array])
															
 
																-    def _genGrid(self, width, height):
															
 
																-        assert False, "_genGrid needs to be implemented by each environment"
															
 
																+    def _gen_grid(self, width, height):  # abstract hook: each environment builds its grid and sets start_pos/start_dir
															
 
																+        raise NotImplementedError("_gen_grid needs to be implemented by each environment")  # unlike assert, not stripped under -O
															
 
																     def _randInt(self, low, high):
															
 
																         """
															
@@ -1005,7 +1055,7 @@ class MiniGridEnv(gym.Env):
 
																         # Pick up an object
															
 
																         elif action == self.actions.pickup:
															
 
																-            if fwdCell and fwdCell.canPickup():
															
 
																+            if fwdCell and fwdCell.can_pickup():
															
 
																                 if self.carrying is None:
															
 
																                     self.carrying = fwdCell
															
 
																                     self.grid.set(*fwdPos, None)
															
@@ -1057,7 +1107,9 @@ class MiniGridEnv(gym.Env):
 
																             grid.set(*agent_pos, None)
															
 
																         # Process occluders and visibility
															
 
																-        grid.process_vis(agent_pos=(3, 6))
															
 
																+        # Note that this incurs some performance cost
															
 
																+        if not self.see_through_walls:
															
 
																+            grid.process_vis_prop(agent_pos=(3, 6))
															
 
																         # Encode the partially observable view into a numpy array
															
 
																         image = grid.encode()