
Faster visibility algorithm. Method renamings.

Maxime Chevalier-Boisvert 7 years ago
commit bfd0f76513

+ 8 - 7
README.md

@@ -84,17 +84,18 @@ python3 pytorch_rl/enjoy.py --env-name MiniGrid-Empty-6x6-v0 --load-dir ./traine
 
 MiniGrid is built to support tasks involving natural language and sparse rewards.
 The observations are dictionaries, with an 'image' field, partially observable
-view of the environment, and a 'mission' field which is a textual string
-describing the objective the agent should reach to get a reward. Using
-dictionaries makes it easy for you to add additional information to observations
+view of the environment, a 'mission' field which is a textual string
+describing the objective the agent should reach to get a reward, and a 'direction'
+field which can be used as an optional compass. Using dictionaries makes it
+easy for you to add additional information to observations
 if you need to, without having to force everything into a single tensor.
-If your RL code expects a tensor for observations, please take a look at
+If your RL code expects a single tensor for observations, please take a look at
 `FlatObsWrapper` in
 [gym_minigrid/wrappers.py](/gym_minigrid/wrappers.py).
 
 The partially observable view of the environment uses a compact and efficient
-encoding, with just 3 input values per visible grid cell, 147 values total.
-If you want to obtain an array of RGB pixels instead, see the `getObsRender` method in
+encoding, with just 3 input values per visible grid cell, 7x7x3 values total.
+If you want to obtain an array of RGB pixels instead, see the `get_obs_render` method in
 [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py).
 
 Structure of the world:
@@ -117,7 +118,7 @@ Actions in the basic environment:
 
 By default, sparse rewards for reaching a goal square are provided, but you can
 define your own reward function by creating a class derived from MiniGridEnv. Extending
-the environment with new object types or action should be very easy very easy.
+the environment with new object types or actions should be very easy.
 If you wish to do this, you should take a look at the
 [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py) source file.
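
For context, here is a minimal sketch (not part of this commit) of how the observation dictionary and `FlatObsWrapper` described above might be used. The environment id comes from the README; the sketch assumes that importing `gym_minigrid` registers the environments.

```python
import gym
import gym_minigrid  # assumed to register the MiniGrid-* environments
from gym_minigrid.wrappers import FlatObsWrapper

env = gym.make('MiniGrid-Empty-6x6-v0')
obs = env.reset()

print(obs['image'].shape)  # compact 7x7x3 encoding of the agent's view
print(obs['direction'])    # optional compass: the direction the agent faces
print(obs['mission'])      # textual description of the objective

# If your RL code expects a single tensor rather than a dictionary:
flat_env = FlatObsWrapper(gym.make('MiniGrid-Empty-6x6-v0'))
print(flat_env.reset().shape)  # one flat numpy array
```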
 

+ 1 - 1
gym_minigrid/envs/doorkey.py

@@ -9,7 +9,7 @@ class DoorKeyEnv(MiniGridEnv):
     def __init__(self, size=8):
         super().__init__(grid_size=size, max_steps=4 * size)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create an empty grid
         self.grid = Grid(width, height)
 

+ 9 - 7
gym_minigrid/envs/empty.py

@@ -7,17 +7,19 @@ class EmptyEnv(MiniGridEnv):
     """
 
     def __init__(self, size=8):
-        super().__init__(grid_size=size, max_steps=3*size)
-
-    def _genGrid(self, width, height):
+        super().__init__(
+            grid_size=size,
+            max_steps=3*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
+    def _gen_grid(self, width, height):
         # Create an empty grid
         self.grid = Grid(width, height)
 
         # Generate the surrounding walls
-        self.grid.horzWall(0, 0)
-        self.grid.horzWall(0, height-1)
-        self.grid.vertWall(0, 0)
-        self.grid.vertWall(width-1, 0)
+        self.grid.wallRect(0, 0, width, height)
 
         # Place the agent in the top-left corner
         self.start_pos = (1, 1)
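
The constructor pattern above (opting into `see_through_walls` for speed) together with the renamed `_gen_grid` hook is what downstream environments now implement. Below is a hypothetical subclass sketched against the API visible in this diff; the class name, goal placement, and mission string are illustrative and not taken from this commit.

```python
from gym_minigrid.minigrid import MiniGridEnv, Grid, Goal

class WalledRoomEnv(MiniGridEnv):
    """Hypothetical example: an empty walled room with a goal square"""

    def __init__(self, size=8):
        super().__init__(
            grid_size=size,
            max_steps=4 * size,
            # Skip the per-step visibility computation for maximum speed
            see_through_walls=True
        )

    def _gen_grid(self, width, height):
        # Build the grid and its surrounding walls
        self.grid = Grid(width, height)
        self.grid.wallRect(0, 0, width, height)

        # _gen_grid must define the agent's starting position and direction
        self.start_pos = (1, 1)
        self.start_dir = 0

        # Place a goal square in the bottom-right corner
        self.grid.set(width - 2, height - 2, Goal())

        self.mission = 'get to the green goal square'
```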

+ 9 - 2
gym_minigrid/envs/fetch.py

@@ -13,10 +13,17 @@ class FetchEnv(MiniGridEnv):
         numObjs=3
     ):
         self.numObjs = numObjs
-        super().__init__(grid_size=size, max_steps=5*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
 
         # Generate the surrounding walls

+ 9 - 2
gym_minigrid/envs/gotodoor.py

@@ -12,10 +12,17 @@ class GoToDoorEnv(MiniGridEnv):
         size=5
     ):
         assert size >= 5
-        super().__init__(grid_size=size, max_steps=10*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         self.grid = Grid(width, height)
 

+ 9 - 2
gym_minigrid/envs/gotoobject.py

@@ -13,10 +13,17 @@ class GoToObjectEnv(MiniGridEnv):
         numObjs=2
     ):
         self.numObjs = numObjs
-        super().__init__(grid_size=size, max_steps=5*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
 
         # Generate the surrounding walls

+ 1 - 1
gym_minigrid/envs/lockedroom.py

@@ -38,7 +38,7 @@ class LockedRoom(MiniGridEnv):
             'image': self.observation_space
         })
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         self.grid = Grid(width, height)
 

+ 1 - 1
gym_minigrid/envs/multiroom.py

@@ -38,7 +38,7 @@ class MultiRoomEnv(MiniGridEnv):
             max_steps=self.maxNumRooms * 20
         )
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         roomList = []
 
         # Choose a random number of rooms to generate

+ 1 - 1
gym_minigrid/envs/playground_v0.py

@@ -11,7 +11,7 @@ class PlaygroundV0(MiniGridEnv):
         super().__init__(grid_size=19, max_steps=100)
         self.reward_range = (0, 1)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         self.grid = Grid(width, height)
 

+ 9 - 2
gym_minigrid/envs/putnear.py

@@ -13,10 +13,17 @@ class PutNearEnv(MiniGridEnv):
         numObjs=2
     ):
         self.numObjs = numObjs
-        super().__init__(grid_size=size, max_steps=5*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
 
         # Generate the surrounding walls

+ 1 - 1
gym_minigrid/envs/roomgrid.py

@@ -77,7 +77,7 @@ class RoomGrid(MiniGridEnv):
         assert j < self.num_rows
         return self.room_grid[j][i]
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         self.grid = Grid(width, height)
 

+ 65 - 13
gym_minigrid/minigrid.py

@@ -69,11 +69,11 @@ class WorldObj:
         """Can the agent overlap with this?"""
         return False
 
-    def canPickup(self):
+    def can_pickup(self):
         """Can the agent pick this up?"""
         return False
 
-    def canContain(self):
+    def can_contain(self):
         """Can this contain another object?"""
         return False
 
@@ -86,9 +86,11 @@ class WorldObj:
         return False
 
     def render(self, r):
-        assert False
+        """Draw this object with the given renderer"""
+        raise NotImplementedError
 
     def _set_color(self, r):
+        """Set the color of this object as the active drawing color"""
         c = COLORS[self.color]
         r.setLineColor(c[0], c[1], c[2])
         r.setColor(c[0], c[1], c[2])
@@ -189,6 +191,9 @@ class LockedDoor(WorldObj):
         """The agent can only walk over this cell when the door is open"""
         return self.is_open
 
+    def see_behind(self):
+        return self.is_open
+
     def render(self, r):
         c = COLORS[self.color]
         r.setLineColor(c[0], c[1], c[2])
@@ -226,7 +231,7 @@ class Key(WorldObj):
     def __init__(self, color='blue'):
         super(Key, self).__init__('key', color)
 
-    def canPickup(self):
+    def can_pickup(self):
         return True
 
     def render(self, r):
@@ -263,7 +268,7 @@ class Ball(WorldObj):
     def __init__(self, color='blue'):
         super(Ball, self).__init__('ball', color)
 
-    def canPickup(self):
+    def can_pickup(self):
         return True
 
     def render(self, r):
@@ -275,7 +280,7 @@ class Box(WorldObj):
         super(Box, self).__init__('box', color)
         self.contains = contains
 
-    def canPickup(self):
+    def can_pickup(self):
         return True
 
     def render(self, r):
@@ -596,6 +601,45 @@ class Grid:
 
         return mask
 
+    def process_vis_prop(
+        grid,
+        agent_pos
+    ):
+        mask = np.zeros(shape=(grid.width, grid.height), dtype=np.bool)
+
+        mask[agent_pos[0], agent_pos[1]] = True
+
+        for j in reversed(range(1, grid.height)):
+            for i in range(0, grid.width-1):
+                if not mask[i, j]:
+                    continue
+
+                cell = grid.get(i, j)
+                if cell and not cell.see_behind():
+                    continue
+
+                mask[i+1, j] = True
+                mask[i+1, j-1] = True
+                mask[i, j-1] = True
+
+            for i in reversed(range(1, grid.width)):
+                if not mask[i, j]:
+                    continue
+
+                cell = grid.get(i, j)
+                if cell and not cell.see_behind():
+                    continue
+
+                mask[i-1, j-1] = True
+                mask[i-1, j] = True
+                mask[i, j-1] = True
+
+        for j in range(0, grid.height):
+            for i in range(0, grid.width):
+                if not mask[i, j]:
+                    grid.set(i, j, None)
+                    #grid.set(i, j, Wall('red'))
+
 class MiniGridEnv(gym.Env):
     """
     2D grid world game environment
@@ -623,7 +667,12 @@ class MiniGridEnv(gym.Env):
         # Wait/stay put/do nothing
         wait = 6
 
-    def __init__(self, grid_size=16, max_steps=100):
+    def __init__(
+        self,
+        grid_size=16,
+        max_steps=100,
+        see_through_walls=False
+    ):
         # Action enumeration for this environment
         self.actions = MiniGridEnv.Actions
 
@@ -654,6 +703,7 @@ class MiniGridEnv(gym.Env):
         # Environment configuration
         self.grid_size = grid_size
         self.max_steps = max_steps
+        self.see_through_walls = see_through_walls
 
         # Starting position and direction for the agent
         self.start_pos = None
@@ -667,9 +717,9 @@ class MiniGridEnv(gym.Env):
         # Generate a new random grid at the start of each episode
         # To keep the same grid for each episode, call env.seed() with
         # the same seed before calling env.reset()
-        self._genGrid(self.grid_size, self.grid_size)
+        self._gen_grid(self.grid_size, self.grid_size)
 
-        # These fields should be defined by _genGrid
+        # These fields should be defined by _gen_grid
         assert self.start_pos != None
         assert self.start_dir != None
 
@@ -788,8 +838,8 @@ class MiniGridEnv(gym.Env):
 
         return "\n".join([" ".join(line) for line in new_array])
 
-    def _genGrid(self, width, height):
-        assert False, "_genGrid needs to be implemented by each environment"
+    def _gen_grid(self, width, height):
+        assert False, "_gen_grid needs to be implemented by each environment"
 
     def _randInt(self, low, high):
         """
@@ -1005,7 +1055,7 @@ class MiniGridEnv(gym.Env):
 
         # Pick up an object
         elif action == self.actions.pickup:
-            if fwdCell and fwdCell.canPickup():
+            if fwdCell and fwdCell.can_pickup():
                 if self.carrying is None:
                     self.carrying = fwdCell
                     self.grid.set(*fwdPos, None)
@@ -1057,7 +1107,9 @@ class MiniGridEnv(gym.Env):
             grid.set(*agent_pos, None)
 
         # Process occluders and visibility
-        grid.process_vis(agent_pos=(3, 6))
+        # Note that this incurs some performance cost
+        if not self.see_through_walls:
+            grid.process_vis_prop(agent_pos=(3, 6))
 
         # Encode the partially observable view into a numpy array
         image = grid.encode()
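
A note on the commit title: `gen_obs` now calls the new `process_vis_prop` only when `see_through_walls` is `False`. The pass propagates a boolean visibility mask outward from the agent's cell, row by row, and stops at cells whose contents do not allow seeing behind them (`see_behind()` returning `False`, e.g. a closed `LockedDoor` per the diff above). The snippet below is a standalone illustration of that propagation idea, reduced to a plain boolean `blocked` array; the function name and array layout are mine, not library code.

```python
import numpy as np

def propagate_visibility(blocked, agent_pos):
    """Illustration of the propagation idea in process_vis_prop (not library code).

    blocked[i, j] is True where the cell occludes sight (see_behind() is False);
    the returned mask[i, j] is True for cells the agent can see.
    """
    width, height = blocked.shape
    mask = np.zeros((width, height), dtype=bool)
    mask[agent_pos[0], agent_pos[1]] = True

    # Sweep row by row, from the agent's row towards the far edge (j decreasing)
    for j in reversed(range(1, height)):
        # Left-to-right: a visible, non-blocking cell reveals its right and
        # forward neighbours
        for i in range(0, width - 1):
            if mask[i, j] and not blocked[i, j]:
                mask[i + 1, j] = True
                mask[i + 1, j - 1] = True
                mask[i, j - 1] = True
        # Right-to-left: same propagation towards the left
        for i in reversed(range(1, width)):
            if mask[i, j] and not blocked[i, j]:
                mask[i - 1, j] = True
                mask[i - 1, j - 1] = True
                mask[i, j - 1] = True

    return mask

# Example: the agent sits at the bottom centre of its 7x7 view, as in gen_obs
blocked = np.zeros((7, 7), dtype=bool)
blocked[:, 4] = True   # a wall crossing the view two rows ahead...
blocked[3, 4] = False  # ...with one open gap directly in front of the agent
visible = propagate_visibility(blocked, agent_pos=(3, 6))
```

Environments constructed with `see_through_walls=True` skip this pass entirely, per the "Set this to True for maximum speed" comments in the constructors above.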