浏览代码

Faster visibility algorithm. Method renamings.

Maxime Chevalier-Boisvert 7 年之前
父节点
当前提交
bfd0f76513

+ 8 - 7
README.md

@@ -84,17 +84,18 @@ python3 pytorch_rl/enjoy.py --env-name MiniGrid-Empty-6x6-v0 --load-dir ./traine
 
 
 MiniGrid is built to support tasks involving natural language and sparse rewards.
 MiniGrid is built to support tasks involving natural language and sparse rewards.
 The observations are dictionaries, with an 'image' field, partially observable
 The observations are dictionaries, with an 'image' field, partially observable
-view of the environment, and a 'mission' field which is a textual string
-describing the objective the agent should reach to get a reward. Using
-dictionaries makes it easy for you to add additional information to observations
+view of the environment, a 'mission' field which is a textual string
+describing the objective the agent should reach to get a reward, and a 'direction'
+field which can be used as an optional compass. Using dictionaries makes it
+easy for you to add additional information to observations
 if you need to, without having to force everything into a single tensor.
 if you need to, without having to force everything into a single tensor.
-If your RL code expects a tensor for observations, please take a look at
+If your RL code expects one single tensor for observations, please take a look at
 `FlatObsWrapper` in
 `FlatObsWrapper` in
 [gym_minigrid/wrappers.py](/gym_minigrid/wrappers.py).
 [gym_minigrid/wrappers.py](/gym_minigrid/wrappers.py).
 
 
 The partially observable view of the environment uses a compact and efficient
 The partially observable view of the environment uses a compact and efficient
-encoding, with just 3 input values per visible grid cell, 147 values total.
-If you want to obtain an array of RGB pixels instead, see the `getObsRender` method in
+encoding, with just 3 input values per visible grid cell, 7x7x3 values total.
+If you want to obtain an array of RGB pixels instead, see the `get_obs_render` method in
 [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py).
 [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py).
 
 
 Structure of the world:
 Structure of the world:
@@ -117,7 +118,7 @@ Actions in the basic environment:
 
 
 By default, sparse rewards for reaching a goal square are provided, but you can
 By default, sparse rewards for reaching a goal square are provided, but you can
 define your own reward function by creating a class derived from MiniGridEnv. Extending
 define your own reward function by creating a class derived from MiniGridEnv. Extending
-the environment with new object types or action should be very easy very easy.
+the environment with new object types or actions should be very easy.
 If you wish to do this, you should take a look at the
 If you wish to do this, you should take a look at the
 [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py) source file.
 [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py) source file.
 
 

+ 1 - 1
gym_minigrid/envs/doorkey.py

@@ -9,7 +9,7 @@ class DoorKeyEnv(MiniGridEnv):
     def __init__(self, size=8):
     def __init__(self, size=8):
         super().__init__(grid_size=size, max_steps=4 * size)
         super().__init__(grid_size=size, max_steps=4 * size)
 
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create an empty grid
         # Create an empty grid
         self.grid = Grid(width, height)
         self.grid = Grid(width, height)
 
 

+ 9 - 7
gym_minigrid/envs/empty.py

@@ -7,17 +7,19 @@ class EmptyEnv(MiniGridEnv):
     """
     """
 
 
     def __init__(self, size=8):
     def __init__(self, size=8):
-        super().__init__(grid_size=size, max_steps=3*size)
-
-    def _genGrid(self, width, height):
+        super().__init__(
+            grid_size=size,
+            max_steps=3*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
+    def _gen_grid(self, width, height):
         # Create an empty grid
         # Create an empty grid
         self.grid = Grid(width, height)
         self.grid = Grid(width, height)
 
 
         # Generate the surrounding walls
         # Generate the surrounding walls
-        self.grid.horzWall(0, 0)
-        self.grid.horzWall(0, height-1)
-        self.grid.vertWall(0, 0)
-        self.grid.vertWall(width-1, 0)
+        self.grid.wallRect(0, 0, width, height)
 
 
         # Place the agent in the top-left corner
         # Place the agent in the top-left corner
         self.start_pos = (1, 1)
         self.start_pos = (1, 1)

+ 9 - 2
gym_minigrid/envs/fetch.py

@@ -13,10 +13,17 @@ class FetchEnv(MiniGridEnv):
         numObjs=3
         numObjs=3
     ):
     ):
         self.numObjs = numObjs
         self.numObjs = numObjs
-        super().__init__(grid_size=size, max_steps=5*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
         self.reward_range = (0, 1)
 
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
         self.grid = Grid(width, height)
 
 
         # Generate the surrounding walls
         # Generate the surrounding walls

+ 9 - 2
gym_minigrid/envs/gotodoor.py

@@ -12,10 +12,17 @@ class GoToDoorEnv(MiniGridEnv):
         size=5
         size=5
     ):
     ):
         assert size >= 5
         assert size >= 5
-        super().__init__(grid_size=size, max_steps=10*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
         self.reward_range = (0, 1)
 
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         # Create the grid
         self.grid = Grid(width, height)
         self.grid = Grid(width, height)
 
 

+ 9 - 2
gym_minigrid/envs/gotoobject.py

@@ -13,10 +13,17 @@ class GoToObjectEnv(MiniGridEnv):
         numObjs=2
         numObjs=2
     ):
     ):
         self.numObjs = numObjs
         self.numObjs = numObjs
-        super().__init__(grid_size=size, max_steps=5*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
         self.reward_range = (0, 1)
 
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
         self.grid = Grid(width, height)
 
 
         # Generate the surrounding walls
         # Generate the surrounding walls

+ 1 - 1
gym_minigrid/envs/lockedroom.py

@@ -38,7 +38,7 @@ class LockedRoom(MiniGridEnv):
             'image': self.observation_space
             'image': self.observation_space
         })
         })
 
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         # Create the grid
         self.grid = Grid(width, height)
         self.grid = Grid(width, height)
 
 

+ 1 - 1
gym_minigrid/envs/multiroom.py

@@ -38,7 +38,7 @@ class MultiRoomEnv(MiniGridEnv):
             max_steps=self.maxNumRooms * 20
             max_steps=self.maxNumRooms * 20
         )
         )
 
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         roomList = []
         roomList = []
 
 
         # Choose a random number of rooms to generate
         # Choose a random number of rooms to generate

+ 1 - 1
gym_minigrid/envs/playground_v0.py

@@ -11,7 +11,7 @@ class PlaygroundV0(MiniGridEnv):
         super().__init__(grid_size=19, max_steps=100)
         super().__init__(grid_size=19, max_steps=100)
         self.reward_range = (0, 1)
         self.reward_range = (0, 1)
 
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         # Create the grid
         self.grid = Grid(width, height)
         self.grid = Grid(width, height)
 
 

+ 9 - 2
gym_minigrid/envs/putnear.py

@@ -13,10 +13,17 @@ class PutNearEnv(MiniGridEnv):
         numObjs=2
         numObjs=2
     ):
     ):
         self.numObjs = numObjs
         self.numObjs = numObjs
-        super().__init__(grid_size=size, max_steps=5*size)
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
         self.reward_range = (0, 1)
         self.reward_range = (0, 1)
 
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
         self.grid = Grid(width, height)
 
 
         # Generate the surrounding walls
         # Generate the surrounding walls

+ 1 - 1
gym_minigrid/envs/roomgrid.py

@@ -77,7 +77,7 @@ class RoomGrid(MiniGridEnv):
         assert j < self.num_rows
         assert j < self.num_rows
         return self.room_grid[j][i]
         return self.room_grid[j][i]
 
 
-    def _genGrid(self, width, height):
+    def _gen_grid(self, width, height):
         # Create the grid
         # Create the grid
         self.grid = Grid(width, height)
         self.grid = Grid(width, height)
 
 

+ 65 - 13
gym_minigrid/minigrid.py

@@ -69,11 +69,11 @@ class WorldObj:
         """Can the agent overlap with this?"""
         """Can the agent overlap with this?"""
         return False
         return False
 
 
-    def canPickup(self):
+    def can_pickup(self):
         """Can the agent pick this up?"""
         """Can the agent pick this up?"""
         return False
         return False
 
 
-    def canContain(self):
+    def can_contain(self):
         """Can this contain another object?"""
         """Can this contain another object?"""
         return False
         return False
 
 
@@ -86,9 +86,11 @@ class WorldObj:
         return False
         return False
 
 
     def render(self, r):
     def render(self, r):
-        assert False
+        """Draw this object with the given renderer"""
+        raise NotImplementedError
 
 
     def _set_color(self, r):
     def _set_color(self, r):
+        """Set the color of this object as the active drawing color"""
         c = COLORS[self.color]
         c = COLORS[self.color]
         r.setLineColor(c[0], c[1], c[2])
         r.setLineColor(c[0], c[1], c[2])
         r.setColor(c[0], c[1], c[2])
         r.setColor(c[0], c[1], c[2])
@@ -189,6 +191,9 @@ class LockedDoor(WorldObj):
         """The agent can only walk over this cell when the door is open"""
         """The agent can only walk over this cell when the door is open"""
         return self.is_open
         return self.is_open
 
 
+    def see_behind(self):
+        """The agent can only see past this door when it is open"""
+        return self.is_open
+
     def render(self, r):
     def render(self, r):
         c = COLORS[self.color]
         c = COLORS[self.color]
         r.setLineColor(c[0], c[1], c[2])
         r.setLineColor(c[0], c[1], c[2])
@@ -226,7 +231,7 @@ class Key(WorldObj):
     def __init__(self, color='blue'):
     def __init__(self, color='blue'):
         super(Key, self).__init__('key', color)
         super(Key, self).__init__('key', color)
 
 
-    def canPickup(self):
+    def can_pickup(self):
         return True
         return True
 
 
     def render(self, r):
     def render(self, r):
@@ -263,7 +268,7 @@ class Ball(WorldObj):
     def __init__(self, color='blue'):
     def __init__(self, color='blue'):
         super(Ball, self).__init__('ball', color)
         super(Ball, self).__init__('ball', color)
 
 
-    def canPickup(self):
+    def can_pickup(self):
         return True
         return True
 
 
     def render(self, r):
     def render(self, r):
@@ -275,7 +280,7 @@ class Box(WorldObj):
         super(Box, self).__init__('box', color)
         super(Box, self).__init__('box', color)
         self.contains = contains
         self.contains = contains
 
 
-    def canPickup(self):
+    def can_pickup(self):
         return True
         return True
 
 
     def render(self, r):
     def render(self, r):
@@ -596,6 +601,45 @@ class Grid:
 
 
         return mask
         return mask
 
 
+    def process_vis_prop(
+        grid,
+        agent_pos
+    ):
+        """
+        Hide the cells the agent cannot see by propagating visibility
+        outwards from its position.
+
+        The cell at agent_pos is marked visible. Rows are then visited from
+        j = height-1 down to j = 1, and each row is swept left-to-right and
+        then right-to-left. A visible cell that is empty, or whose
+        see_behind() is true, marks its horizontal neighbour and the adjacent
+        cells of row j-1 as visible; any other cell stops the propagation.
+        Every cell left unmarked is cleared with grid.set(i, j, None).
+
+        grid: the Grid being processed (takes the place of self), it is
+            modified in place
+        agent_pos: (i, j) coordinates of the agent within the grid
+        """
+
+        # The builtin bool is used as the dtype because the np.bool alias
+        # was deprecated in NumPy 1.20 and removed in NumPy 1.24
+        mask = np.zeros(shape=(grid.width, grid.height), dtype=bool)
+
+        # The agent's own cell is the seed of the propagation
+        mask[agent_pos[0], agent_pos[1]] = True
+
+        # Row 0 is never used as a source, it only receives visibility
+        # from row 1
+        for j in reversed(range(1, grid.height)):
+            # Left-to-right sweep: spread visibility towards increasing i
+            for i in range(0, grid.width-1):
+                if not mask[i, j]:
+                    continue
+
+                # A cell that cannot be seen through is itself visible,
+                # but does not reveal anything beyond it
+                cell = grid.get(i, j)
+                if cell and not cell.see_behind():
+                    continue
+
+                mask[i+1, j] = True
+                mask[i+1, j-1] = True
+                mask[i, j-1] = True
+
+            # Right-to-left sweep: spread visibility towards decreasing i
+            for i in reversed(range(1, grid.width)):
+                if not mask[i, j]:
+                    continue
+
+                cell = grid.get(i, j)
+                if cell and not cell.see_behind():
+                    continue
+
+                mask[i-1, j-1] = True
+                mask[i-1, j] = True
+                mask[i, j-1] = True
+
+        # Erase everything that was not reached by the propagation
+        # (when debugging, setting Wall('red') instead of None makes the
+        # hidden cells stand out)
+        for j in range(0, grid.height):
+            for i in range(0, grid.width):
+                if not mask[i, j]:
+                    grid.set(i, j, None)
+
+
 class MiniGridEnv(gym.Env):
 class MiniGridEnv(gym.Env):
     """
     """
     2D grid world game environment
     2D grid world game environment
@@ -623,7 +667,12 @@ class MiniGridEnv(gym.Env):
         # Wait/stay put/do nothing
         # Wait/stay put/do nothing
         wait = 6
         wait = 6
 
 
-    def __init__(self, grid_size=16, max_steps=100):
+    def __init__(
+        self,
+        grid_size=16,
+        max_steps=100,
+        see_through_walls=False
+    ):
         # Action enumeration for this environment
         # Action enumeration for this environment
         self.actions = MiniGridEnv.Actions
         self.actions = MiniGridEnv.Actions
 
 
@@ -654,6 +703,7 @@ class MiniGridEnv(gym.Env):
         # Environment configuration
         # Environment configuration
         self.grid_size = grid_size
         self.grid_size = grid_size
         self.max_steps = max_steps
         self.max_steps = max_steps
+        self.see_through_walls = see_through_walls
 
 
         # Starting position and direction for the agent
         # Starting position and direction for the agent
         self.start_pos = None
         self.start_pos = None
@@ -667,9 +717,9 @@ class MiniGridEnv(gym.Env):
         # Generate a new random grid at the start of each episode
         # Generate a new random grid at the start of each episode
         # To keep the same grid for each episode, call env.seed() with
         # To keep the same grid for each episode, call env.seed() with
         # the same seed before calling env.reset()
         # the same seed before calling env.reset()
-        self._genGrid(self.grid_size, self.grid_size)
+        self._gen_grid(self.grid_size, self.grid_size)
 
 
-        # These fields should be defined by _genGrid
+        # These fields should be defined by _gen_grid
         assert self.start_pos != None
         assert self.start_pos != None
         assert self.start_dir != None
         assert self.start_dir != None
 
 
@@ -788,8 +838,8 @@ class MiniGridEnv(gym.Env):
 
 
         return "\n".join([" ".join(line) for line in new_array])
         return "\n".join([" ".join(line) for line in new_array])
 
 
-    def _genGrid(self, width, height):
-        assert False, "_genGrid needs to be implemented by each environment"
+    def _gen_grid(self, width, height):
+        """
+        Build the grid for a new episode, must be overridden by each
+        environment. The override is expected to define self.start_pos
+        and self.start_dir, which are checked right after this call.
+        """
+        # Raise instead of "assert False" so the check is not stripped
+        # when Python runs with -O
+        raise NotImplementedError(
+            "_gen_grid needs to be implemented by each environment"
+        )
 
 
     def _randInt(self, low, high):
     def _randInt(self, low, high):
         """
         """
@@ -1005,7 +1055,7 @@ class MiniGridEnv(gym.Env):
 
 
         # Pick up an object
         # Pick up an object
         elif action == self.actions.pickup:
         elif action == self.actions.pickup:
-            if fwdCell and fwdCell.canPickup():
+            if fwdCell and fwdCell.can_pickup():
                 if self.carrying is None:
                 if self.carrying is None:
                     self.carrying = fwdCell
                     self.carrying = fwdCell
                     self.grid.set(*fwdPos, None)
                     self.grid.set(*fwdPos, None)
@@ -1057,7 +1107,9 @@ class MiniGridEnv(gym.Env):
             grid.set(*agent_pos, None)
             grid.set(*agent_pos, None)
 
 
         # Process occluders and visibility
         # Process occluders and visibility
-        grid.process_vis(agent_pos=(3, 6))
+        # Note that this incurs some performance cost
+        if not self.see_through_walls:
+            grid.process_vis_prop(agent_pos=(3, 6))
 
 
         # Encode the partially observable view into a numpy array
         # Encode the partially observable view into a numpy array
         image = grid.encode()
         image = grid.encode()