瀏覽代碼

Renamed fields to match PEP8 convention

Maxime Chevalier-Boisvert 7 年之前
父節點
當前提交
ec9e19efe7

+ 8 - 14
gym_minigrid/envs/doorkey.py

@@ -7,7 +7,7 @@ class DoorKeyEnv(MiniGridEnv):
     """
     """
 
 
     def __init__(self, size=8):
     def __init__(self, size=8):
-        super().__init__(gridSize=size, maxSteps=4 * size)
+        super().__init__(grid_size=size, max_steps=4 * size)
 
 
     def _genGrid(self, width, height):
     def _genGrid(self, width, height):
         # Create an empty grid
         # Create an empty grid
@@ -24,28 +24,22 @@ class DoorKeyEnv(MiniGridEnv):
         self.grid.vertWall(splitIdx, 0)
         self.grid.vertWall(splitIdx, 0)
 
 
         # Place the agent at a random position and orientation
         # Place the agent at a random position and orientation
-        self.startPos = self._randPos(
+        self.start_pos = self._randPos(
             1, splitIdx,
             1, splitIdx,
             1, height-1
             1, height-1
         )
         )
-        self.startDir = self._randInt(0, 4)
+        self.start_dir = self._randInt(0, 4)
 
 
         # Place a door in the wall
         # Place a door in the wall
         doorIdx = self._randInt(1, width-2)
         doorIdx = self._randInt(1, width-2)
         self.grid.set(splitIdx, doorIdx, LockedDoor('yellow'))
         self.grid.set(splitIdx, doorIdx, LockedDoor('yellow'))
 
 
         # Place a yellow key on the left side
         # Place a yellow key on the left side
-        while True:
-            pos = self._randPos(
-                1, splitIdx,
-                1, height-1
-            )
-            if pos == self.startPos:
-                continue
-            if self.grid.get(*pos) != None:
-                continue
-            self.grid.set(*pos, Key('yellow'))
-            break
+        self.placeObj(
+            obj=Key('yellow'),
+            top=(0, 0),
+            size=(splitIdx, height)
+        )
 
 
         self.mission = "use the key to open the door and then get to the goal"
         self.mission = "use the key to open the door and then get to the goal"
 
 

+ 5 - 1
gym_minigrid/envs/empty.py

@@ -7,7 +7,7 @@ class EmptyEnv(MiniGridEnv):
     """
     """
 
 
     def __init__(self, size=8):
     def __init__(self, size=8):
-        super().__init__(gridSize=size, maxSteps=3*size)
+        super().__init__(grid_size=size, max_steps=3*size)
 
 
     def _genGrid(self, width, height):
     def _genGrid(self, width, height):
         # Create an empty grid
         # Create an empty grid
@@ -19,6 +19,10 @@ class EmptyEnv(MiniGridEnv):
         self.grid.vertWall(0, 0)
         self.grid.vertWall(0, 0)
         self.grid.vertWall(width-1, 0)
         self.grid.vertWall(width-1, 0)
 
 
+        # Place the agent in the top-left corner
+        self.start_pos = (1, 1)
+        self.start_dir = 0
+
         # Place a goal square in the bottom-right corner
         # Place a goal square in the bottom-right corner
         self.grid.set(width - 2, height - 2, Goal())
         self.grid.set(width - 2, height - 2, Goal())
 
 

+ 1 - 1
gym_minigrid/envs/fetch.py

@@ -13,7 +13,7 @@ class FetchEnv(MiniGridEnv):
         numObjs=3
         numObjs=3
     ):
     ):
         self.numObjs = numObjs
         self.numObjs = numObjs
-        super().__init__(gridSize=size, maxSteps=5*size)
+        super().__init__(grid_size=size, max_steps=5*size)
         self.reward_range = (0, 1)
         self.reward_range = (0, 1)
 
 
     def _genGrid(self, width, height):
     def _genGrid(self, width, height):

+ 10 - 7
gym_minigrid/envs/gotodoor.py

@@ -12,7 +12,7 @@ class GoToDoorEnv(MiniGridEnv):
         size=5
         size=5
     ):
     ):
         assert size >= 5
         assert size >= 5
-        super().__init__(gridSize=size, maxSteps=10*size)
+        super().__init__(grid_size=size, max_steps=10*size)
         self.reward_range = (0, 1)
         self.reward_range = (0, 1)
 
 
     def _genGrid(self, width, height):
     def _genGrid(self, width, height):
@@ -46,19 +46,22 @@ class GoToDoorEnv(MiniGridEnv):
             color = doorColors[idx]
             color = doorColors[idx]
             self.grid.set(*pos, Door(color))
             self.grid.set(*pos, Door(color))
 
 
+        # Randomize the agent start position and orientation
+        self.placeAgent(size=(width, height))
+
         # Select a random target door
         # Select a random target door
         doorIdx = self._randInt(0, len(doorPos))
         doorIdx = self._randInt(0, len(doorPos))
-        self.targetPos = doorPos[doorIdx]
-        self.targetColor = doorColors[doorIdx]
+        self.target_pos = doorPos[doorIdx]
+        self.target_color = doorColors[doorIdx]
 
 
         # Generate the mission string
         # Generate the mission string
-        self.mission = 'go to the %s door' % self.targetColor
+        self.mission = 'go to the %s door' % self.target_color
 
 
     def step(self, action):
     def step(self, action):
-        obs, reward, done, info = MiniGridEnv.step(self, action)
+        obs, reward, done, info = super().step(action)
 
 
-        ax, ay = self.agentPos
-        tx, ty = self.targetPos
+        ax, ay = self.agent_pos
+        tx, ty = self.target_pos
 
 
         # Don't let the agent open any of the doors
         # Don't let the agent open any of the doors
         if action == self.actions.toggle:
         if action == self.actions.toggle:

+ 7 - 7
gym_minigrid/envs/gotoobject.py

@@ -13,7 +13,7 @@ class GoToObjectEnv(MiniGridEnv):
         numObjs=2
         numObjs=2
     ):
     ):
         self.numObjs = numObjs
         self.numObjs = numObjs
-        super().__init__(gridSize=size, maxSteps=5*size)
+        super().__init__(grid_size=size, max_steps=5*size)
         self.reward_range = (0, 1)
         self.reward_range = (0, 1)
 
 
     def _genGrid(self, width, height):
     def _genGrid(self, width, height):
@@ -48,23 +48,23 @@ class GoToObjectEnv(MiniGridEnv):
             objs.append((objType, objColor))
             objs.append((objType, objColor))
             objPos.append(pos)
             objPos.append(pos)
 
 
-        # Randomize the player start position and orientation
+        # Randomize the agent start position and orientation
         self.placeAgent()
         self.placeAgent()
 
 
         # Choose a random object to be picked up
         # Choose a random object to be picked up
         objIdx = self._randInt(0, len(objs))
         objIdx = self._randInt(0, len(objs))
-        self.targetType, self.targetColor = objs[objIdx]
-        self.targetPos = objPos[objIdx]
+        self.targetType, self.target_color = objs[objIdx]
+        self.target_pos = objPos[objIdx]
 
 
-        descStr = '%s %s' % (self.targetColor, self.targetType)
+        descStr = '%s %s' % (self.target_color, self.targetType)
         self.mission = 'go to the %s' % descStr
         self.mission = 'go to the %s' % descStr
         #print(self.mission)
         #print(self.mission)
 
 
     def step(self, action):
     def step(self, action):
         obs, reward, done, info = MiniGridEnv.step(self, action)
         obs, reward, done, info = MiniGridEnv.step(self, action)
 
 
-        ax, ay = self.agentPos
-        tx, ty = self.targetPos
+        ax, ay = self.agent_pos
+        tx, ty = self.target_pos
 
 
         # Toggle/pickup action terminates the episode
         # Toggle/pickup action terminates the episode
         if action == self.actions.toggle:
         if action == self.actions.toggle:

+ 3 - 3
gym_minigrid/envs/lockedroom.py

@@ -32,7 +32,7 @@ class LockedRoom(MiniGridEnv):
         self
         self
     ):
     ):
         size = 19
         size = 19
-        super().__init__(gridSize=size, maxSteps=10*size)
+        super().__init__(grid_size=size, max_steps=10*size)
 
 
         self.observation_space = spaces.Dict({
         self.observation_space = spaces.Dict({
             'image': self.observation_space
             'image': self.observation_space
@@ -106,11 +106,11 @@ class LockedRoom(MiniGridEnv):
         self.grid.set(*keyPos, Key(lockedRoom.color))
         self.grid.set(*keyPos, Key(lockedRoom.color))
 
 
         # Randomize the player start position and orientation
         # Randomize the player start position and orientation
-        self.startPos = self._randPos(
+        self.start_pos = self._randPos(
             lWallIdx + 1, rWallIdx,
             lWallIdx + 1, rWallIdx,
             1, height-1
             1, height-1
         )
         )
-        self.startDir = self._randInt(0, 4)
+        self.start_dir = self._randInt(0, 4)
 
 
         # Generate the mission string
         # Generate the mission string
         self.mission = (
         self.mission = (

+ 7 - 23
gym_minigrid/envs/multiroom.py

@@ -34,12 +34,11 @@ class MultiRoomEnv(MiniGridEnv):
         self.rooms = []
         self.rooms = []
 
 
         super(MultiRoomEnv, self).__init__(
         super(MultiRoomEnv, self).__init__(
-            gridSize=25,
-            maxSteps=self.maxNumRooms * 20
+            grid_size=25,
+            max_steps=self.maxNumRooms * 20
         )
         )
 
 
     def _genGrid(self, width, height):
     def _genGrid(self, width, height):
-
         roomList = []
         roomList = []
 
 
         # Choose a random number of rooms to generate
         # Choose a random number of rooms to generate
@@ -70,15 +69,6 @@ class MultiRoomEnv(MiniGridEnv):
         assert len(roomList) > 0
         assert len(roomList) > 0
         self.rooms = roomList
         self.rooms = roomList
 
 
-        # Randomize the starting agent position and direction
-        topX, topY = roomList[0].top
-        sizeX, sizeY = roomList[0].size
-        self.startPos = (
-            self._randInt(topX + 1, topX + sizeX - 2),
-            self._randInt(topY + 1, topY + sizeY - 2)
-        )
-        self.startDir = self._randInt(0, 4)
-
         # Create the grid
         # Create the grid
         self.grid = Grid(width, height)
         self.grid = Grid(width, height)
         wall = Wall()
         wall = Wall()
@@ -118,17 +108,11 @@ class MultiRoomEnv(MiniGridEnv):
                 prevRoom = roomList[idx-1]
                 prevRoom = roomList[idx-1]
                 prevRoom.exitDoorPos = room.entryDoorPos
                 prevRoom.exitDoorPos = room.entryDoorPos
 
 
-        # Place the final goal
-        while True:
-            self.goalPos = (
-                self._randInt(topX + 1, topX + sizeX - 1),
-                self._randInt(topY + 1, topY + sizeY - 1)
-            )
+        # Randomize the starting agent position and direction
+        self.placeAgent(roomList[0].top, roomList[0].size)
 
 
-            # Make sure the goal doesn't overlap with the agent
-            if self.goalPos != self.startPos:
-                self.grid.set(*self.goalPos, Goal())
-                break
+        # Place the final goal in the last room
+        self.placeObj(Goal(), roomList[-1].top, roomList[-1].size)
 
 
         self.mission = 'traverse the rooms to get to the goal'
         self.mission = 'traverse the rooms to get to the goal'
 
 
@@ -174,7 +158,7 @@ class MultiRoomEnv(MiniGridEnv):
         # If the room is out of the grid, can't place a room here
         # If the room is out of the grid, can't place a room here
         if topX < 0 or topY < 0:
         if topX < 0 or topY < 0:
             return False
             return False
-        if topX + sizeX > self.gridSize or topY + sizeY >= self.gridSize:
+        if topX + sizeX > self.grid_size or topY + sizeY >= self.grid_size:
             return False
             return False
 
 
         # If the room intersects with previous rooms, can't place it here
         # If the room intersects with previous rooms, can't place it here

+ 1 - 1
gym_minigrid/envs/playground_v0.py

@@ -8,7 +8,7 @@ class PlaygroundV0(MiniGridEnv):
     """
     """
 
 
     def __init__(self):
     def __init__(self):
-        super().__init__(gridSize=19, maxSteps=100)
+        super().__init__(grid_size=19, max_steps=100)
         self.reward_range = (0, 1)
         self.reward_range = (0, 1)
 
 
     def _genGrid(self, width, height):
     def _genGrid(self, width, height):

+ 29 - 39
gym_minigrid/envs/putnear.py

@@ -13,7 +13,7 @@ class PutNearEnv(MiniGridEnv):
         numObjs=2
         numObjs=2
     ):
     ):
         self.numObjs = numObjs
         self.numObjs = numObjs
-        super().__init__(gridSize=size, maxSteps=5*size)
+        super().__init__(grid_size=size, max_steps=5*size)
         self.reward_range = (0, 1)
         self.reward_range = (0, 1)
 
 
     def _genGrid(self, width, height):
     def _genGrid(self, width, height):
@@ -26,13 +26,12 @@ class PutNearEnv(MiniGridEnv):
         self.grid.vertWall(width-1, 0)
         self.grid.vertWall(width-1, 0)
 
 
         # Types and colors of objects we can generate
         # Types and colors of objects we can generate
-        types = ['key', 'ball']
-        colors = list(COLORS.keys())
+        types = ['key', 'ball', 'box']
 
 
         objs = []
         objs = []
         objPos = []
         objPos = []
 
 
-        def nearObj(p1):
+        def near_obj(env, p1):
             for p2 in objPos:
             for p2 in objPos:
                 dx = p1[0] - p2[0]
                 dx = p1[0] - p2[0]
                 dy = p1[1] - p2[1]
                 dy = p1[1] - p2[1]
@@ -43,7 +42,7 @@ class PutNearEnv(MiniGridEnv):
         # Until we have generated all the objects
         # Until we have generated all the objects
         while len(objs) < self.numObjs:
         while len(objs) < self.numObjs:
             objType = self._randElem(types)
             objType = self._randElem(types)
-            objColor = self._randElem(colors)
+            objColor = self._randElem(COLOR_NAMES)
 
 
             # If this object already exists, try again
             # If this object already exists, try again
             if (objType, objColor) in objs:
             if (objType, objColor) in objs:
@@ -56,64 +55,55 @@ class PutNearEnv(MiniGridEnv):
             elif objType == 'box':
             elif objType == 'box':
                 obj = Box(objColor)
                 obj = Box(objColor)
 
 
-            while True:
-                pos = (
-                    self._randInt(1, width - 1),
-                    self._randInt(1, height - 1)
-                )
-                if nearObj(pos):
-                    continue
-                if pos == self.startPos:
-                    continue
-                self.grid.set(*pos, obj)
-                break
+            pos = self.placeObj(obj, reject_fn=near_obj)
 
 
             objs.append((objType, objColor))
             objs.append((objType, objColor))
             objPos.append(pos)
             objPos.append(pos)
 
 
+        # Randomize the agent start position and orientation
+        self.placeAgent()
+
         # Choose a random object to be moved
         # Choose a random object to be moved
         objIdx = self._randInt(0, len(objs))
         objIdx = self._randInt(0, len(objs))
-        self.moveType, self.moveColor = objs[objIdx]
-        self.movePos = objPos[objIdx]
+        self.move_type, self.moveColor = objs[objIdx]
+        self.move_pos = objPos[objIdx]
 
 
         # Choose a target object (to put the first object next to)
         # Choose a target object (to put the first object next to)
         while True:
         while True:
             targetIdx = self._randInt(0, len(objs))
             targetIdx = self._randInt(0, len(objs))
             if targetIdx != objIdx:
             if targetIdx != objIdx:
                 break
                 break
-        self.targetType, self.targetColor = objs[targetIdx]
-        self.targetPos = objPos[targetIdx]
+        self.target_type, self.target_color = objs[targetIdx]
+        self.target_pos = objPos[targetIdx]
 
 
         self.mission = 'put the %s %s near the %s %s' % (
         self.mission = 'put the %s %s near the %s %s' % (
             self.moveColor,
             self.moveColor,
-            self.moveType,
-            self.targetColor,
-            self.targetType
+            self.move_type,
+            self.target_color,
+            self.target_type
         )
         )
 
 
     def step(self, action):
     def step(self, action):
         preCarrying = self.carrying
         preCarrying = self.carrying
 
 
-        obs, reward, done, info = MiniGridEnv.step(self, action)
+        obs, reward, done, info = super().step(action)
 
 
         u, v = self.getDirVec()
         u, v = self.getDirVec()
-        ox, oy = (self.agentPos[0] + u, self.agentPos[1] + v)
-        tx, ty = self.targetPos
-
-        # Pickup/drop action
-        if action == self.actions.toggle:
-            # If we picked up the wrong object, terminate the episode
-            if self.carrying:
-                if self.carrying.type != self.moveType or self.carrying.color != self.moveColor:
-                    done = True
-
-            # If successfully dropping an object near the target
-            if preCarrying:
-                if self.grid.get(ox, oy) is preCarrying:
-                    if abs(ox - tx) <= 1 and abs(oy - ty) <= 1:
-                        reward = 1
+        ox, oy = (self.agent_pos[0] + u, self.agent_pos[1] + v)
+        tx, ty = self.target_pos
+
+        # If we picked up the wrong object, terminate the episode
+        if action == self.actions.pickup and self.carrying:
+            if self.carrying.type != self.move_type or self.carrying.color != self.moveColor:
                 done = True
                 done = True
 
 
+        # If successfully dropping an object near the target
+        if action == self.actions.drop and preCarrying:
+            if self.grid.get(ox, oy) is preCarrying:
+                if abs(ox - tx) <= 1 and abs(oy - ty) <= 1:
+                    reward = 1
+            done = True
+
         return obs, reward, done, info
         return obs, reward, done, info
 
 
 class PutNear8x8N3(PutNearEnv):
 class PutNear8x8N3(PutNearEnv):

+ 4 - 4
gym_minigrid/envs/roomgrid.py

@@ -54,7 +54,7 @@ class RoomGrid(MiniGridEnv):
         self.num_rows = num_cols
         self.num_rows = num_cols
 
 
         grid_size = (room_size - 1) * num_cols + 1
         grid_size = (room_size - 1) * num_cols + 1
-        super().__init__(gridSize=grid_size, maxSteps=max_steps)
+        super().__init__(grid_size=grid_size, max_steps=max_steps)
 
 
         self.reward_range = (0, 1)
         self.reward_range = (0, 1)
 
 
@@ -125,11 +125,11 @@ class RoomGrid(MiniGridEnv):
                     room.door_pos[3] = room.neighbors[3].door_pos[1]
                     room.door_pos[3] = room.neighbors[3].door_pos[1]
 
 
         # The agent starts in the middle, facing right
         # The agent starts in the middle, facing right
-        self.startPos = (
+        self.start_pos = (
             (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2),
             (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2),
             (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2)
             (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2)
         )
         )
-        self.startDir = 0
+        self.start_dir = 0
 
 
         # By default, this environment has no mission
         # By default, this environment has no mission
         self.mission = ''
         self.mission = ''
@@ -182,7 +182,7 @@ class RoomGrid(MiniGridEnv):
         starting position
         starting position
         """
         """
 
 
-        start_room = self.room_from_pos(*self.startPos)
+        start_room = self.room_from_pos(*self.start_pos)
 
 
         def find_reach():
         def find_reach():
             reach = set()
             reach = set()

+ 92 - 83
gym_minigrid/minigrid.py

@@ -559,7 +559,7 @@ class MiniGridEnv(gym.Env):
         # Wait/stay put/do nothing
         # Wait/stay put/do nothing
         wait = 6
         wait = 6
 
 
-    def __init__(self, gridSize=16, maxSteps=100):
+    def __init__(self, grid_size=16, max_steps=100):
         # Action enumeration for this environment
         # Action enumeration for this environment
         self.actions = MiniGridEnv.Actions
         self.actions = MiniGridEnv.Actions
 
 
@@ -582,21 +582,56 @@ class MiniGridEnv(gym.Env):
         self.reward_range = (-1, 1000)
         self.reward_range = (-1, 1000)
 
 
         # Renderer object used to render the whole grid (full-scale)
         # Renderer object used to render the whole grid (full-scale)
-        self.gridRender = None
+        self.grid_render = None
 
 
         # Renderer used to render observations (small-scale agent view)
         # Renderer used to render observations (small-scale agent view)
-        self.obsRender = None
+        self.obs_render = None
 
 
         # Environment configuration
         # Environment configuration
-        self.gridSize = gridSize
-        self.maxSteps = maxSteps
-        self.startPos = (1, 1)
-        self.startDir = 0
+        self.grid_size = grid_size
+        self.max_steps = max_steps
+
+        # Starting position and direction for the agent
+        self.start_pos = None
+        self.start_dir = None
 
 
         # Initialize the state
         # Initialize the state
         self.seed()
         self.seed()
         self.reset()
         self.reset()
 
 
+    def reset(self):
+        # Generate a new random grid at the start of each episode
+        # To keep the same grid for each episode, call env.seed() with
+        # the same seed before calling env.reset()
+        self._genGrid(self.grid_size, self.grid_size)
+
+        # These fields should be defined by _genGrid
+        assert self.start_pos != None
+        assert self.start_dir != None
+
+        # Check that the agent doesn't overlap with an object
+        assert self.grid.get(*self.start_pos) is None
+
+        # Place the agent in the starting position and direction
+        self.agent_pos = self.start_pos
+        self.agent_dir = self.start_dir
+
+        # Item picked up, being carried, initially nothing
+        self.carrying = None
+
+        # Step count since episode start
+        self.step_count = 0
+
+        # Return first observation
+        obs = self._genObs()
+        return obs
+
+    def seed(self, seed=1337):
+        # Seed the random number generator
+        self.np_random, _ = seeding.np_random(seed)
+
+        return [seed]
+
     def __str__(self):
     def __str__(self):
         """
         """
         Produce a pretty string of the environment's grid along with the agent.
         Produce a pretty string of the environment's grid along with the agent.
@@ -681,39 +716,13 @@ class MiniGridEnv(gym.Env):
             new_array.append(new_line)
             new_array.append(new_line)
 
 
         # Add the agent
         # Add the agent
-        new_array[self.agentPos[1]][self.agentPos[0]] = AGENT_DIR_TO_IDS[self.agentDir]
+        new_array[self.agent_pos[1]][self.agent_pos[0]] = AGENT_DIR_TO_IDS[self.agent_dir]
 
 
         return "\n".join([" ".join(line) for line in new_array])
         return "\n".join([" ".join(line) for line in new_array])
 
 
     def _genGrid(self, width, height):
     def _genGrid(self, width, height):
         assert False, "_genGrid needs to be implemented by each environment"
         assert False, "_genGrid needs to be implemented by each environment"
 
 
-    def reset(self):
-        # Generate a new random grid at the start of each episode
-        # To keep the same grid for each episode, call env.seed() with
-        # the same seed before calling env.reset()
-        self._genGrid(self.gridSize, self.gridSize)
-
-        # Place the agent in the starting position and direction
-        self.agentPos = self.startPos
-        self.agentDir = self.startDir
-
-        # Item picked up, being carried, initially nothing
-        self.carrying = None
-
-        # Step count since episode start
-        self.stepCount = 0
-
-        # Return first observation
-        obs = self._genObs()
-        return obs
-
-    def seed(self, seed=1337):
-        # Seed the random number generator
-        self.np_random, _ = seeding.np_random(seed)
-
-        return [seed]
-
     def _randInt(self, low, high):
     def _randInt(self, low, high):
         """
         """
         Generate random integer in [low,high[
         Generate random integer in [low,high[
@@ -766,7 +775,7 @@ class MiniGridEnv(gym.Env):
                 continue
                 continue
 
 
             # Don't place the object where the agent is
             # Don't place the object where the agent is
-            if pos == self.startPos:
+            if pos == self.start_pos:
                 continue
                 continue
 
 
             # Check if there is a filtering criterion
             # Check if there is a filtering criterion
@@ -779,21 +788,21 @@ class MiniGridEnv(gym.Env):
 
 
         return pos
         return pos
 
 
-    def placeAgent(self, randDir=True):
+    def placeAgent(self, top=None, size=None, randDir=True):
         """
         """
         Set the agent's starting point at an empty position in the grid
         Set the agent's starting point at an empty position in the grid
         """
         """
 
 
-        pos = self.placeObj(None)
-        self.startPos = pos
+        pos = self.placeObj(None, top, size)
+        self.start_pos = pos
 
 
         if randDir:
         if randDir:
-            self.startDir = self._randInt(0, 4)
+            self.start_dir = self._randInt(0, 4)
 
 
         return pos
         return pos
 
 
     def getStepsRemaining(self):
     def getStepsRemaining(self):
-        return self.maxSteps - self.stepCount
+        return self.max_steps - self.step_count
 
 
     def getDirVec(self):
     def getDirVec(self):
         """
         """
@@ -802,16 +811,16 @@ class MiniGridEnv(gym.Env):
         """
         """
 
 
         # Pointing right
         # Pointing right
-        if self.agentDir == 0:
+        if self.agent_dir == 0:
             return (1, 0)
             return (1, 0)
         # Down (positive Y)
         # Down (positive Y)
-        elif self.agentDir == 1:
+        elif self.agent_dir == 1:
             return (0, 1)
             return (0, 1)
         # Pointing left
         # Pointing left
-        elif self.agentDir == 2:
+        elif self.agent_dir == 2:
             return (-1, 0)
             return (-1, 0)
         # Up (negative Y)
         # Up (negative Y)
-        elif self.agentDir == 3:
+        elif self.agent_dir == 3:
             return (0, -1)
             return (0, -1)
         else:
         else:
             assert False
             assert False
@@ -823,21 +832,21 @@ class MiniGridEnv(gym.Env):
         """
         """
 
 
         # Facing right
         # Facing right
-        if self.agentDir == 0:
-            topX = self.agentPos[0]
-            topY = self.agentPos[1] - AGENT_VIEW_SIZE // 2
+        if self.agent_dir == 0:
+            topX = self.agent_pos[0]
+            topY = self.agent_pos[1] - AGENT_VIEW_SIZE // 2
         # Facing down
         # Facing down
-        elif self.agentDir == 1:
-            topX = self.agentPos[0] - AGENT_VIEW_SIZE // 2
-            topY = self.agentPos[1]
+        elif self.agent_dir == 1:
+            topX = self.agent_pos[0] - AGENT_VIEW_SIZE // 2
+            topY = self.agent_pos[1]
         # Facing left
         # Facing left
-        elif self.agentDir == 2:
-            topX = self.agentPos[0] - AGENT_VIEW_SIZE + 1
-            topY = self.agentPos[1] - AGENT_VIEW_SIZE // 2
+        elif self.agent_dir == 2:
+            topX = self.agent_pos[0] - AGENT_VIEW_SIZE + 1
+            topY = self.agent_pos[1] - AGENT_VIEW_SIZE // 2
         # Facing up
         # Facing up
-        elif self.agentDir == 3:
-            topX = self.agentPos[0] - AGENT_VIEW_SIZE // 2
-            topY = self.agentPos[1] - AGENT_VIEW_SIZE + 1
+        elif self.agent_dir == 3:
+            topX = self.agent_pos[0] - AGENT_VIEW_SIZE // 2
+            topY = self.agent_pos[1] - AGENT_VIEW_SIZE + 1
         else:
         else:
             assert False, "invalid agent direction"
             assert False, "invalid agent direction"
 
 
@@ -855,35 +864,35 @@ class MiniGridEnv(gym.Env):
         return (x >= topX and x < botX and y >= topY and y < botY)
         return (x >= topX and x < botX and y >= topY and y < botY)
 
 
     def step(self, action):
     def step(self, action):
-        self.stepCount += 1
+        self.step_count += 1
 
 
         reward = 0
         reward = 0
         done = False
         done = False
 
 
         # Get the position in front of the agent
         # Get the position in front of the agent
         u, v = self.getDirVec()
         u, v = self.getDirVec()
-        fwdPos = (self.agentPos[0] + u, self.agentPos[1] + v)
+        fwdPos = (self.agent_pos[0] + u, self.agent_pos[1] + v)
 
 
         # Get the contents of the cell in front of the agent
         # Get the contents of the cell in front of the agent
         fwdCell = self.grid.get(*fwdPos)
         fwdCell = self.grid.get(*fwdPos)
 
 
         # Rotate left
         # Rotate left
         if action == self.actions.left:
         if action == self.actions.left:
-            self.agentDir -= 1
-            if self.agentDir < 0:
-                self.agentDir += 4
+            self.agent_dir -= 1
+            if self.agent_dir < 0:
+                self.agent_dir += 4
 
 
         # Rotate right
         # Rotate right
         elif action == self.actions.right:
         elif action == self.actions.right:
-            self.agentDir = (self.agentDir + 1) % 4
+            self.agent_dir = (self.agent_dir + 1) % 4
 
 
         # Move forward
         # Move forward
         elif action == self.actions.forward:
         elif action == self.actions.forward:
             if fwdCell == None or fwdCell.canOverlap():
             if fwdCell == None or fwdCell.canOverlap():
-                self.agentPos = fwdPos
+                self.agent_pos = fwdPos
             if fwdCell != None and fwdCell.type == 'goal':
             if fwdCell != None and fwdCell.type == 'goal':
                 done = True
                 done = True
-                reward = 1000 - self.stepCount
+                reward = 1000 - self.step_count
 
 
         # Pick up an object
         # Pick up an object
         elif action == self.actions.pickup:
         elif action == self.actions.pickup:
@@ -910,7 +919,7 @@ class MiniGridEnv(gym.Env):
         else:
         else:
             assert False, "unknown action"
             assert False, "unknown action"
 
 
-        if self.stepCount >= self.maxSteps:
+        if self.step_count >= self.max_steps:
             done = True
             done = True
 
 
         obs = self._genObs()
         obs = self._genObs()
@@ -926,17 +935,17 @@ class MiniGridEnv(gym.Env):
 
 
         grid = self.grid.slice(topX, topY, AGENT_VIEW_SIZE, AGENT_VIEW_SIZE)
         grid = self.grid.slice(topX, topY, AGENT_VIEW_SIZE, AGENT_VIEW_SIZE)
 
 
-        for i in range(self.agentDir + 1):
+        for i in range(self.agent_dir + 1):
             grid = grid.rotateLeft()
             grid = grid.rotateLeft()
 
 
         # Make it so the agent sees what it's carrying
         # Make it so the agent sees what it's carrying
         # We do this by placing the carried object at the agent's position
         # We do this by placing the carried object at the agent's position
         # in the agent's partially observable view
         # in the agent's partially observable view
-        agentPos = grid.width // 2, grid.height - 1
+        agent_pos = grid.width // 2, grid.height - 1
         if self.carrying:
         if self.carrying:
-            grid.set(*agentPos, self.carrying)
+            grid.set(*agent_pos, self.carrying)
         else:
         else:
-            grid.set(*agentPos, None)
+            grid.set(*agent_pos, None)
 
 
         # Encode the partially observable view into a numpy array
         # Encode the partially observable view into a numpy array
         image = grid.encode()
         image = grid.encode()
@@ -949,7 +958,7 @@ class MiniGridEnv(gym.Env):
         # - a textual mission string (instructions for the agent)
         # - a textual mission string (instructions for the agent)
         obs = {
         obs = {
             'image': image,
             'image': image,
-            'direction': self.agentDir,
+            'direction': self.agent_dir,
             'mission': self.mission
             'mission': self.mission
         }
         }
 
 
@@ -960,13 +969,13 @@ class MiniGridEnv(gym.Env):
         Render an agent observation for visualization
         Render an agent observation for visualization
         """
         """
 
 
-        if self.obsRender == None:
-            self.obsRender = Renderer(
+        if self.obs_render == None:
+            self.obs_render = Renderer(
                 AGENT_VIEW_SIZE * CELL_PIXELS // 2,
                 AGENT_VIEW_SIZE * CELL_PIXELS // 2,
                 AGENT_VIEW_SIZE * CELL_PIXELS // 2
                 AGENT_VIEW_SIZE * CELL_PIXELS // 2
             )
             )
 
 
-        r = self.obsRender
+        r = self.obs_render
 
 
         r.beginFrame()
         r.beginFrame()
 
 
@@ -1002,18 +1011,18 @@ class MiniGridEnv(gym.Env):
         """
         """
 
 
         if close:
         if close:
-            if self.gridRender:
-                self.gridRender.close()
+            if self.grid_render:
+                self.grid_render.close()
             return
             return
 
 
-        if self.gridRender is None:
-            self.gridRender = Renderer(
-                self.gridSize * CELL_PIXELS,
-                self.gridSize * CELL_PIXELS,
+        if self.grid_render is None:
+            self.grid_render = Renderer(
+                self.grid_size * CELL_PIXELS,
+                self.grid_size * CELL_PIXELS,
                 True if mode == 'human' else False
                 True if mode == 'human' else False
             )
             )
 
 
-        r = self.gridRender
+        r = self.grid_render
 
 
         r.beginFrame()
         r.beginFrame()
 
 
@@ -1023,10 +1032,10 @@ class MiniGridEnv(gym.Env):
         # Draw the agent
         # Draw the agent
         r.push()
         r.push()
         r.translate(
         r.translate(
-            CELL_PIXELS * (self.agentPos[0] + 0.5),
-            CELL_PIXELS * (self.agentPos[1] + 0.5)
+            CELL_PIXELS * (self.agent_pos[0] + 0.5),
+            CELL_PIXELS * (self.agent_pos[1] + 0.5)
         )
         )
-        r.rotate(self.agentDir * 90)
+        r.rotate(self.agent_dir * 90)
         r.setLineColor(255, 0, 0)
         r.setLineColor(255, 0, 0)
         r.setColor(255, 0, 0)
         r.setColor(255, 0, 0)
         r.drawPolygon([
         r.drawPolygon([

+ 3 - 4
run_tests.py

@@ -36,7 +36,8 @@ for envName in envList:
     env.reset()
     env.reset()
 
 
     # Run for a few episodes
     # Run for a few episodes
-    for i in range(5 * env.maxSteps):
+    num_episodes = 0
+    while num_episodes < 5:
         # Pick a random action
         # Pick a random action
         action = random.randint(0, env.action_space.n - 1)
         action = random.randint(0, env.action_space.n - 1)
 
 
@@ -53,11 +54,9 @@ for envName in envList:
         assert reward <= env.reward_range[1], reward
         assert reward <= env.reward_range[1], reward
 
 
         if done:
         if done:
+            num_episodes += 1
             env.reset()
             env.reset()
 
 
-            # Check that the agent doesn't overlap with an object
-            assert env.grid.get(*env.agentPos) is None
-
         env.render('rgb_array')
         env.render('rgb_array')
 
 
     env.close()
     env.close()

+ 1 - 1
standalone.py

@@ -67,7 +67,7 @@ def main():
 
 
         obs, reward, done, info = env.step(action)
         obs, reward, done, info = env.step(action)
 
 
-        print('step=%s, reward=%s' % (env.stepCount, reward))
+        print('step=%s, reward=%s' % (env.step_count, reward))
 
 
         if done:
         if done:
             print('done!')
             print('done!')