浏览代码

Added tests. Moved envs into own source files.

Maxime Chevalier-Boisvert 7 年之前
父节点
当前提交
5daf219a68
共有 6 个文件被更改,包括 399 次插入384 次删除
  1. 2 0
      gym_minigrid/envs/__init__.py
  2. 115 0
      gym_minigrid/envs/fetch.py
  3. 0 6
      gym_minigrid/envs/fourroomqa.py
  4. 264 0
      gym_minigrid/envs/multiroom.py
  5. 0 374
      gym_minigrid/envs/simple_envs.py
  6. 18 4
      run_tests.py

+ 2 - 0
gym_minigrid/envs/__init__.py

@@ -1,2 +1,4 @@
 from gym_minigrid.envs.simple_envs import *
+from gym_minigrid.envs.multiroom import *
+from gym_minigrid.envs.fetch import *
 from gym_minigrid.envs.fourroomqa import *

+ 115 - 0
gym_minigrid/envs/fetch.py

@@ -0,0 +1,115 @@
+from gym_minigrid.minigrid import *
+from gym_minigrid.register import register
+
+class FetchEnv(MiniGridEnv):
+    """
+    Environment in which the agent has to fetch a random object
+    named using English text strings
+    """
+
+    def __init__(
+        self,
+        size=8,
+        numObjs=3
+    ):
+        self.numObjs = numObjs
+        super().__init__(gridSize=size, maxSteps=5*size)
+
+    def _genGrid(self, width, height):
+        assert width == height
+        gridSz = width
+
+        # Create a grid surrounded by walls
+        grid = Grid(width, height)
+        for i in range(0, width):
+            grid.set(i, 0, Wall())
+            grid.set(i, height-1, Wall())
+        for j in range(0, height):
+            grid.set(0, j, Wall())
+            grid.set(width-1, j, Wall())
+
+        types = ['key', 'ball']
+        colors = list(COLORS.keys())
+
+        objs = []
+
+        # For each object to be generated
+        for i in range(0, self.numObjs):
+            objType = self._randElem(types)
+            objColor = self._randElem(colors)
+
+            if objType == 'key':
+                obj = Key(objColor)
+            elif objType == 'ball':
+                obj = Ball(objColor)
+
+            while True:
+                pos = (
+                    self._randInt(1, gridSz - 1),
+                    self._randInt(1, gridSz - 1)
+                )
+
+                if pos != self.startPos:
+                    grid.set(*pos, obj)
+                    break
+
+            objs.append(obj)
+
+        # Choose a random object to be picked up
+        target = objs[self._randInt(0, len(objs))]
+        self.targetType = target.type
+        self.targetColor = target.color
+
+        descStr = '%s %s' % (self.targetColor, self.targetType)
+
+        # Generate the mission string
+        idx = self._randInt(0, 5)
+        if idx == 0:
+            self.mission = 'get a %s' % descStr
+        elif idx == 1:
+            self.mission = 'go get a %s' % descStr
+        elif idx == 2:
+            self.mission = 'fetch a %s' % descStr
+        elif idx == 3:
+            self.mission = 'go fetch a %s' % descStr
+        elif idx == 4:
+            self.mission = 'you must fetch a %s' % descStr
+        assert hasattr(self, 'mission')
+
+        return grid
+
+    def _reset(self):
+        obs = MiniGridEnv._reset(self)
+
+        obs = {
+            'image': obs,
+            'mission': self.mission,
+            'advice' : ''
+        }
+
+        return obs
+
+    def _step(self, action):
+        obs, reward, done, info = MiniGridEnv._step(self, action)
+
+        if self.carrying:
+            if self.carrying.color == self.targetColor and \
+               self.carrying.type == self.targetType:
+                reward = 1000 - self.stepCount
+                done = True
+            else:
+                reward = -1
+                done = True
+
+        obs = {
+            'image': obs,
+            'mission': self.mission,
+            'advice': ''
+        }
+
+        return obs, reward, done, info
+
+register(
+    id='MiniGrid-Fetch-8x8-v0',
+    entry_point='gym_minigrid.envs:FetchEnv'
+)

+ 0 - 6
gym_minigrid/envs/fourroomqa.py

@@ -181,12 +181,6 @@ class FourRoomQAEnv(MiniGridEnv):
 
         # TODO: how many X in the Y room question type
 
-
-
-
-
-
-
         print(self.question)
         print(self.answer)
 

+ 264 - 0
gym_minigrid/envs/multiroom.py

@@ -0,0 +1,264 @@
+from gym_minigrid.minigrid import *
+from gym_minigrid.register import register
+
+class Room:
+    def __init__(self,
+        top,
+        size,
+        entryDoorPos,
+        exitDoorPos
+    ):
+        self.top = top
+        self.size = size
+        self.entryDoorPos = entryDoorPos
+        self.exitDoorPos = exitDoorPos
+
+class MultiRoomEnv(MiniGridEnv):
+    """
+    Environment with multiple rooms (subgoals)
+    """
+
+    def __init__(self,
+        minNumRooms,
+        maxNumRooms,
+        maxRoomSize=10
+    ):
+        assert minNumRooms > 0
+        assert maxNumRooms >= minNumRooms
+        assert maxRoomSize >= 4
+
+        self.minNumRooms = minNumRooms
+        self.maxNumRooms = maxNumRooms
+        self.maxRoomSize = maxRoomSize
+
+        self.rooms = []
+
+        super(MultiRoomEnv, self).__init__(
+            gridSize=25,
+            maxSteps=self.maxNumRooms * 20
+        )
+
+    def _genGrid(self, width, height):
+
+        roomList = []
+
+        # Choose a random number of rooms to generate
+        numRooms = self._randInt(self.minNumRooms, self.maxNumRooms+1)
+
+        while len(roomList) < numRooms:
+            curRoomList = []
+
+            entryDoorPos = (
+                self._randInt(0, width - 2),
+                self._randInt(0, width - 2)
+            )
+
+            # Recursively place the rooms
+            self._placeRoom(
+                numRooms,
+                roomList=curRoomList,
+                minSz=4,
+                maxSz=self.maxRoomSize,
+                entryDoorWall=2,
+                entryDoorPos=entryDoorPos
+            )
+
+            if len(curRoomList) > len(roomList):
+                roomList = curRoomList
+
+        # Store the list of rooms in this environment
+        assert len(roomList) > 0
+        self.rooms = roomList
+
+        # Randomize the starting agent position and direction
+        topX, topY = roomList[0].top
+        sizeX, sizeY = roomList[0].size
+        self.startPos = (
+            self._randInt(topX + 1, topX + sizeX - 2),
+            self._randInt(topY + 1, topY + sizeY - 2)
+        )
+        self.startDir = self._randInt(0, 4)
+
+        # Create the grid
+        grid = Grid(width, height)
+        wall = Wall()
+
+        prevDoorColor = None
+
+        # For each room
+        for idx, room in enumerate(roomList):
+
+            topX, topY = room.top
+            sizeX, sizeY = room.size
+
+            # Draw the top and bottom walls
+            for i in range(0, sizeX):
+                grid.set(topX + i, topY, wall)
+                grid.set(topX + i, topY + sizeY - 1, wall)
+
+            # Draw the left and right walls
+            for j in range(0, sizeY):
+                grid.set(topX, topY + j, wall)
+                grid.set(topX + sizeX - 1, topY + j, wall)
+
+            # If this isn't the first room, place the entry door
+            if idx > 0:
+                # Pick a door color different from the previous one
+                doorColors = set(COLORS.keys())
+                if prevDoorColor:
+                    doorColors.remove(prevDoorColor)
+                doorColor = self._randElem(doorColors)
+
+                entryDoor = Door(doorColor)
+                grid.set(*room.entryDoorPos, entryDoor)
+                prevDoorColor = doorColor
+
+                prevRoom = roomList[idx-1]
+                prevRoom.exitDoorPos = room.entryDoorPos
+
+        # Place the final goal
+        while True:
+            self.goalPos = (
+                self._randInt(topX + 1, topX + sizeX - 1),
+                self._randInt(topY + 1, topY + sizeY - 1)
+            )
+
+            # Make sure the goal doesn't overlap with the agent
+            if self.goalPos != self.startPos:
+                grid.set(*self.goalPos, Goal())
+                break
+
+        return grid
+
+    def _placeRoom(
+        self,
+        numLeft,
+        roomList,
+        minSz,
+        maxSz,
+        entryDoorWall,
+        entryDoorPos
+    ):
+        # Choose the room size randomly
+        sizeX = self._randInt(minSz, maxSz+1)
+        sizeY = self._randInt(minSz, maxSz+1)
+
+        # The first room will be at the door position
+        if len(roomList) == 0:
+            topX, topY = entryDoorPos
+        # Entry on the right
+        elif entryDoorWall == 0:
+            topX = entryDoorPos[0] - sizeX + 1
+            y = entryDoorPos[1]
+            topY = self._randInt(y - sizeY + 2, y)
+        # Entry wall on the south
+        elif entryDoorWall == 1:
+            x = entryDoorPos[0]
+            topX = self._randInt(x - sizeX + 2, x)
+            topY = entryDoorPos[1] - sizeY + 1
+        # Entry wall on the left
+        elif entryDoorWall == 2:
+            topX = entryDoorPos[0]
+            y = entryDoorPos[1]
+            topY = self._randInt(y - sizeY + 2, y)
+        # Entry wall on the top
+        elif entryDoorWall == 3:
+            x = entryDoorPos[0]
+            topX = self._randInt(x - sizeX + 2, x)
+            topY = entryDoorPos[1]
+        else:
+            assert False, entryDoorWall
+
+        # If the room is out of the grid, can't place a room here
+        if topX < 0 or topY < 0:
+            return False
+        if topX + sizeX > self.gridSize or topY + sizeY >= self.gridSize:
+            return False
+
+        # If the room intersects with previous rooms, can't place it here
+        for room in roomList[:-1]:
+            nonOverlap = \
+                topX + sizeX < room.top[0] or \
+                room.top[0] + room.size[0] <= topX or \
+                topY + sizeY < room.top[1] or \
+                room.top[1] + room.size[1] <= topY
+
+            if not nonOverlap:
+                return False
+
+        # Add this room to the list
+        roomList.append(Room(
+            (topX, topY),
+            (sizeX, sizeY),
+            entryDoorPos,
+            None
+        ))
+
+        # If this was the last room, stop
+        if numLeft == 1:
+            return True
+
+        # Try placing the next room
+        for i in range(0, 8):
+
+            # Pick which wall to place the out door on
+            wallSet = set((0, 1, 2, 3))
+            wallSet.remove(entryDoorWall)
+            exitDoorWall = self._randElem(wallSet)
+            nextEntryWall = (exitDoorWall + 2) % 4
+
+            # Pick the exit door position
+            # Exit on right wall
+            if exitDoorWall == 0:
+                exitDoorPos = (
+                    topX + sizeX - 1,
+                    topY + self._randInt(1, sizeY - 1)
+                )
+            # Exit on south wall
+            elif exitDoorWall == 1:
+                exitDoorPos = (
+                    topX + self._randInt(1, sizeX - 1),
+                    topY + sizeY - 1
+                )
+            # Exit on left wall
+            elif exitDoorWall == 2:
+                exitDoorPos = (
+                    topX,
+                    topY + self._randInt(1, sizeY - 1)
+                )
+            # Exit on north wall
+            elif exitDoorWall == 3:
+                exitDoorPos = (
+                    topX + self._randInt(1, sizeX - 1),
+                    topY
+                )
+            else:
+                assert False
+
+            # Recursively create the other rooms
+            success = self._placeRoom(
+                numLeft - 1,
+                roomList=roomList,
+                minSz=minSz,
+                maxSz=maxSz,
+                entryDoorWall=nextEntryWall,
+                entryDoorPos=exitDoorPos
+            )
+
+            if success:
+                break
+
+        return True
+
+class MultiRoomEnvN6(MultiRoomEnv):
+    def __init__(self):
+        super(MultiRoomEnvN6, self).__init__(
+            minNumRooms=6,
+            maxNumRooms=6
+        )
+
+register(
+    id='MiniGrid-MultiRoom-N6-v0',
+    entry_point='gym_minigrid.envs:MultiRoomEnvN6',
+    reward_threshold=1000.0
+)

+ 0 - 374
gym_minigrid/envs/simple_envs.py

@@ -92,377 +92,3 @@ register(
     id='MiniGrid-DoorKey-16x16-v0',
     entry_point='gym_minigrid.envs:DoorKeyEnv16x16'
 )
-
-class Room:
-    def __init__(self,
-        top,
-        size,
-        entryDoorPos,
-        exitDoorPos
-    ):
-        self.top = top
-        self.size = size
-        self.entryDoorPos = entryDoorPos
-        self.exitDoorPos = exitDoorPos
-
-class MultiRoomEnv(MiniGridEnv):
-    """
-    Environment with multiple rooms (subgoals)
-    """
-
-    def __init__(self,
-        minNumRooms,
-        maxNumRooms,
-        maxRoomSize=10
-    ):
-        assert minNumRooms > 0
-        assert maxNumRooms >= minNumRooms
-        assert maxRoomSize >= 4
-
-        self.minNumRooms = minNumRooms
-        self.maxNumRooms = maxNumRooms
-        self.maxRoomSize = maxRoomSize
-
-        self.rooms = []
-
-        super(MultiRoomEnv, self).__init__(
-            gridSize=25,
-            maxSteps=self.maxNumRooms * 20
-        )
-
-    def _genGrid(self, width, height):
-
-        roomList = []
-
-        # Choose a random number of rooms to generate
-        numRooms = self._randInt(self.minNumRooms, self.maxNumRooms+1)
-
-        while len(roomList) < numRooms:
-            curRoomList = []
-
-            entryDoorPos = (
-                self._randInt(0, width - 2),
-                self._randInt(0, width - 2)
-            )
-
-            # Recursively place the rooms
-            self._placeRoom(
-                numRooms,
-                roomList=curRoomList,
-                minSz=4,
-                maxSz=self.maxRoomSize,
-                entryDoorWall=2,
-                entryDoorPos=entryDoorPos
-            )
-
-            if len(curRoomList) > len(roomList):
-                roomList = curRoomList
-
-        # Store the list of rooms in this environment
-        assert len(roomList) > 0
-        self.rooms = roomList
-
-        # Randomize the starting agent position and direction
-        topX, topY = roomList[0].top
-        sizeX, sizeY = roomList[0].size
-        self.startPos = (
-            self._randInt(topX + 1, topX + sizeX - 2),
-            self._randInt(topY + 1, topY + sizeY - 2)
-        )
-        self.startDir = self._randInt(0, 4)
-
-        # Create the grid
-        grid = Grid(width, height)
-        wall = Wall()
-
-        prevDoorColor = None
-
-        # For each room
-        for idx, room in enumerate(roomList):
-
-            topX, topY = room.top
-            sizeX, sizeY = room.size
-
-            # Draw the top and bottom walls
-            for i in range(0, sizeX):
-                grid.set(topX + i, topY, wall)
-                grid.set(topX + i, topY + sizeY - 1, wall)
-
-            # Draw the left and right walls
-            for j in range(0, sizeY):
-                grid.set(topX, topY + j, wall)
-                grid.set(topX + sizeX - 1, topY + j, wall)
-
-            # If this isn't the first room, place the entry door
-            if idx > 0:
-                # Pick a door color different from the previous one
-                doorColors = set(COLORS.keys())
-                if prevDoorColor:
-                    doorColors.remove(prevDoorColor)
-                doorColor = self._randElem(doorColors)
-
-                entryDoor = Door(doorColor)
-                grid.set(*room.entryDoorPos, entryDoor)
-                prevDoorColor = doorColor
-
-                prevRoom = roomList[idx-1]
-                prevRoom.exitDoorPos = room.entryDoorPos
-
-        # Place the final goal
-        while True:
-            self.goalPos = (
-                self._randInt(topX + 1, topX + sizeX - 1),
-                self._randInt(topY + 1, topY + sizeY - 1)
-            )
-
-            # Make sure the goal doesn't overlap with the agent
-            if self.goalPos != self.startPos:
-                grid.set(*self.goalPos, Goal())
-                break
-
-        return grid
-
-    def _placeRoom(
-        self,
-        numLeft,
-        roomList,
-        minSz,
-        maxSz,
-        entryDoorWall,
-        entryDoorPos
-    ):
-        # Choose the room size randomly
-        sizeX = self._randInt(minSz, maxSz+1)
-        sizeY = self._randInt(minSz, maxSz+1)
-
-        # The first room will be at the door position
-        if len(roomList) == 0:
-            topX, topY = entryDoorPos
-        # Entry on the right
-        elif entryDoorWall == 0:
-            topX = entryDoorPos[0] - sizeX + 1
-            y = entryDoorPos[1]
-            topY = self._randInt(y - sizeY + 2, y)
-        # Entry wall on the south
-        elif entryDoorWall == 1:
-            x = entryDoorPos[0]
-            topX = self._randInt(x - sizeX + 2, x)
-            topY = entryDoorPos[1] - sizeY + 1
-        # Entry wall on the left
-        elif entryDoorWall == 2:
-            topX = entryDoorPos[0]
-            y = entryDoorPos[1]
-            topY = self._randInt(y - sizeY + 2, y)
-        # Entry wall on the top
-        elif entryDoorWall == 3:
-            x = entryDoorPos[0]
-            topX = self._randInt(x - sizeX + 2, x)
-            topY = entryDoorPos[1]
-        else:
-            assert False, entryDoorWall
-
-        # If the room is out of the grid, can't place a room here
-        if topX < 0 or topY < 0:
-            return False
-        if topX + sizeX > self.gridSize or topY + sizeY >= self.gridSize:
-            return False
-
-        # If the room intersects with previous rooms, can't place it here
-        for room in roomList[:-1]:
-            nonOverlap = \
-                topX + sizeX < room.top[0] or \
-                room.top[0] + room.size[0] <= topX or \
-                topY + sizeY < room.top[1] or \
-                room.top[1] + room.size[1] <= topY
-
-            if not nonOverlap:
-                return False
-
-        # Add this room to the list
-        roomList.append(Room(
-            (topX, topY),
-            (sizeX, sizeY),
-            entryDoorPos,
-            None
-        ))
-
-        # If this was the last room, stop
-        if numLeft == 1:
-            return True
-
-        # Try placing the next room
-        for i in range(0, 8):
-
-            # Pick which wall to place the out door on
-            wallSet = set((0, 1, 2, 3))
-            wallSet.remove(entryDoorWall)
-            exitDoorWall = self._randElem(wallSet)
-            nextEntryWall = (exitDoorWall + 2) % 4
-
-            # Pick the exit door position
-            # Exit on right wall
-            if exitDoorWall == 0:
-                exitDoorPos = (
-                    topX + sizeX - 1,
-                    topY + self._randInt(1, sizeY - 1)
-                )
-            # Exit on south wall
-            elif exitDoorWall == 1:
-                exitDoorPos = (
-                    topX + self._randInt(1, sizeX - 1),
-                    topY + sizeY - 1
-                )
-            # Exit on left wall
-            elif exitDoorWall == 2:
-                exitDoorPos = (
-                    topX,
-                    topY + self._randInt(1, sizeY - 1)
-                )
-            # Exit on north wall
-            elif exitDoorWall == 3:
-                exitDoorPos = (
-                    topX + self._randInt(1, sizeX - 1),
-                    topY
-                )
-            else:
-                assert False
-
-            # Recursively create the other rooms
-            success = self._placeRoom(
-                numLeft - 1,
-                roomList=roomList,
-                minSz=minSz,
-                maxSz=maxSz,
-                entryDoorWall=nextEntryWall,
-                entryDoorPos=exitDoorPos
-            )
-
-            if success:
-                break
-
-        return True
-
-class MultiRoomEnvN6(MultiRoomEnv):
-    def __init__(self):
-        super(MultiRoomEnvN6, self).__init__(
-            minNumRooms=6,
-            maxNumRooms=6
-        )
-
-register(
-    id='MiniGrid-MultiRoom-N6-v0',
-    entry_point='gym_minigrid.envs:MultiRoomEnvN6',
-    reward_threshold=1000.0
-)
-
-class FetchEnv(MiniGridEnv):
-    """
-    Environment in which the agent has to fetch a random object
-    named using English text strings
-    """
-
-    def __init__(
-        self,
-        size=8,
-        numObjs=3):
-        self.numObjs = numObjs
-        super(FetchEnv, self).__init__(gridSize=size, maxSteps=5*size)
-
-    def _genGrid(self, width, height):
-        assert width == height
-        gridSz = width
-
-        # Create a grid surrounded by walls
-        grid = Grid(width, height)
-        for i in range(0, width):
-            grid.set(i, 0, Wall())
-            grid.set(i, height-1, Wall())
-        for j in range(0, height):
-            grid.set(0, j, Wall())
-            grid.set(width-1, j, Wall())
-
-        types = ['key', 'ball']
-        colors = list(COLORS.keys())
-
-        objs = []
-
-        # For each object to be generated
-        for i in range(0, self.numObjs):
-            objType = self._randElem(types)
-            objColor = self._randElem(colors)
-
-            if objType == 'key':
-                obj = Key(objColor)
-            elif objType == 'ball':
-                obj = Ball(objColor)
-
-            while True:
-                pos = (
-                    self._randInt(1, gridSz - 1),
-                    self._randInt(1, gridSz - 1)
-                )
-
-                if pos != self.startPos:
-                    grid.set(*pos, obj)
-                    break
-
-            objs.append(obj)
-
-        # Choose a random object to be picked up
-        target = objs[self._randInt(0, len(objs))]
-        self.targetType = target.type
-        self.targetColor = target.color
-
-        descStr = '%s %s' % (self.targetColor, self.targetType)
-
-        # Generate the mission string
-        idx = self._randInt(0, 5)
-        if idx == 0:
-            self.mission = 'get a %s' % descStr
-        elif idx == 1:
-            self.mission = 'go get a %s' % descStr
-        elif idx == 2:
-            self.mission = 'fetch a %s' % descStr
-        elif idx == 3:
-            self.mission = 'go fetch a %s' % descStr
-        elif idx == 4:
-            self.mission = 'you must fetch a %s' % descStr
-        assert hasattr(self, 'mission')
-
-        return grid
-
-    def _reset(self):
-        obs = MiniGridEnv._reset(self)
-
-        obs = {
-            'image': obs,
-            'mission': self.mission,
-            'advice' : ''
-        }
-
-        return obs
-
-    def _step(self, action):
-        obs, reward, done, info = MiniGridEnv._step(self, action)
-
-        if self.carrying:
-            if self.carrying.color == self.targetColor and \
-               self.carrying.type == self.targetType:
-                reward = 1000 - self.stepCount
-                done = True
-            else:
-                reward = -1000
-                done = True
-
-        obs = {
-            'image': obs,
-            'mission': self.mission,
-            'advice': ''
-        }
-
-        return obs, reward, done, info
-
-register(
-    id='MiniGrid-Fetch-8x8-v0',
-    entry_point='gym_minigrid.envs:FetchEnv'
-)

+ 18 - 4
run_tests.py

@@ -2,7 +2,9 @@
 
 import random
 import gym
+import numpy as np
 from gym_minigrid.register import envSet
+from gym_minigrid.minigrid import Grid
 
 print('%d environments registered' % len(envSet))
 
@@ -17,10 +19,22 @@ for envName in sorted(envSet):
     env.seed()
     env.reset()
 
-    numActions = env.action_space.n
-    for i in range(500):
-        action = random.randint(0, numActions - 1)
-        env.step(action)
+    # Run for a few episodes
+    for i in range(5 * env.maxSteps):
+        # Pick a random action
+        action = random.randint(0, env.action_space.n - 1)
+
+        obs, reward, done, info = env.step(action)
+
+        # Test observation encode/decode roundtrip
+        if type(obs) is np.ndarray:
+            grid = Grid.decode(obs)
+            obs2 = grid.encode()
+            assert np.array_equal(obs2, obs)
+
+        assert reward >= env.reward_range[0], reward
+        assert reward <= env.reward_range[1], reward
+
         env.render('rgb_array')
 
     env.close()