multiroom.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. from minigrid.core.constants import COLOR_NAMES
  2. from minigrid.core.grid import Grid
  3. from minigrid.core.mission import MissionSpace
  4. from minigrid.core.world_object import Door, Goal, Wall
  5. from minigrid.minigrid import MiniGridEnv
  6. class MultiRoom:
  7. def __init__(self, top, size, entryDoorPos, exitDoorPos):
  8. self.top = top
  9. self.size = size
  10. self.entryDoorPos = entryDoorPos
  11. self.exitDoorPos = exitDoorPos
  12. class MultiRoomEnv(MiniGridEnv):
  13. """
  14. ### Description
  15. This environment has a series of connected rooms with doors that must be
  16. opened in order to get to the next room. The final room has the green goal
  17. square the agent must get to. This environment is extremely difficult to
  18. solve using RL alone. However, by gradually increasing the number of rooms
  19. and building a curriculum, the environment can be solved.
  20. ### Mission Space
  21. "traverse the rooms to get to the goal"
  22. ### Action Space
  23. | Num | Name | Action |
  24. |-----|--------------|---------------------------|
  25. | 0 | left | Turn left |
  26. | 1 | right | Turn right |
  27. | 2 | forward | Move forward |
  28. | 3 | pickup | Unused |
  29. | 4 | drop | Unused |
  30. | 5 | toggle | Toggle/activate an object |
  31. | 6 | done | Unused |
  32. ### Observation Encoding
  33. - Each tile is encoded as a 3 dimensional tuple:
  34. `(OBJECT_IDX, COLOR_IDX, STATE)`
  35. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  36. [minigrid/minigrid.py](minigrid/minigrid.py)
  37. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  38. ### Rewards
  39. A reward of '1' is given for success, and '0' for failure.
  40. ### Termination
  41. The episode ends if any one of the following conditions is met:
  42. 1. The agent reaches the goal.
  43. 2. Timeout (see `max_steps`).
  44. ### Registered Configurations
  45. S: size of map SxS.
  46. N: number of rooms.
  47. - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
  48. - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
  49. - `MiniGrid-MultiRoom-N6-v0` (six rooms)
  50. """
  51. def __init__(self, minNumRooms, maxNumRooms, maxRoomSize=10, **kwargs):
  52. assert minNumRooms > 0
  53. assert maxNumRooms >= minNumRooms
  54. assert maxRoomSize >= 4
  55. self.minNumRooms = minNumRooms
  56. self.maxNumRooms = maxNumRooms
  57. self.maxRoomSize = maxRoomSize
  58. self.rooms = []
  59. mission_space = MissionSpace(
  60. mission_func=lambda: "traverse the rooms to get to the goal"
  61. )
  62. self.size = 25
  63. super().__init__(
  64. mission_space=mission_space,
  65. width=self.size,
  66. height=self.size,
  67. max_steps=self.maxNumRooms * 20,
  68. **kwargs
  69. )
  70. def _gen_grid(self, width, height):
  71. roomList = []
  72. # Choose a random number of rooms to generate
  73. numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms + 1)
  74. while len(roomList) < numRooms:
  75. curRoomList = []
  76. entryDoorPos = (self._rand_int(0, width - 2), self._rand_int(0, width - 2))
  77. # Recursively place the rooms
  78. self._placeRoom(
  79. numRooms,
  80. roomList=curRoomList,
  81. minSz=4,
  82. maxSz=self.maxRoomSize,
  83. entryDoorWall=2,
  84. entryDoorPos=entryDoorPos,
  85. )
  86. if len(curRoomList) > len(roomList):
  87. roomList = curRoomList
  88. # Store the list of rooms in this environment
  89. assert len(roomList) > 0
  90. self.rooms = roomList
  91. # Create the grid
  92. self.grid = Grid(width, height)
  93. wall = Wall()
  94. prevDoorColor = None
  95. # For each room
  96. for idx, room in enumerate(roomList):
  97. topX, topY = room.top
  98. sizeX, sizeY = room.size
  99. # Draw the top and bottom walls
  100. for i in range(0, sizeX):
  101. self.grid.set(topX + i, topY, wall)
  102. self.grid.set(topX + i, topY + sizeY - 1, wall)
  103. # Draw the left and right walls
  104. for j in range(0, sizeY):
  105. self.grid.set(topX, topY + j, wall)
  106. self.grid.set(topX + sizeX - 1, topY + j, wall)
  107. # If this isn't the first room, place the entry door
  108. if idx > 0:
  109. # Pick a door color different from the previous one
  110. doorColors = set(COLOR_NAMES)
  111. if prevDoorColor:
  112. doorColors.remove(prevDoorColor)
  113. # Note: the use of sorting here guarantees determinism,
  114. # This is needed because Python's set is not deterministic
  115. doorColor = self._rand_elem(sorted(doorColors))
  116. entryDoor = Door(doorColor)
  117. self.grid.set(room.entryDoorPos[0], room.entryDoorPos[1], entryDoor)
  118. prevDoorColor = doorColor
  119. prevRoom = roomList[idx - 1]
  120. prevRoom.exitDoorPos = room.entryDoorPos
  121. # Randomize the starting agent position and direction
  122. self.place_agent(roomList[0].top, roomList[0].size)
  123. # Place the final goal in the last room
  124. self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)
  125. self.mission = "traverse the rooms to get to the goal"
  126. def _placeRoom(self, numLeft, roomList, minSz, maxSz, entryDoorWall, entryDoorPos):
  127. # Choose the room size randomly
  128. sizeX = self._rand_int(minSz, maxSz + 1)
  129. sizeY = self._rand_int(minSz, maxSz + 1)
  130. # The first room will be at the door position
  131. if len(roomList) == 0:
  132. topX, topY = entryDoorPos
  133. # Entry on the right
  134. elif entryDoorWall == 0:
  135. topX = entryDoorPos[0] - sizeX + 1
  136. y = entryDoorPos[1]
  137. topY = self._rand_int(y - sizeY + 2, y)
  138. # Entry wall on the south
  139. elif entryDoorWall == 1:
  140. x = entryDoorPos[0]
  141. topX = self._rand_int(x - sizeX + 2, x)
  142. topY = entryDoorPos[1] - sizeY + 1
  143. # Entry wall on the left
  144. elif entryDoorWall == 2:
  145. topX = entryDoorPos[0]
  146. y = entryDoorPos[1]
  147. topY = self._rand_int(y - sizeY + 2, y)
  148. # Entry wall on the top
  149. elif entryDoorWall == 3:
  150. x = entryDoorPos[0]
  151. topX = self._rand_int(x - sizeX + 2, x)
  152. topY = entryDoorPos[1]
  153. else:
  154. assert False, entryDoorWall
  155. # If the room is out of the grid, can't place a room here
  156. if topX < 0 or topY < 0:
  157. return False
  158. if topX + sizeX > self.width or topY + sizeY >= self.height:
  159. return False
  160. # If the room intersects with previous rooms, can't place it here
  161. for room in roomList[:-1]:
  162. nonOverlap = (
  163. topX + sizeX < room.top[0]
  164. or room.top[0] + room.size[0] <= topX
  165. or topY + sizeY < room.top[1]
  166. or room.top[1] + room.size[1] <= topY
  167. )
  168. if not nonOverlap:
  169. return False
  170. # Add this room to the list
  171. roomList.append(MultiRoom((topX, topY), (sizeX, sizeY), entryDoorPos, None))
  172. # If this was the last room, stop
  173. if numLeft == 1:
  174. return True
  175. # Try placing the next room
  176. for i in range(0, 8):
  177. # Pick which wall to place the out door on
  178. wallSet = {0, 1, 2, 3}
  179. wallSet.remove(entryDoorWall)
  180. exitDoorWall = self._rand_elem(sorted(wallSet))
  181. nextEntryWall = (exitDoorWall + 2) % 4
  182. # Pick the exit door position
  183. # Exit on right wall
  184. if exitDoorWall == 0:
  185. exitDoorPos = (topX + sizeX - 1, topY + self._rand_int(1, sizeY - 1))
  186. # Exit on south wall
  187. elif exitDoorWall == 1:
  188. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY + sizeY - 1)
  189. # Exit on left wall
  190. elif exitDoorWall == 2:
  191. exitDoorPos = (topX, topY + self._rand_int(1, sizeY - 1))
  192. # Exit on north wall
  193. elif exitDoorWall == 3:
  194. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY)
  195. else:
  196. assert False
  197. # Recursively create the other rooms
  198. success = self._placeRoom(
  199. numLeft - 1,
  200. roomList=roomList,
  201. minSz=minSz,
  202. maxSz=maxSz,
  203. entryDoorWall=nextEntryWall,
  204. entryDoorPos=exitDoorPos,
  205. )
  206. if success:
  207. break
  208. return True