multiroom.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. from gym_minigrid.minigrid import (
  2. COLOR_NAMES,
  3. Door,
  4. Goal,
  5. Grid,
  6. MiniGridEnv,
  7. MissionSpace,
  8. Wall,
  9. )
  10. class MultiRoom:
  11. def __init__(self, top, size, entryDoorPos, exitDoorPos):
  12. self.top = top
  13. self.size = size
  14. self.entryDoorPos = entryDoorPos
  15. self.exitDoorPos = exitDoorPos
  16. class MultiRoomEnv(MiniGridEnv):
  17. """
  18. ### Description
  19. This environment has a series of connected rooms with doors that must be
  20. opened in order to get to the next room. The final room has the green goal
  21. square the agent must get to. This environment is extremely difficult to
  22. solve using RL alone. However, by gradually increasing the number of rooms
  23. and building a curriculum, the environment can be solved.
  24. ### Mission Space
  25. "traverse the rooms to get to the goal"
  26. ### Action Space
  27. | Num | Name | Action |
  28. |-----|--------------|---------------------------|
  29. | 0 | left | Turn left |
  30. | 1 | right | Turn right |
  31. | 2 | forward | Move forward |
  32. | 3 | pickup | Unused |
  33. | 4 | drop | Unused |
  34. | 5 | toggle | Toggle/activate an object |
  35. | 6 | done | Unused |
  36. ### Observation Encoding
  37. - Each tile is encoded as a 3 dimensional tuple:
  38. `(OBJECT_IDX, COLOR_IDX, STATE)`
  39. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  40. [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
  41. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  42. ### Rewards
  43. A reward of '1' is given for success, and '0' for failure.
  44. ### Termination
  45. The episode ends if any one of the following conditions is met:
  46. 1. The agent reaches the goal.
  47. 2. Timeout (see `max_steps`).
  48. ### Registered Configurations
  49. S: size of map SxS.
  50. N: number of rooms.
  51. - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
  52. - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
  53. - `MiniGrid-MultiRoom-N6-v0` (six rooms)
  54. """
  55. def __init__(self, minNumRooms, maxNumRooms, maxRoomSize=10, **kwargs):
  56. assert minNumRooms > 0
  57. assert maxNumRooms >= minNumRooms
  58. assert maxRoomSize >= 4
  59. self.minNumRooms = minNumRooms
  60. self.maxNumRooms = maxNumRooms
  61. self.maxRoomSize = maxRoomSize
  62. self.rooms = []
  63. mission_space = MissionSpace(
  64. mission_func=lambda: "traverse the rooms to get to the goal"
  65. )
  66. self.size = 25
  67. super().__init__(
  68. mission_space=mission_space,
  69. width=self.size,
  70. height=self.size,
  71. max_steps=self.maxNumRooms * 20,
  72. **kwargs
  73. )
  74. def _gen_grid(self, width, height):
  75. roomList = []
  76. # Choose a random number of rooms to generate
  77. numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms + 1)
  78. while len(roomList) < numRooms:
  79. curRoomList = []
  80. entryDoorPos = (self._rand_int(0, width - 2), self._rand_int(0, width - 2))
  81. # Recursively place the rooms
  82. self._placeRoom(
  83. numRooms,
  84. roomList=curRoomList,
  85. minSz=4,
  86. maxSz=self.maxRoomSize,
  87. entryDoorWall=2,
  88. entryDoorPos=entryDoorPos,
  89. )
  90. if len(curRoomList) > len(roomList):
  91. roomList = curRoomList
  92. # Store the list of rooms in this environment
  93. assert len(roomList) > 0
  94. self.rooms = roomList
  95. # Create the grid
  96. self.grid = Grid(width, height)
  97. wall = Wall()
  98. prevDoorColor = None
  99. # For each room
  100. for idx, room in enumerate(roomList):
  101. topX, topY = room.top
  102. sizeX, sizeY = room.size
  103. # Draw the top and bottom walls
  104. for i in range(0, sizeX):
  105. self.grid.set(topX + i, topY, wall)
  106. self.grid.set(topX + i, topY + sizeY - 1, wall)
  107. # Draw the left and right walls
  108. for j in range(0, sizeY):
  109. self.grid.set(topX, topY + j, wall)
  110. self.grid.set(topX + sizeX - 1, topY + j, wall)
  111. # If this isn't the first room, place the entry door
  112. if idx > 0:
  113. # Pick a door color different from the previous one
  114. doorColors = set(COLOR_NAMES)
  115. if prevDoorColor:
  116. doorColors.remove(prevDoorColor)
  117. # Note: the use of sorting here guarantees determinism,
  118. # This is needed because Python's set is not deterministic
  119. doorColor = self._rand_elem(sorted(doorColors))
  120. entryDoor = Door(doorColor)
  121. self.grid.set(room.entryDoorPos[0], room.entryDoorPos[1], entryDoor)
  122. prevDoorColor = doorColor
  123. prevRoom = roomList[idx - 1]
  124. prevRoom.exitDoorPos = room.entryDoorPos
  125. # Randomize the starting agent position and direction
  126. self.place_agent(roomList[0].top, roomList[0].size)
  127. # Place the final goal in the last room
  128. self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)
  129. self.mission = "traverse the rooms to get to the goal"
  130. def _placeRoom(self, numLeft, roomList, minSz, maxSz, entryDoorWall, entryDoorPos):
  131. # Choose the room size randomly
  132. sizeX = self._rand_int(minSz, maxSz + 1)
  133. sizeY = self._rand_int(minSz, maxSz + 1)
  134. # The first room will be at the door position
  135. if len(roomList) == 0:
  136. topX, topY = entryDoorPos
  137. # Entry on the right
  138. elif entryDoorWall == 0:
  139. topX = entryDoorPos[0] - sizeX + 1
  140. y = entryDoorPos[1]
  141. topY = self._rand_int(y - sizeY + 2, y)
  142. # Entry wall on the south
  143. elif entryDoorWall == 1:
  144. x = entryDoorPos[0]
  145. topX = self._rand_int(x - sizeX + 2, x)
  146. topY = entryDoorPos[1] - sizeY + 1
  147. # Entry wall on the left
  148. elif entryDoorWall == 2:
  149. topX = entryDoorPos[0]
  150. y = entryDoorPos[1]
  151. topY = self._rand_int(y - sizeY + 2, y)
  152. # Entry wall on the top
  153. elif entryDoorWall == 3:
  154. x = entryDoorPos[0]
  155. topX = self._rand_int(x - sizeX + 2, x)
  156. topY = entryDoorPos[1]
  157. else:
  158. assert False, entryDoorWall
  159. # If the room is out of the grid, can't place a room here
  160. if topX < 0 or topY < 0:
  161. return False
  162. if topX + sizeX > self.width or topY + sizeY >= self.height:
  163. return False
  164. # If the room intersects with previous rooms, can't place it here
  165. for room in roomList[:-1]:
  166. nonOverlap = (
  167. topX + sizeX < room.top[0]
  168. or room.top[0] + room.size[0] <= topX
  169. or topY + sizeY < room.top[1]
  170. or room.top[1] + room.size[1] <= topY
  171. )
  172. if not nonOverlap:
  173. return False
  174. # Add this room to the list
  175. roomList.append(MultiRoom((topX, topY), (sizeX, sizeY), entryDoorPos, None))
  176. # If this was the last room, stop
  177. if numLeft == 1:
  178. return True
  179. # Try placing the next room
  180. for i in range(0, 8):
  181. # Pick which wall to place the out door on
  182. wallSet = {0, 1, 2, 3}
  183. wallSet.remove(entryDoorWall)
  184. exitDoorWall = self._rand_elem(sorted(wallSet))
  185. nextEntryWall = (exitDoorWall + 2) % 4
  186. # Pick the exit door position
  187. # Exit on right wall
  188. if exitDoorWall == 0:
  189. exitDoorPos = (topX + sizeX - 1, topY + self._rand_int(1, sizeY - 1))
  190. # Exit on south wall
  191. elif exitDoorWall == 1:
  192. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY + sizeY - 1)
  193. # Exit on left wall
  194. elif exitDoorWall == 2:
  195. exitDoorPos = (topX, topY + self._rand_int(1, sizeY - 1))
  196. # Exit on north wall
  197. elif exitDoorWall == 3:
  198. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY)
  199. else:
  200. assert False
  201. # Recursively create the other rooms
  202. success = self._placeRoom(
  203. numLeft - 1,
  204. roomList=roomList,
  205. minSz=minSz,
  206. maxSz=maxSz,
  207. entryDoorWall=nextEntryWall,
  208. entryDoorPos=exitDoorPos,
  209. )
  210. if success:
  211. break
  212. return True