# multiroom.py (9.0 KB)

from typing import Optional, Tuple

from minigrid.core.constants import COLOR_NAMES
from minigrid.core.grid import Grid
from minigrid.core.mission import MissionSpace
from minigrid.core.world_object import Door, Goal, Wall
from minigrid.minigrid_env import MiniGridEnv
  7. class MultiRoom:
  8. def __init__(self, top, size, entryDoorPos, exitDoorPos):
  9. self.top = top
  10. self.size = size
  11. self.entryDoorPos = entryDoorPos
  12. self.exitDoorPos = exitDoorPos
  13. class MultiRoomEnv(MiniGridEnv):
  14. """
  15. <p>
  16. <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/multi-room.gif" alt="multi-room" width="200px"/>
  17. </p>
  18. ### Description
  19. This environment has a series of connected rooms with doors that must be
  20. opened in order to get to the next room. The final room has the green goal
  21. square the agent must get to. This environment is extremely difficult to
  22. solve using RL alone. However, by gradually increasing the number of rooms
  23. and building a curriculum, the environment can be solved.
  24. ### Mission Space
  25. "traverse the rooms to get to the goal"
  26. ### Action Space
  27. | Num | Name | Action |
  28. |-----|--------------|---------------------------|
  29. | 0 | left | Turn left |
  30. | 1 | right | Turn right |
  31. | 2 | forward | Move forward |
  32. | 3 | pickup | Unused |
  33. | 4 | drop | Unused |
  34. | 5 | toggle | Toggle/activate an object |
  35. | 6 | done | Unused |
  36. ### Observation Encoding
  37. - Each tile is encoded as a 3 dimensional tuple:
  38. `(OBJECT_IDX, COLOR_IDX, STATE)`
  39. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  40. [minigrid/minigrid.py](minigrid/minigrid.py)
  41. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  42. ### Rewards
  43. A reward of '1' is given for success, and '0' for failure.
  44. ### Termination
  45. The episode ends if any one of the following conditions is met:
  46. 1. The agent reaches the goal.
  47. 2. Timeout (see `max_steps`).
  48. ### Registered Configurations
  49. S: size of map SxS.
  50. N: number of rooms.
  51. - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
  52. - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
  53. - `MiniGrid-MultiRoom-N6-v0` (six rooms)
  54. """
  55. def __init__(
  56. self,
  57. minNumRooms,
  58. maxNumRooms,
  59. maxRoomSize=10,
  60. max_steps: Optional[int] = None,
  61. **kwargs
  62. ):
  63. assert minNumRooms > 0
  64. assert maxNumRooms >= minNumRooms
  65. assert maxRoomSize >= 4
  66. self.minNumRooms = minNumRooms
  67. self.maxNumRooms = maxNumRooms
  68. self.maxRoomSize = maxRoomSize
  69. self.rooms = []
  70. mission_space = MissionSpace(
  71. mission_func=lambda: "traverse the rooms to get to the goal"
  72. )
  73. self.size = 25
  74. if max_steps is None:
  75. max_steps = maxNumRooms * 20
  76. super().__init__(
  77. mission_space=mission_space,
  78. width=self.size,
  79. height=self.size,
  80. max_steps=max_steps,
  81. **kwargs
  82. )
  83. def _gen_grid(self, width, height):
  84. roomList = []
  85. # Choose a random number of rooms to generate
  86. numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms + 1)
  87. while len(roomList) < numRooms:
  88. curRoomList = []
  89. entryDoorPos = (self._rand_int(0, width - 2), self._rand_int(0, width - 2))
  90. # Recursively place the rooms
  91. self._placeRoom(
  92. numRooms,
  93. roomList=curRoomList,
  94. minSz=4,
  95. maxSz=self.maxRoomSize,
  96. entryDoorWall=2,
  97. entryDoorPos=entryDoorPos,
  98. )
  99. if len(curRoomList) > len(roomList):
  100. roomList = curRoomList
  101. # Store the list of rooms in this environment
  102. assert len(roomList) > 0
  103. self.rooms = roomList
  104. # Create the grid
  105. self.grid = Grid(width, height)
  106. wall = Wall()
  107. prevDoorColor = None
  108. # For each room
  109. for idx, room in enumerate(roomList):
  110. topX, topY = room.top
  111. sizeX, sizeY = room.size
  112. # Draw the top and bottom walls
  113. for i in range(0, sizeX):
  114. self.grid.set(topX + i, topY, wall)
  115. self.grid.set(topX + i, topY + sizeY - 1, wall)
  116. # Draw the left and right walls
  117. for j in range(0, sizeY):
  118. self.grid.set(topX, topY + j, wall)
  119. self.grid.set(topX + sizeX - 1, topY + j, wall)
  120. # If this isn't the first room, place the entry door
  121. if idx > 0:
  122. # Pick a door color different from the previous one
  123. doorColors = set(COLOR_NAMES)
  124. if prevDoorColor:
  125. doorColors.remove(prevDoorColor)
  126. # Note: the use of sorting here guarantees determinism,
  127. # This is needed because Python's set is not deterministic
  128. doorColor = self._rand_elem(sorted(doorColors))
  129. entryDoor = Door(doorColor)
  130. self.grid.set(room.entryDoorPos[0], room.entryDoorPos[1], entryDoor)
  131. prevDoorColor = doorColor
  132. prevRoom = roomList[idx - 1]
  133. prevRoom.exitDoorPos = room.entryDoorPos
  134. # Randomize the starting agent position and direction
  135. self.place_agent(roomList[0].top, roomList[0].size)
  136. # Place the final goal in the last room
  137. self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)
  138. self.mission = "traverse the rooms to get to the goal"
  139. def _placeRoom(self, numLeft, roomList, minSz, maxSz, entryDoorWall, entryDoorPos):
  140. # Choose the room size randomly
  141. sizeX = self._rand_int(minSz, maxSz + 1)
  142. sizeY = self._rand_int(minSz, maxSz + 1)
  143. # The first room will be at the door position
  144. if len(roomList) == 0:
  145. topX, topY = entryDoorPos
  146. # Entry on the right
  147. elif entryDoorWall == 0:
  148. topX = entryDoorPos[0] - sizeX + 1
  149. y = entryDoorPos[1]
  150. topY = self._rand_int(y - sizeY + 2, y)
  151. # Entry wall on the south
  152. elif entryDoorWall == 1:
  153. x = entryDoorPos[0]
  154. topX = self._rand_int(x - sizeX + 2, x)
  155. topY = entryDoorPos[1] - sizeY + 1
  156. # Entry wall on the left
  157. elif entryDoorWall == 2:
  158. topX = entryDoorPos[0]
  159. y = entryDoorPos[1]
  160. topY = self._rand_int(y - sizeY + 2, y)
  161. # Entry wall on the top
  162. elif entryDoorWall == 3:
  163. x = entryDoorPos[0]
  164. topX = self._rand_int(x - sizeX + 2, x)
  165. topY = entryDoorPos[1]
  166. else:
  167. assert False, entryDoorWall
  168. # If the room is out of the grid, can't place a room here
  169. if topX < 0 or topY < 0:
  170. return False
  171. if topX + sizeX > self.width or topY + sizeY >= self.height:
  172. return False
  173. # If the room intersects with previous rooms, can't place it here
  174. for room in roomList[:-1]:
  175. nonOverlap = (
  176. topX + sizeX < room.top[0]
  177. or room.top[0] + room.size[0] <= topX
  178. or topY + sizeY < room.top[1]
  179. or room.top[1] + room.size[1] <= topY
  180. )
  181. if not nonOverlap:
  182. return False
  183. # Add this room to the list
  184. roomList.append(MultiRoom((topX, topY), (sizeX, sizeY), entryDoorPos, None))
  185. # If this was the last room, stop
  186. if numLeft == 1:
  187. return True
  188. # Try placing the next room
  189. for i in range(0, 8):
  190. # Pick which wall to place the out door on
  191. wallSet = {0, 1, 2, 3}
  192. wallSet.remove(entryDoorWall)
  193. exitDoorWall = self._rand_elem(sorted(wallSet))
  194. nextEntryWall = (exitDoorWall + 2) % 4
  195. # Pick the exit door position
  196. # Exit on right wall
  197. if exitDoorWall == 0:
  198. exitDoorPos = (topX + sizeX - 1, topY + self._rand_int(1, sizeY - 1))
  199. # Exit on south wall
  200. elif exitDoorWall == 1:
  201. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY + sizeY - 1)
  202. # Exit on left wall
  203. elif exitDoorWall == 2:
  204. exitDoorPos = (topX, topY + self._rand_int(1, sizeY - 1))
  205. # Exit on north wall
  206. elif exitDoorWall == 3:
  207. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY)
  208. else:
  209. assert False
  210. # Recursively create the other rooms
  211. success = self._placeRoom(
  212. numLeft - 1,
  213. roomList=roomList,
  214. minSz=minSz,
  215. maxSz=maxSz,
  216. entryDoorWall=nextEntryWall,
  217. entryDoorPos=exitDoorPos,
  218. )
  219. if success:
  220. break
  221. return True