multiroom.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. from typing import Optional
  2. from minigrid.core.constants import COLOR_NAMES
  3. from minigrid.core.grid import Grid
  4. from minigrid.core.mission import MissionSpace
  5. from minigrid.core.world_object import Door, Goal, Wall
  6. from minigrid.minigrid_env import MiniGridEnv
  7. class MultiRoom:
  8. def __init__(self, top, size, entryDoorPos, exitDoorPos):
  9. self.top = top
  10. self.size = size
  11. self.entryDoorPos = entryDoorPos
  12. self.exitDoorPos = exitDoorPos
  13. class MultiRoomEnv(MiniGridEnv):
  14. """
  15. <p>
  16. <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/multi-room.gif" alt="multi-room" width="200px"/>
  17. </p>
  18. ### Description
  19. This environment has a series of connected rooms with doors that must be
  20. opened in order to get to the next room. The final room has the green goal
  21. square the agent must get to. This environment is extremely difficult to
  22. solve using RL alone. However, by gradually increasing the number of rooms
  23. and building a curriculum, the environment can be solved.
  24. ### Mission Space
  25. "traverse the rooms to get to the goal"
  26. ### Action Space
  27. | Num | Name | Action |
  28. |-----|--------------|---------------------------|
  29. | 0 | left | Turn left |
  30. | 1 | right | Turn right |
  31. | 2 | forward | Move forward |
  32. | 3 | pickup | Unused |
  33. | 4 | drop | Unused |
  34. | 5 | toggle | Toggle/activate an object |
  35. | 6 | done | Unused |
  36. ### Observation Encoding
  37. - Each tile is encoded as a 3 dimensional tuple:
  38. `(OBJECT_IDX, COLOR_IDX, STATE)`
  39. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  40. [minigrid/minigrid.py](minigrid/minigrid.py)
  41. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  42. ### Rewards
  43. A reward of '1' is given for success, and '0' for failure.
  44. ### Termination
  45. The episode ends if any one of the following conditions is met:
  46. 1. The agent reaches the goal.
  47. 2. Timeout (see `max_steps`).
  48. ### Registered Configurations
  49. S: size of map SxS.
  50. N: number of rooms.
  51. - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
  52. - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
  53. - `MiniGrid-MultiRoom-N6-v0` (six rooms)
  54. """
  55. def __init__(
  56. self,
  57. minNumRooms,
  58. maxNumRooms,
  59. maxRoomSize=10,
  60. max_steps: Optional[int] = None,
  61. **kwargs
  62. ):
  63. assert minNumRooms > 0
  64. assert maxNumRooms >= minNumRooms
  65. assert maxRoomSize >= 4
  66. self.minNumRooms = minNumRooms
  67. self.maxNumRooms = maxNumRooms
  68. self.maxRoomSize = maxRoomSize
  69. self.rooms = []
  70. mission_space = MissionSpace(mission_func=self._gen_mission)
  71. self.size = 25
  72. if max_steps is None:
  73. max_steps = maxNumRooms * 20
  74. super().__init__(
  75. mission_space=mission_space,
  76. width=self.size,
  77. height=self.size,
  78. max_steps=max_steps,
  79. **kwargs
  80. )
  81. @staticmethod
  82. def _gen_mission():
  83. return "traverse the rooms to get to the goal"
  84. def _gen_grid(self, width, height):
  85. roomList = []
  86. # Choose a random number of rooms to generate
  87. numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms + 1)
  88. while len(roomList) < numRooms:
  89. curRoomList = []
  90. entryDoorPos = (self._rand_int(0, width - 2), self._rand_int(0, width - 2))
  91. # Recursively place the rooms
  92. self._placeRoom(
  93. numRooms,
  94. roomList=curRoomList,
  95. minSz=4,
  96. maxSz=self.maxRoomSize,
  97. entryDoorWall=2,
  98. entryDoorPos=entryDoorPos,
  99. )
  100. if len(curRoomList) > len(roomList):
  101. roomList = curRoomList
  102. # Store the list of rooms in this environment
  103. assert len(roomList) > 0
  104. self.rooms = roomList
  105. # Create the grid
  106. self.grid = Grid(width, height)
  107. wall = Wall()
  108. prevDoorColor = None
  109. # For each room
  110. for idx, room in enumerate(roomList):
  111. topX, topY = room.top
  112. sizeX, sizeY = room.size
  113. # Draw the top and bottom walls
  114. for i in range(0, sizeX):
  115. self.grid.set(topX + i, topY, wall)
  116. self.grid.set(topX + i, topY + sizeY - 1, wall)
  117. # Draw the left and right walls
  118. for j in range(0, sizeY):
  119. self.grid.set(topX, topY + j, wall)
  120. self.grid.set(topX + sizeX - 1, topY + j, wall)
  121. # If this isn't the first room, place the entry door
  122. if idx > 0:
  123. # Pick a door color different from the previous one
  124. doorColors = set(COLOR_NAMES)
  125. if prevDoorColor:
  126. doorColors.remove(prevDoorColor)
  127. # Note: the use of sorting here guarantees determinism,
  128. # This is needed because Python's set is not deterministic
  129. doorColor = self._rand_elem(sorted(doorColors))
  130. entryDoor = Door(doorColor)
  131. self.grid.set(room.entryDoorPos[0], room.entryDoorPos[1], entryDoor)
  132. prevDoorColor = doorColor
  133. prevRoom = roomList[idx - 1]
  134. prevRoom.exitDoorPos = room.entryDoorPos
  135. # Randomize the starting agent position and direction
  136. self.place_agent(roomList[0].top, roomList[0].size)
  137. # Place the final goal in the last room
  138. self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)
  139. self.mission = "traverse the rooms to get to the goal"
  140. def _placeRoom(self, numLeft, roomList, minSz, maxSz, entryDoorWall, entryDoorPos):
  141. # Choose the room size randomly
  142. sizeX = self._rand_int(minSz, maxSz + 1)
  143. sizeY = self._rand_int(minSz, maxSz + 1)
  144. # The first room will be at the door position
  145. if len(roomList) == 0:
  146. topX, topY = entryDoorPos
  147. # Entry on the right
  148. elif entryDoorWall == 0:
  149. topX = entryDoorPos[0] - sizeX + 1
  150. y = entryDoorPos[1]
  151. topY = self._rand_int(y - sizeY + 2, y)
  152. # Entry wall on the south
  153. elif entryDoorWall == 1:
  154. x = entryDoorPos[0]
  155. topX = self._rand_int(x - sizeX + 2, x)
  156. topY = entryDoorPos[1] - sizeY + 1
  157. # Entry wall on the left
  158. elif entryDoorWall == 2:
  159. topX = entryDoorPos[0]
  160. y = entryDoorPos[1]
  161. topY = self._rand_int(y - sizeY + 2, y)
  162. # Entry wall on the top
  163. elif entryDoorWall == 3:
  164. x = entryDoorPos[0]
  165. topX = self._rand_int(x - sizeX + 2, x)
  166. topY = entryDoorPos[1]
  167. else:
  168. assert False, entryDoorWall
  169. # If the room is out of the grid, can't place a room here
  170. if topX < 0 or topY < 0:
  171. return False
  172. if topX + sizeX > self.width or topY + sizeY >= self.height:
  173. return False
  174. # If the room intersects with previous rooms, can't place it here
  175. for room in roomList[:-1]:
  176. nonOverlap = (
  177. topX + sizeX < room.top[0]
  178. or room.top[0] + room.size[0] <= topX
  179. or topY + sizeY < room.top[1]
  180. or room.top[1] + room.size[1] <= topY
  181. )
  182. if not nonOverlap:
  183. return False
  184. # Add this room to the list
  185. roomList.append(MultiRoom((topX, topY), (sizeX, sizeY), entryDoorPos, None))
  186. # If this was the last room, stop
  187. if numLeft == 1:
  188. return True
  189. # Try placing the next room
  190. for i in range(0, 8):
  191. # Pick which wall to place the out door on
  192. wallSet = {0, 1, 2, 3}
  193. wallSet.remove(entryDoorWall)
  194. exitDoorWall = self._rand_elem(sorted(wallSet))
  195. nextEntryWall = (exitDoorWall + 2) % 4
  196. # Pick the exit door position
  197. # Exit on right wall
  198. if exitDoorWall == 0:
  199. exitDoorPos = (topX + sizeX - 1, topY + self._rand_int(1, sizeY - 1))
  200. # Exit on south wall
  201. elif exitDoorWall == 1:
  202. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY + sizeY - 1)
  203. # Exit on left wall
  204. elif exitDoorWall == 2:
  205. exitDoorPos = (topX, topY + self._rand_int(1, sizeY - 1))
  206. # Exit on north wall
  207. elif exitDoorWall == 3:
  208. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY)
  209. else:
  210. assert False
  211. # Recursively create the other rooms
  212. success = self._placeRoom(
  213. numLeft - 1,
  214. roomList=roomList,
  215. minSz=minSz,
  216. maxSz=maxSz,
  217. entryDoorWall=nextEntryWall,
  218. entryDoorPos=exitDoorPos,
  219. )
  220. if success:
  221. break
  222. return True