# multiroom.py (9.3 KB)

  1. from __future__ import annotations
  2. from minigrid.core.constants import COLOR_NAMES
  3. from minigrid.core.grid import Grid
  4. from minigrid.core.mission import MissionSpace
  5. from minigrid.core.world_object import Door, Goal, Wall
  6. from minigrid.minigrid_env import MiniGridEnv
  7. class MultiRoom:
  8. def __init__(self, top, size, entryDoorPos, exitDoorPos):
  9. self.top = top
  10. self.size = size
  11. self.entryDoorPos = entryDoorPos
  12. self.exitDoorPos = exitDoorPos
  13. class MultiRoomEnv(MiniGridEnv):
  14. """
  15. ## Description
  16. This environment has a series of connected rooms with doors that must be
  17. opened in order to get to the next room. The final room has the green goal
  18. square the agent must get to. This environment is extremely difficult to
  19. solve using RL alone. However, by gradually increasing the number of rooms
  20. and building a curriculum, the environment can be solved.
  21. ## Mission Space
  22. "traverse the rooms to get to the goal"
  23. ## Action Space
  24. | Num | Name | Action |
  25. |-----|--------------|---------------------------|
  26. | 0 | left | Turn left |
  27. | 1 | right | Turn right |
  28. | 2 | forward | Move forward |
  29. | 3 | pickup | Unused |
  30. | 4 | drop | Unused |
  31. | 5 | toggle | Toggle/activate an object |
  32. | 6 | done | Unused |
  33. ## Observation Encoding
  34. - Each tile is encoded as a 3 dimensional tuple:
  35. `(OBJECT_IDX, COLOR_IDX, STATE)`
  36. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  37. [minigrid/core/constants.py](minigrid/core/constants.py)
  38. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  39. ## Rewards
  40. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  41. ## Termination
  42. The episode ends if any one of the following conditions is met:
  43. 1. The agent reaches the goal.
  44. 2. Timeout (see `max_steps`).
  45. ## Registered Configurations
  46. - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
  47. - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
  48. - `MiniGrid-MultiRoom-N6-v0` (six rooms)
  49. ## Arguments
  50. * `minNumRooms`: The minimum number of rooms generated
  51. * `maxNumRooms`: The maximum number of rooms generated
  52. * `maxRoomSize=10`: The maximum room size
  53. * `width=25`: The width of the map
  54. * `height=25`: The height of the map
  55. * `max_steps=None`: If none, `maxNumRooms * 20` else the integer passed
  56. """
  57. def __init__(
  58. self,
  59. minNumRooms,
  60. maxNumRooms,
  61. maxRoomSize=10,
  62. width=25,
  63. height=25,
  64. max_steps: int | None = None,
  65. **kwargs,
  66. ):
  67. assert minNumRooms > 0
  68. assert maxNumRooms >= minNumRooms
  69. assert maxRoomSize >= 4
  70. self.minNumRooms = minNumRooms
  71. self.maxNumRooms = maxNumRooms
  72. self.maxRoomSize = maxRoomSize
  73. self.rooms = []
  74. mission_space = MissionSpace(mission_func=self._gen_mission)
  75. if max_steps is None:
  76. max_steps = maxNumRooms * 20
  77. super().__init__(
  78. mission_space=mission_space,
  79. width=width,
  80. height=height,
  81. max_steps=max_steps,
  82. **kwargs,
  83. )
  84. @staticmethod
  85. def _gen_mission():
  86. return "traverse the rooms to get to the goal"
  87. def _gen_grid(self, width, height):
  88. roomList = []
  89. # Choose a random number of rooms to generate
  90. numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms + 1)
  91. while len(roomList) < numRooms:
  92. curRoomList = []
  93. entryDoorPos = (self._rand_int(0, width - 2), self._rand_int(0, width - 2))
  94. # Recursively place the rooms
  95. self._placeRoom(
  96. numRooms,
  97. roomList=curRoomList,
  98. minSz=4,
  99. maxSz=self.maxRoomSize,
  100. entryDoorWall=2,
  101. entryDoorPos=entryDoorPos,
  102. )
  103. if len(curRoomList) > len(roomList):
  104. roomList = curRoomList
  105. # Store the list of rooms in this environment
  106. assert len(roomList) > 0
  107. self.rooms = roomList
  108. # Create the grid
  109. self.grid = Grid(width, height)
  110. wall = Wall()
  111. prevDoorColor = None
  112. # For each room
  113. for idx, room in enumerate(roomList):
  114. topX, topY = room.top
  115. sizeX, sizeY = room.size
  116. # Draw the top and bottom walls
  117. for i in range(0, sizeX):
  118. self.grid.set(topX + i, topY, wall)
  119. self.grid.set(topX + i, topY + sizeY - 1, wall)
  120. # Draw the left and right walls
  121. for j in range(0, sizeY):
  122. self.grid.set(topX, topY + j, wall)
  123. self.grid.set(topX + sizeX - 1, topY + j, wall)
  124. # If this isn't the first room, place the entry door
  125. if idx > 0:
  126. # Pick a door color different from the previous one
  127. doorColors = set(COLOR_NAMES)
  128. if prevDoorColor:
  129. doorColors.remove(prevDoorColor)
  130. # Note: the use of sorting here guarantees determinism,
  131. # This is needed because Python's set is not deterministic
  132. doorColor = self._rand_elem(sorted(doorColors))
  133. entryDoor = Door(doorColor)
  134. self.grid.set(room.entryDoorPos[0], room.entryDoorPos[1], entryDoor)
  135. prevDoorColor = doorColor
  136. prevRoom = roomList[idx - 1]
  137. prevRoom.exitDoorPos = room.entryDoorPos
  138. # Randomize the starting agent position and direction
  139. self.place_agent(roomList[0].top, roomList[0].size)
  140. # Place the final goal in the last room
  141. self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)
  142. self.mission = "traverse the rooms to get to the goal"
  143. def _placeRoom(self, numLeft, roomList, minSz, maxSz, entryDoorWall, entryDoorPos):
  144. # Choose the room size randomly
  145. sizeX = self._rand_int(minSz, maxSz + 1)
  146. sizeY = self._rand_int(minSz, maxSz + 1)
  147. # The first room will be at the door position
  148. if len(roomList) == 0:
  149. topX, topY = entryDoorPos
  150. # Entry on the right
  151. elif entryDoorWall == 0:
  152. topX = entryDoorPos[0] - sizeX + 1
  153. y = entryDoorPos[1]
  154. topY = self._rand_int(y - sizeY + 2, y)
  155. # Entry wall on the south
  156. elif entryDoorWall == 1:
  157. x = entryDoorPos[0]
  158. topX = self._rand_int(x - sizeX + 2, x)
  159. topY = entryDoorPos[1] - sizeY + 1
  160. # Entry wall on the left
  161. elif entryDoorWall == 2:
  162. topX = entryDoorPos[0]
  163. y = entryDoorPos[1]
  164. topY = self._rand_int(y - sizeY + 2, y)
  165. # Entry wall on the top
  166. elif entryDoorWall == 3:
  167. x = entryDoorPos[0]
  168. topX = self._rand_int(x - sizeX + 2, x)
  169. topY = entryDoorPos[1]
  170. else:
  171. assert False, entryDoorWall
  172. # If the room is out of the grid, can't place a room here
  173. if topX < 0 or topY < 0:
  174. return False
  175. if topX + sizeX > self.width or topY + sizeY >= self.height:
  176. return False
  177. # If the room intersects with previous rooms, can't place it here
  178. for room in roomList[:-1]:
  179. nonOverlap = (
  180. topX + sizeX < room.top[0]
  181. or room.top[0] + room.size[0] <= topX
  182. or topY + sizeY < room.top[1]
  183. or room.top[1] + room.size[1] <= topY
  184. )
  185. if not nonOverlap:
  186. return False
  187. # Add this room to the list
  188. roomList.append(MultiRoom((topX, topY), (sizeX, sizeY), entryDoorPos, None))
  189. # If this was the last room, stop
  190. if numLeft == 1:
  191. return True
  192. # Try placing the next room
  193. for i in range(0, 8):
  194. # Pick which wall to place the out door on
  195. wallSet = {0, 1, 2, 3}
  196. wallSet.remove(entryDoorWall)
  197. exitDoorWall = self._rand_elem(sorted(wallSet))
  198. nextEntryWall = (exitDoorWall + 2) % 4
  199. # Pick the exit door position
  200. # Exit on right wall
  201. if exitDoorWall == 0:
  202. exitDoorPos = (topX + sizeX - 1, topY + self._rand_int(1, sizeY - 1))
  203. # Exit on south wall
  204. elif exitDoorWall == 1:
  205. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY + sizeY - 1)
  206. # Exit on left wall
  207. elif exitDoorWall == 2:
  208. exitDoorPos = (topX, topY + self._rand_int(1, sizeY - 1))
  209. # Exit on north wall
  210. elif exitDoorWall == 3:
  211. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY)
  212. else:
  213. assert False
  214. # Recursively create the other rooms
  215. success = self._placeRoom(
  216. numLeft - 1,
  217. roomList=roomList,
  218. minSz=minSz,
  219. maxSz=maxSz,
  220. entryDoorWall=nextEntryWall,
  221. entryDoorPos=exitDoorPos,
  222. )
  223. if success:
  224. break
  225. return True