obstructedmaze.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. from typing import Optional
  2. from minigrid.core.constants import COLOR_NAMES, DIR_TO_VEC
  3. from minigrid.core.mission import MissionSpace
  4. from minigrid.core.roomgrid import RoomGrid
  5. from minigrid.core.world_object import Ball, Box, Key
  6. class ObstructedMazeEnv(RoomGrid):
  7. """
  8. <p>
  9. <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-1Dl.png" alt="ObstructedMaze-1Dl" width="200px"/>
  10. <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-1Dlh.png" alt="ObstructedMaze-1Dlh" width="200px"/>
  11. <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-1Dlhb.png" alt="ObstructedMaze-1Dlhb" width="200px"/>
  12. <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-1Q.png" alt="ObstructedMaze-1Q" width="200px"/>
  13. </p>
  14. <p>
  15. <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-2Dl.png" alt="ObstructedMaze-2Dl" width="200px"/>
  16. <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-2Dlh.png" alt="ObstructedMaze-2Dlh" width="200px"/>
  17. <img style="float:left" src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-2Dlhb.png" alt="ObstructedMaze-2Dlhb" width="200px"/>
  18. <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-2Q.png" alt="ObstructedMaze-2Q" width="200px"/>
  19. </p>
  20. <p>
  21. <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/ObstructedMaze-4Q.png" alt="ObstructedMaze-4Q" width="200px"/>
  22. </p>
  23. ### Description
  24. The agent has to pick up a box which is placed in a corner of a 3x3 maze.
  25. The doors are locked, the keys are hidden in boxes and doors are obstructed
  26. by balls. This environment can be solved without relying on language.
  27. ### Mission Space
  28. "pick up the {COLOR_NAMES[0]} ball"
  29. ### Action Space
  30. | Num | Name | Action |
  31. |-----|--------------|---------------------------|
  32. | 0 | left | Turn left |
  33. | 1 | right | Turn right |
  34. | 2 | forward | Move forward |
  35. | 3 | pickup | Pick up an object |
  36. | 4 | drop | Unused |
  37. | 5 | toggle | Toggle/activate an object |
  38. | 6 | done | Unused |
  39. ### Observation Encoding
  40. - Each tile is encoded as a 3 dimensional tuple:
  41. `(OBJECT_IDX, COLOR_IDX, STATE)`
  42. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  43. [minigrid/minigrid.py](minigrid/minigrid.py)
  44. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  45. ### Rewards
  46. A reward of '1' is given for success, and '0' for failure.
  47. ### Termination
  48. The episode ends if any one of the following conditions is met:
  49. 1. The agent picks up the blue ball.
  50. 2. Timeout (see `max_steps`).
  51. ### Registered Configurations
  52. "NDl" are the number of doors locked.
  53. "h" if the key is hidden in a box.
  54. "b" if the door is obstructed by a ball.
  55. "Q" number of quarters that will have doors and keys out of the 9 that the
  56. map already has.
  57. "Full" 3x3 maze with "h" and "b" options.
  58. - `MiniGrid-ObstructedMaze-1Dl-v0`
  59. - `MiniGrid-ObstructedMaze-1Dlh-v0`
  60. - `MiniGrid-ObstructedMaze-1Dlhb-v0`
  61. - `MiniGrid-ObstructedMaze-2Dl-v0`
  62. - `MiniGrid-ObstructedMaze-2Dlh-v0`
  63. - `MiniGrid-ObstructedMaze-2Dlhb-v0`
  64. - `MiniGrid-ObstructedMaze-1Q-v0`
  65. - `MiniGrid-ObstructedMaze-2Q-v0`
  66. - `MiniGrid-ObstructedMaze-Full-v0`
  67. """
  68. def __init__(
  69. self,
  70. num_rows,
  71. num_cols,
  72. num_rooms_visited,
  73. max_steps: Optional[int] = None,
  74. **kwargs,
  75. ):
  76. room_size = 6
  77. if max_steps is None:
  78. max_steps = 4 * num_rooms_visited * room_size**2
  79. mission_space = MissionSpace(
  80. mission_func=self._gen_mission,
  81. ordered_placeholders=[[COLOR_NAMES[0]]],
  82. )
  83. super().__init__(
  84. mission_space=mission_space,
  85. room_size=room_size,
  86. num_rows=num_rows,
  87. num_cols=num_cols,
  88. max_steps=max_steps,
  89. **kwargs,
  90. )
  91. self.obj = Ball() # initialize the obj attribute, that will be changed later on
  92. @staticmethod
  93. def _gen_mission(color: str):
  94. return f"pick up the {color} ball"
  95. def _gen_grid(self, width, height):
  96. super()._gen_grid(width, height)
  97. # Define all possible colors for doors
  98. self.door_colors = self._rand_subset(COLOR_NAMES, len(COLOR_NAMES))
  99. # Define the color of the ball to pick up
  100. self.ball_to_find_color = COLOR_NAMES[0]
  101. # Define the color of the balls that obstruct doors
  102. self.blocking_ball_color = COLOR_NAMES[1]
  103. # Define the color of boxes in which keys are hidden
  104. self.box_color = COLOR_NAMES[2]
  105. self.mission = "pick up the %s ball" % self.ball_to_find_color
  106. def step(self, action):
  107. obs, reward, terminated, truncated, info = super().step(action)
  108. if action == self.actions.pickup:
  109. if self.carrying and self.carrying == self.obj:
  110. reward = self._reward()
  111. terminated = True
  112. return obs, reward, terminated, truncated, info
  113. def add_door(
  114. self,
  115. i,
  116. j,
  117. door_idx=0,
  118. color=None,
  119. locked=False,
  120. key_in_box=False,
  121. blocked=False,
  122. ):
  123. """
  124. Add a door. If the door must be locked, it also adds the key.
  125. If the key must be hidden, it is put in a box. If the door must
  126. be obstructed, it adds a ball in front of the door.
  127. """
  128. door, door_pos = super().add_door(i, j, door_idx, color, locked=locked)
  129. if blocked:
  130. vec = DIR_TO_VEC[door_idx]
  131. blocking_ball = Ball(self.blocking_ball_color) if blocked else None
  132. self.grid.set(door_pos[0] - vec[0], door_pos[1] - vec[1], blocking_ball)
  133. if locked:
  134. obj = Key(door.color)
  135. if key_in_box:
  136. box = Box(self.box_color)
  137. box.contains = obj
  138. obj = box
  139. self.place_in_room(i, j, obj)
  140. return door, door_pos
  141. class ObstructedMaze_1Dlhb(ObstructedMazeEnv):
  142. """
  143. A blue ball is hidden in a 2x1 maze. A locked door separates
  144. rooms. Doors are obstructed by a ball and keys are hidden in boxes.
  145. """
  146. def __init__(self, key_in_box=True, blocked=True, **kwargs):
  147. self.key_in_box = key_in_box
  148. self.blocked = blocked
  149. super().__init__(num_rows=1, num_cols=2, num_rooms_visited=2, **kwargs)
  150. def _gen_grid(self, width, height):
  151. super()._gen_grid(width, height)
  152. self.add_door(
  153. 0,
  154. 0,
  155. door_idx=0,
  156. color=self.door_colors[0],
  157. locked=True,
  158. key_in_box=self.key_in_box,
  159. blocked=self.blocked,
  160. )
  161. self.obj, _ = self.add_object(1, 0, "ball", color=self.ball_to_find_color)
  162. self.place_agent(0, 0)
  163. class ObstructedMaze_Full(ObstructedMazeEnv):
  164. """
  165. A blue ball is hidden in one of the 4 corners of a 3x3 maze. Doors
  166. are locked, doors are obstructed by a ball and keys are hidden in
  167. boxes.
  168. """
  169. def __init__(
  170. self,
  171. agent_room=(1, 1),
  172. key_in_box=True,
  173. blocked=True,
  174. num_quarters=4,
  175. num_rooms_visited=25,
  176. **kwargs,
  177. ):
  178. self.agent_room = agent_room
  179. self.key_in_box = key_in_box
  180. self.blocked = blocked
  181. self.num_quarters = num_quarters
  182. super().__init__(
  183. num_rows=3, num_cols=3, num_rooms_visited=num_rooms_visited, **kwargs
  184. )
  185. def _gen_grid(self, width, height):
  186. super()._gen_grid(width, height)
  187. middle_room = (1, 1)
  188. # Define positions of "side rooms" i.e. rooms that are neither
  189. # corners nor the center.
  190. side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][: self.num_quarters]
  191. for i in range(len(side_rooms)):
  192. side_room = side_rooms[i]
  193. # Add a door between the center room and the side room
  194. self.add_door(
  195. *middle_room, door_idx=i, color=self.door_colors[i], locked=False
  196. )
  197. for k in [-1, 1]:
  198. # Add a door to each side of the side room
  199. self.add_door(
  200. *side_room,
  201. locked=True,
  202. door_idx=(i + k) % 4,
  203. color=self.door_colors[(i + k) % len(self.door_colors)],
  204. key_in_box=self.key_in_box,
  205. blocked=self.blocked,
  206. )
  207. corners = [(2, 0), (2, 2), (0, 2), (0, 0)][: self.num_quarters]
  208. ball_room = self._rand_elem(corners)
  209. self.obj, _ = self.add_object(
  210. ball_room[0], ball_room[1], "ball", color=self.ball_to_find_color
  211. )
  212. self.place_agent(*self.agent_room)
  213. class ObstructedMaze_2Dl(ObstructedMaze_Full):
  214. def __init__(self, **kwargs):
  215. super().__init__((2, 1), False, False, 1, 4, **kwargs)
  216. class ObstructedMaze_2Dlh(ObstructedMaze_Full):
  217. def __init__(self, **kwargs):
  218. super().__init__((2, 1), True, False, 1, 4, **kwargs)
  219. class ObstructedMaze_2Dlhb(ObstructedMaze_Full):
  220. def __init__(self, **kwargs):
  221. super().__init__((2, 1), True, True, 1, 4, **kwargs)