obstructedmaze.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. from gym_minigrid.minigrid import COLOR_NAMES, DIR_TO_VEC, Ball, Box, Key
  2. from gym_minigrid.register import register
  3. from gym_minigrid.roomgrid import RoomGrid
  4. class ObstructedMazeEnv(RoomGrid):
  5. """
  6. A blue ball is hidden in the maze. Doors may be locked,
  7. doors may be obstructed by a ball and keys may be hidden in boxes.
  8. """
  9. def __init__(self, num_rows, num_cols, num_rooms_visited, **kwargs):
  10. room_size = 6
  11. max_steps = 4 * num_rooms_visited * room_size**2
  12. super().__init__(
  13. room_size=room_size,
  14. num_rows=num_rows,
  15. num_cols=num_cols,
  16. max_steps=max_steps,
  17. **kwargs
  18. )
  19. self.obj = Ball() # intiale the obj attribute, that will be changed later on
  20. def _gen_grid(self, width, height):
  21. super()._gen_grid(width, height)
  22. # Define all possible colors for doors
  23. self.door_colors = self._rand_subset(COLOR_NAMES, len(COLOR_NAMES))
  24. # Define the color of the ball to pick up
  25. self.ball_to_find_color = COLOR_NAMES[0]
  26. # Define the color of the balls that obstruct doors
  27. self.blocking_ball_color = COLOR_NAMES[1]
  28. # Define the color of boxes in which keys are hidden
  29. self.box_color = COLOR_NAMES[2]
  30. self.mission = "pick up the %s ball" % self.ball_to_find_color
  31. def step(self, action):
  32. obs, reward, terminated, truncated, info = super().step(action)
  33. if action == self.actions.pickup:
  34. if self.carrying and self.carrying == self.obj:
  35. reward = self._reward()
  36. terminated = True
  37. return obs, reward, terminated, truncated, info
  38. def add_door(
  39. self,
  40. i,
  41. j,
  42. door_idx=0,
  43. color=None,
  44. locked=False,
  45. key_in_box=False,
  46. blocked=False,
  47. ):
  48. """
  49. Add a door. If the door must be locked, it also adds the key.
  50. If the key must be hidden, it is put in a box. If the door must
  51. be obstructed, it adds a ball in front of the door.
  52. """
  53. door, door_pos = super().add_door(i, j, door_idx, color, locked=locked)
  54. if blocked:
  55. vec = DIR_TO_VEC[door_idx]
  56. blocking_ball = Ball(self.blocking_ball_color) if blocked else None
  57. self.grid.set(door_pos[0] - vec[0],
  58. door_pos[1] - vec[1], blocking_ball)
  59. if locked:
  60. obj = Key(door.color)
  61. if key_in_box:
  62. box = Box(self.box_color)
  63. box.contains = obj
  64. obj = box
  65. self.place_in_room(i, j, obj)
  66. return door, door_pos
  67. class ObstructedMaze_1Dlhb(ObstructedMazeEnv):
  68. """
  69. A blue ball is hidden in a 2x1 maze. A locked door separates
  70. rooms. Doors are obstructed by a ball and keys are hidden in boxes.
  71. """
  72. def __init__(self, key_in_box=True, blocked=True, **kwargs):
  73. self.key_in_box = key_in_box
  74. self.blocked = blocked
  75. super().__init__(num_rows=1, num_cols=2, num_rooms_visited=2, **kwargs)
  76. def _gen_grid(self, width, height):
  77. super()._gen_grid(width, height)
  78. self.add_door(
  79. 0,
  80. 0,
  81. door_idx=0,
  82. color=self.door_colors[0],
  83. locked=True,
  84. key_in_box=self.key_in_box,
  85. blocked=self.blocked,
  86. )
  87. self.obj, _ = self.add_object(
  88. 1, 0, "ball", color=self.ball_to_find_color)
  89. self.place_agent(0, 0)
  90. class ObstructedMaze_Full(ObstructedMazeEnv):
  91. """
  92. A blue ball is hidden in one of the 4 corners of a 3x3 maze. Doors
  93. are locked, doors are obstructed by a ball and keys are hidden in
  94. boxes.
  95. """
  96. def __init__(
  97. self,
  98. agent_room=(1, 1),
  99. key_in_box=True,
  100. blocked=True,
  101. num_quarters=4,
  102. num_rooms_visited=25,
  103. **kwargs
  104. ):
  105. self.agent_room = agent_room
  106. self.key_in_box = key_in_box
  107. self.blocked = blocked
  108. self.num_quarters = num_quarters
  109. super().__init__(
  110. num_rows=3, num_cols=3, num_rooms_visited=num_rooms_visited, **kwargs
  111. )
  112. def _gen_grid(self, width, height):
  113. super()._gen_grid(width, height)
  114. middle_room = (1, 1)
  115. # Define positions of "side rooms" i.e. rooms that are neither
  116. # corners nor the center.
  117. side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][: self.num_quarters]
  118. for i in range(len(side_rooms)):
  119. side_room = side_rooms[i]
  120. # Add a door between the center room and the side room
  121. self.add_door(
  122. *middle_room, door_idx=i, color=self.door_colors[i], locked=False
  123. )
  124. for k in [-1, 1]:
  125. # Add a door to each side of the side room
  126. self.add_door(
  127. *side_room,
  128. locked=True,
  129. door_idx=(i + k) % 4,
  130. color=self.door_colors[(i + k) % len(self.door_colors)],
  131. key_in_box=self.key_in_box,
  132. blocked=self.blocked
  133. )
  134. corners = [(2, 0), (2, 2), (0, 2), (0, 0)][: self.num_quarters]
  135. ball_room = self._rand_elem(corners)
  136. self.obj, _ = self.add_object(
  137. ball_room[0], ball_room[1], "ball", color=self.ball_to_find_color
  138. )
  139. self.place_agent(*self.agent_room)
  140. class ObstructedMaze_2Dl(ObstructedMaze_Full):
  141. def __init__(self, **kwargs):
  142. super().__init__((2, 1), False, False, 1, 4, **kwargs)
  143. class ObstructedMaze_2Dlh(ObstructedMaze_Full):
  144. def __init__(self, **kwargs):
  145. super().__init__((2, 1), True, False, 1, 4, **kwargs)
  146. class ObstructedMaze_2Dlhb(ObstructedMaze_Full):
  147. def __init__(self, **kwargs):
  148. super().__init__((2, 1), True, True, 1, 4, **kwargs)
  149. register(
  150. id="MiniGrid-ObstructedMaze-1Dl-v0",
  151. entry_point="gym_minigrid.envs.obstructedmaze:ObstructedMaze_1Dlhb",
  152. key_in_box=False,
  153. blocked=False,
  154. )
  155. register(
  156. id="MiniGrid-ObstructedMaze-1Dlh-v0",
  157. entry_point="gym_minigrid.envs.obstructedmaze:ObstructedMaze_1Dlhb",
  158. key_in_box=True,
  159. blocked=False,
  160. )
  161. register(
  162. id="MiniGrid-ObstructedMaze-1Dlhb-v0",
  163. entry_point="gym_minigrid.envs.obstructedmaze:ObstructedMaze_1Dlhb",
  164. )
  165. register(
  166. id="MiniGrid-ObstructedMaze-2Dl-v0",
  167. entry_point="gym_minigrid.envs.obstructedmaze:ObstructedMaze_Full",
  168. agent_room=(2, 1),
  169. key_in_box=False,
  170. blocked=False,
  171. num_quarters=1,
  172. num_rooms_visited=4,
  173. )
  174. register(
  175. id="MiniGrid-ObstructedMaze-2Dlh-v0",
  176. entry_point="gym_minigrid.envs.obstructedmaze:ObstructedMaze_Full",
  177. agent_room=(2, 1),
  178. key_in_box=True,
  179. blocked=False,
  180. num_quarters=1,
  181. num_rooms_visited=4,
  182. )
  183. register(
  184. id="MiniGrid-ObstructedMaze-2Dlhb-v0",
  185. entry_point="gym_minigrid.envs.obstructedmaze:ObstructedMaze_Full",
  186. agent_room=(2, 1),
  187. key_in_box=True,
  188. blocked=True,
  189. num_quarters=1,
  190. num_rooms_visited=4,
  191. )
  192. register(
  193. id="MiniGrid-ObstructedMaze-1Q-v0",
  194. entry_point="gym_minigrid.envs.obstructedmaze:ObstructedMaze_Full",
  195. agent_room=(1, 1),
  196. key_in_box=True,
  197. blocked=True,
  198. num_quarters=1,
  199. num_rooms_visited=5,
  200. )
  201. register(
  202. id="MiniGrid-ObstructedMaze-2Q-v0",
  203. entry_point="gym_minigrid.envs.obstructedmaze:ObstructedMaze_Full",
  204. agent_room=(2, 1),
  205. key_in_box=True,
  206. blocked=True,
  207. num_quarters=2,
  208. num_rooms_visited=11,
  209. )
  210. register(
  211. id="MiniGrid-ObstructedMaze-Full-v0",
  212. entry_point="gym_minigrid.envs.obstructedmaze:ObstructedMaze_Full",
  213. )