unlock.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. """
  2. Copied and adapted from https://github.com/mila-iqia/babyai.
  3. Levels described in the Baby AI ICLR 2019 submission, with the `Unlock` instruction.
  4. """
  5. from __future__ import annotations
  6. from minigrid.core.constants import COLOR_NAMES
  7. from minigrid.core.world_object import Ball, Box, Key
  8. from minigrid.envs.babyai.core.roomgrid_level import RoomGridLevel
  9. from minigrid.envs.babyai.core.verifier import ObjDesc, OpenInstr, PickupInstr
  10. class Unlock(RoomGridLevel):
  11. """
  12. ## Description
  13. Unlock a door.
  14. Competencies: Maze, Open, Unlock. No unblocking.
  15. ## Mission Space
  16. "open the {color} door"
  17. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  18. "yellow" or "grey".
  19. ## Action Space
  20. | Num | Name | Action |
  21. |-----|--------------|-------------------|
  22. | 0 | left | Turn left |
  23. | 1 | right | Turn right |
  24. | 2 | forward | Move forward |
  25. | 3 | pickup | Pick up an object |
  26. | 4 | drop | Unused |
  27. | 5 | toggle | Unused |
  28. | 6 | done | Unused |
  29. ## Observation Encoding
  30. - Each tile is encoded as a 3 dimensional tuple:
  31. `(OBJECT_IDX, COLOR_IDX, STATE)`
  32. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  33. [minigrid/minigrid.py](minigrid/minigrid.py)
  34. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  35. ## Rewards
  36. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  37. ## Termination
  38. The episode ends if any one of the following conditions is met:
  39. 1. The agent opens the correct door.
  40. 2. Timeout (see `max_steps`).
  41. ## Registered Configurations
  42. - `BabyAI-Unlock-v0`
  43. """
  44. def gen_mission(self):
  45. # Add a locked door to a random room
  46. id = self._rand_int(0, self.num_cols)
  47. jd = self._rand_int(0, self.num_rows)
  48. door, pos = self.add_door(id, jd, locked=True)
  49. locked_room = self.get_room(id, jd)
  50. # Add the key to a different room
  51. while True:
  52. ik = self._rand_int(0, self.num_cols)
  53. jk = self._rand_int(0, self.num_rows)
  54. if ik is id and jk is jd:
  55. continue
  56. self.add_object(ik, jk, "key", door.color)
  57. break
  58. # With 50% probability, ensure that the locked door is the only
  59. # door of that color
  60. if self._rand_bool():
  61. colors = list(filter(lambda c: c is not door.color, COLOR_NAMES))
  62. self.connect_all(door_colors=colors)
  63. else:
  64. self.connect_all()
  65. # Add distractors to all but the locked room.
  66. # We do this to speed up the reachability test,
  67. # which otherwise will reject all levels with
  68. # objects in the locked room.
  69. for i in range(self.num_cols):
  70. for j in range(self.num_rows):
  71. if i is not id or j is not jd:
  72. self.add_distractors(i, j, num_distractors=3, all_unique=False)
  73. # The agent must be placed after all the object to respect constraints
  74. while True:
  75. self.place_agent()
  76. start_room = self.room_from_pos(*self.agent_pos)
  77. # Ensure that we are not placing the agent in the locked room
  78. if start_room is locked_room:
  79. continue
  80. break
  81. self.check_objs_reachable()
  82. self.instrs = OpenInstr(ObjDesc(door.type, door.color))
  83. class UnlockLocal(RoomGridLevel):
  84. """
  85. ## Description
  86. Fetch a key and unlock a door
  87. (in the current room)
  88. ## Mission Space
  89. "open the door"
  90. ## Action Space
  91. | Num | Name | Action |
  92. |-----|--------------|-------------------|
  93. | 0 | left | Turn left |
  94. | 1 | right | Turn right |
  95. | 2 | forward | Move forward |
  96. | 3 | pickup | Pick up an object |
  97. | 4 | drop | Unused |
  98. | 5 | toggle | Unused |
  99. | 6 | done | Unused |
  100. ## Observation Encoding
  101. - Each tile is encoded as a 3 dimensional tuple:
  102. `(OBJECT_IDX, COLOR_IDX, STATE)`
  103. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  104. [minigrid/minigrid.py](minigrid/minigrid.py)
  105. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  106. ## Rewards
  107. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  108. ## Termination
  109. The episode ends if any one of the following conditions is met:
  110. 1. The agent opens the door.
  111. 2. Timeout (see `max_steps`).
  112. ## Registered Configurations
  113. - `BabyAI-UnlockLocal-v0`
  114. - `BabyAI-UnlockLocalDist-v0`
  115. """
  116. def __init__(self, distractors=False, **kwargs):
  117. self.distractors = distractors
  118. super().__init__(**kwargs)
  119. def gen_mission(self):
  120. door, _ = self.add_door(1, 1, locked=True)
  121. self.add_object(1, 1, "key", door.color)
  122. if self.distractors:
  123. self.add_distractors(1, 1, num_distractors=3)
  124. self.place_agent(1, 1)
  125. self.instrs = OpenInstr(ObjDesc(door.type))
  126. class KeyInBox(RoomGridLevel):
  127. """
  128. ## Description
  129. Unlock a door. Key is in a box (in the current room).
  130. ## Mission Space
  131. "open the door"
  132. ## Action Space
  133. | Num | Name | Action |
  134. |-----|--------------|-------------------|
  135. | 0 | left | Turn left |
  136. | 1 | right | Turn right |
  137. | 2 | forward | Move forward |
  138. | 3 | pickup | Pick up an object |
  139. | 4 | drop | Unused |
  140. | 5 | toggle | Unused |
  141. | 6 | done | Unused |
  142. ## Observation Encoding
  143. - Each tile is encoded as a 3 dimensional tuple:
  144. `(OBJECT_IDX, COLOR_IDX, STATE)`
  145. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  146. [minigrid/minigrid.py](minigrid/minigrid.py)
  147. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  148. ## Rewards
  149. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  150. ## Termination
  151. The episode ends if any one of the following conditions is met:
  152. 1. The agent opens the door.
  153. 2. Timeout (see `max_steps`).
  154. ## Registered Configurations
  155. - `BabyAI-KeyInBox-v0`
  156. """
  157. def __init__(self, **kwargs):
  158. super().__init__(**kwargs)
  159. def gen_mission(self):
  160. door, _ = self.add_door(1, 1, locked=True)
  161. # Put the key in the box, then place the box in the room
  162. key = Key(door.color)
  163. box = Box(self._rand_color(), key)
  164. self.place_in_room(1, 1, box)
  165. self.place_agent(1, 1)
  166. self.instrs = OpenInstr(ObjDesc(door.type))
  167. class UnlockPickup(RoomGridLevel):
  168. """
  169. ## Description
  170. Unlock a door, then pick up a box in another room
  171. ## Mission Space
  172. "pick up the {color} box"
  173. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  174. "yellow" or "grey".
  175. ## Action Space
  176. | Num | Name | Action |
  177. |-----|--------------|-------------------|
  178. | 0 | left | Turn left |
  179. | 1 | right | Turn right |
  180. | 2 | forward | Move forward |
  181. | 3 | pickup | Pick up an object |
  182. | 4 | drop | Unused |
  183. | 5 | toggle | Unused |
  184. | 6 | done | Unused |
  185. ## Observation Encoding
  186. - Each tile is encoded as a 3 dimensional tuple:
  187. `(OBJECT_IDX, COLOR_IDX, STATE)`
  188. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  189. [minigrid/minigrid.py](minigrid/minigrid.py)
  190. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  191. ## Rewards
  192. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  193. ## Termination
  194. The episode ends if any one of the following conditions is met:
  195. 1. The agent picks up the correct box.
  196. 2. Timeout (see `max_steps`).
  197. ## Registered Configurations
  198. - `BabyAI-UnlockPickup-v0`
  199. - `BabyAI-UnlockPickupDist-v0`
  200. """
  201. def __init__(self, distractors=False, max_steps: int | None = None, **kwargs):
  202. self.distractors = distractors
  203. room_size = 6
  204. if max is None:
  205. max_steps = 8 * room_size**2
  206. super().__init__(
  207. num_rows=1, num_cols=2, room_size=6, max_steps=max_steps, **kwargs
  208. )
  209. def gen_mission(self):
  210. # Add a random object to the room on the right
  211. obj, _ = self.add_object(1, 0, kind="box")
  212. # Make sure the two rooms are directly connected by a locked door
  213. door, _ = self.add_door(0, 0, 0, locked=True)
  214. # Add a key to unlock the door
  215. self.add_object(0, 0, "key", door.color)
  216. if self.distractors:
  217. self.add_distractors(num_distractors=4)
  218. self.place_agent(0, 0)
  219. self.instrs = PickupInstr(ObjDesc(obj.type, obj.color))
  220. class BlockedUnlockPickup(RoomGridLevel):
  221. """
  222. ## Description
  223. Unlock a door blocked by a ball, then pick up a box
  224. in another room
  225. ## Mission Space
  226. "pick up the box"
  227. ## Action Space
  228. | Num | Name | Action |
  229. |-----|--------------|-------------------|
  230. | 0 | left | Turn left |
  231. | 1 | right | Turn right |
  232. | 2 | forward | Move forward |
  233. | 3 | pickup | Pick up an object |
  234. | 4 | drop | Unused |
  235. | 5 | toggle | Unused |
  236. | 6 | done | Unused |
  237. ## Observation Encoding
  238. - Each tile is encoded as a 3 dimensional tuple:
  239. `(OBJECT_IDX, COLOR_IDX, STATE)`
  240. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  241. [minigrid/minigrid.py](minigrid/minigrid.py)
  242. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  243. ## Rewards
  244. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  245. ## Termination
  246. The episode ends if any one of the following conditions is met:
  247. 1. The agent picks up the box.
  248. 2. Timeout (see `max_steps`).
  249. ## Registered Configurations
  250. - `BabyAI-BlockedUnlockPickup-v0`
  251. """
  252. def __init__(self, max_steps: int | None = None, **kwargs):
  253. room_size = 6
  254. if max_steps is None:
  255. max_steps = 16 * room_size**2
  256. super().__init__(
  257. num_rows=1, num_cols=2, room_size=room_size, max_steps=max_steps, **kwargs
  258. )
  259. def gen_mission(self):
  260. # Add a box to the room on the right
  261. obj, _ = self.add_object(1, 0, kind="box")
  262. # Make sure the two rooms are directly connected by a locked door
  263. door, pos = self.add_door(0, 0, 0, locked=True)
  264. # Block the door with a ball
  265. color = self._rand_color()
  266. self.grid.set(pos[0] - 1, pos[1], Ball(color))
  267. # Add a key to unlock the door
  268. self.add_object(0, 0, "key", door.color)
  269. self.place_agent(0, 0)
  270. self.instrs = PickupInstr(ObjDesc(obj.type))
  271. class UnlockToUnlock(RoomGridLevel):
  272. """
  273. ## Description
  274. Unlock a door A that requires to unlock a door B before
  275. ## Mission Space
  276. "pick up the ball"
  277. ## Action Space
  278. | Num | Name | Action |
  279. |-----|--------------|-------------------|
  280. | 0 | left | Turn left |
  281. | 1 | right | Turn right |
  282. | 2 | forward | Move forward |
  283. | 3 | pickup | Pick up an object |
  284. | 4 | drop | Unused |
  285. | 5 | toggle | Unused |
  286. | 6 | done | Unused |
  287. ## Observation Encoding
  288. - Each tile is encoded as a 3 dimensional tuple:
  289. `(OBJECT_IDX, COLOR_IDX, STATE)`
  290. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  291. [minigrid/minigrid.py](minigrid/minigrid.py)
  292. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  293. ## Rewards
  294. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  295. ## Termination
  296. The episode ends if any one of the following conditions is met:
  297. 1. The agent picks up the ball.
  298. 2. Timeout (see `max_steps`).
  299. ## Registered Configurations
  300. - `BabyAI-UnlockToUnlock-v0`
  301. """
  302. def __init__(self, max_steps: int | None = None, **kwargs):
  303. room_size = 6
  304. if max_steps is None:
  305. max_steps = 30 * room_size**2
  306. super().__init__(
  307. num_rows=1, num_cols=3, room_size=room_size, max_steps=max_steps, **kwargs
  308. )
  309. def gen_mission(self):
  310. colors = self._rand_subset(COLOR_NAMES, 2)
  311. # Add a door of color A connecting left and middle room
  312. self.add_door(0, 0, door_idx=0, color=colors[0], locked=True)
  313. # Add a key of color A in the room on the right
  314. self.add_object(2, 0, kind="key", color=colors[0])
  315. # Add a door of color B connecting middle and right room
  316. self.add_door(1, 0, door_idx=0, color=colors[1], locked=True)
  317. # Add a key of color B in the middle room
  318. self.add_object(1, 0, kind="key", color=colors[1])
  319. obj, _ = self.add_object(0, 0, kind="ball")
  320. self.place_agent(1, 0)
  321. self.instrs = PickupInstr(ObjDesc(obj.type))