goto.py 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813
  1. """
  2. Copied and adapted from https://github.com/mila-iqia/babyai.
  3. Levels described in the Baby AI ICLR 2019 submission, with the `Go to` instruction.
  4. """
  5. from __future__ import annotations
  6. from minigrid.envs.babyai.core.levelgen import LevelGen
  7. from minigrid.envs.babyai.core.roomgrid_level import RejectSampling, RoomGridLevel
  8. from minigrid.envs.babyai.core.verifier import GoToInstr, ObjDesc
  class GoToRedBallGrey(RoomGridLevel):
      """

      ## Description

      Go to the red ball, single room, with distractors.
      The distractors are all grey to reduce perceptual complexity.
      This level has distractors but doesn't make use of language.

      ## Mission Space

      "go to the red ball"

      ## Action Space

      | Num | Name         | Action            |
      |-----|--------------|-------------------|
      | 0   | left         | Turn left         |
      | 1   | right        | Turn right        |
      | 2   | forward      | Move forward      |
      | 3   | pickup       | Pick up an object |
      | 4   | drop         | Unused            |
      | 5   | toggle       | Unused            |
      | 6   | done         | Unused            |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent goes to the red ball.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `BabyAI-GoToRedBallGrey-v0`

      """

      def __init__(self, room_size=8, num_dists=7, **kwargs):
          """Store the distractor count and build a single-room (1x1) level."""
          self.num_dists = num_dists
          super().__init__(
              room_size=room_size,
              num_rows=1,
              num_cols=1,
              **kwargs,
          )

      def gen_mission(self):
          """Place the agent, the red ball and grey distractors, then set the goal."""
          self.place_agent()
          target, _ = self.add_object(0, 0, "ball", "red")

          # Every distractor is recoloured grey after it has been placed.
          distractors = self.add_distractors(
              num_distractors=self.num_dists, all_unique=False
          )
          for distractor in distractors:
              distractor.color = "grey"

          # Make sure no unblocking is required
          self.check_objs_reachable()

          self.instrs = GoToInstr(ObjDesc(target.type, target.color))
  class GoToRedBall(RoomGridLevel):
      """

      ## Description

      Go to the red ball, single room, with distractors.
      This level has distractors but doesn't make use of language.

      ## Mission Space

      "go to the red ball"

      ## Action Space

      | Num | Name         | Action            |
      |-----|--------------|-------------------|
      | 0   | left         | Turn left         |
      | 1   | right        | Turn right        |
      | 2   | forward      | Move forward      |
      | 3   | pickup       | Pick up an object |
      | 4   | drop         | Unused            |
      | 5   | toggle       | Unused            |
      | 6   | done         | Unused            |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent goes to the red ball.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `BabyAI-GoToRedBall-v0`

      """

      def __init__(self, room_size=8, num_dists=7, **kwargs):
          """Store the distractor count and build a single-room (1x1) level."""
          self.num_dists = num_dists
          super().__init__(
              room_size=room_size,
              num_rows=1,
              num_cols=1,
              **kwargs,
          )

      def gen_mission(self):
          """Place the agent, the red ball and the distractors, then set the goal."""
          self.place_agent()
          target, _ = self.add_object(0, 0, "ball", "red")
          self.add_distractors(
              num_distractors=self.num_dists,
              all_unique=False,
          )

          # Make sure no unblocking is required
          self.check_objs_reachable()

          self.instrs = GoToInstr(ObjDesc(target.type, target.color))
  class GoToRedBallNoDists(GoToRedBall):
      """

      ## Description

      Go to the red ball. No distractors present.

      ## Mission Space

      "go to the red ball"

      ## Action Space

      | Num | Name         | Action            |
      |-----|--------------|-------------------|
      | 0   | left         | Turn left         |
      | 1   | right        | Turn right        |
      | 2   | forward      | Move forward      |
      | 3   | pickup       | Pick up an object |
      | 4   | drop         | Unused            |
      | 5   | toggle       | Unused            |
      | 6   | done         | Unused            |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent goes to the red ball.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `BabyAI-GoToRedBallNoDists-v0`

      """

      def __init__(self, **kwargs):
          """Configure the parent level with room size 8 and zero distractors."""
          super().__init__(
              num_dists=0,
              room_size=8,
              **kwargs,
          )
  class GoToObj(RoomGridLevel):
      """

      ## Description

      Go to an object, inside a single room with no doors, no distractors. The
      naming convention `GoToObjS{X}` represents a room of size `X`.

      ## Mission Space

      "go to the {color} {type}"

      {color} is the color of the object. Can be "red", "green", "blue", "purple",
      "yellow" or "grey".

      {type} is the type of the object. Can be "ball", "box" or "key".

      ## Action Space

      | Num | Name         | Action            |
      |-----|--------------|-------------------|
      | 0   | left         | Turn left         |
      | 1   | right        | Turn right        |
      | 2   | forward      | Move forward      |
      | 3   | pickup       | Pick up an object |
      | 4   | drop         | Unused            |
      | 5   | toggle       | Unused            |
      | 6   | done         | Unused            |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent goes to the object.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `BabyAI-GoToObj-v0`
      - `BabyAI-GoToObjS4-v0`
      - `BabyAI-GoToObjS6-v0`

      """

      def __init__(self, room_size=8, **kwargs):
          """Build a single-room (1x1) level of the requested room size."""
          super().__init__(
              room_size=room_size,
              num_rows=1,
              num_cols=1,
              **kwargs,
          )

      def gen_mission(self):
          """Place the agent and one object, and make that object the goal."""
          self.place_agent()
          # The only object in the room is produced by the distractor helper.
          target = self.add_distractors(num_distractors=1)[0]
          self.instrs = GoToInstr(ObjDesc(target.type, target.color))
  class GoToLocal(RoomGridLevel):
      """

      ## Description

      Go to an object, inside a single room with no doors, with distractors. The
      naming convention `GoToLocalS{X}N{Y}` represents a room of size `X` with
      distractor number `Y`.

      ## Mission Space

      "go to the {color} {type}"

      {color} is the color of the object. Can be "red", "green", "blue", "purple",
      "yellow" or "grey".

      {type} is the type of the object. Can be "ball", "box" or "key".

      ## Action Space

      | Num | Name         | Action            |
      |-----|--------------|-------------------|
      | 0   | left         | Turn left         |
      | 1   | right        | Turn right        |
      | 2   | forward      | Move forward      |
      | 3   | pickup       | Pick up an object |
      | 4   | drop         | Unused            |
      | 5   | toggle       | Unused            |
      | 6   | done         | Unused            |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent goes to the object.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `BabyAI-GoToLocal-v0`
      - `BabyAI-GoToLocalS5N2-v0`
      - `BabyAI-GoToLocalS6N2-v0`
      - `BabyAI-GoToLocalS6N3-v0`
      - `BabyAI-GoToLocalS6N4-v0`
      - `BabyAI-GoToLocalS7N4-v0`
      - `BabyAI-GoToLocalS7N5-v0`
      - `BabyAI-GoToLocalS8N2-v0`
      - `BabyAI-GoToLocalS8N3-v0`
      - `BabyAI-GoToLocalS8N4-v0`
      - `BabyAI-GoToLocalS8N5-v0`
      - `BabyAI-GoToLocalS8N6-v0`
      - `BabyAI-GoToLocalS8N7-v0`

      """

      def __init__(self, room_size=8, num_dists=8, **kwargs):
          """Store the object count and build a single-room (1x1) level."""
          self.num_dists = num_dists
          super().__init__(
              room_size=room_size,
              num_rows=1,
              num_cols=1,
              **kwargs,
          )

      def gen_mission(self):
          """Place the agent and the objects, then pick one object as the goal."""
          self.place_agent()
          candidates = self.add_distractors(
              num_distractors=self.num_dists, all_unique=False
          )
          self.check_objs_reachable()

          # The goal is one of the objects that were just placed.
          target = self._rand_elem(candidates)
          self.instrs = GoToInstr(ObjDesc(target.type, target.color))
  class GoTo(RoomGridLevel):
      """

      ## Description

      Go to an object, the object may be in another room. Many distractors.

      ## Mission Space

      "go to a/the {color} {type}"

      {color} is the color of the object. Can be "red", "green", "blue", "purple",
      "yellow" or "grey".

      {type} is the type of the object. Can be "ball", "box" or "key".

      ## Action Space

      | Num | Name         | Action            |
      |-----|--------------|-------------------|
      | 0   | left         | Turn left         |
      | 1   | right        | Turn right        |
      | 2   | forward      | Move forward      |
      | 3   | pickup       | Pick up an object |
      | 4   | drop         | Unused            |
      | 5   | toggle       | Unused            |
      | 6   | done         | Unused            |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent goes to the object.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `BabyAI-GoTo-v0`
      - `BabyAI-GoToOpen-v0`
      - `BabyAI-GoToObjMaze-v0`
      - `BabyAI-GoToObjMazeOpen-v0`
      - `BabyAI-GoToObjMazeS4R2-v0`
      - `BabyAI-GoToObjMazeS4-v0`
      - `BabyAI-GoToObjMazeS5-v0`
      - `BabyAI-GoToObjMazeS6-v0`
      - `BabyAI-GoToObjMazeS7-v0`

      """

      def __init__(
          self,
          room_size=8,
          num_rows=3,
          num_cols=3,
          num_dists=18,
          doors_open=False,
          **kwargs,
      ):
          """Store the distractor count and door flag, then build the room grid."""
          self.num_dists = num_dists
          self.doors_open = doors_open
          super().__init__(
              room_size=room_size,
              num_rows=num_rows,
              num_cols=num_cols,
              **kwargs,
          )

      def gen_mission(self):
          """Connect the rooms, scatter the objects and pick one as the goal."""
          self.place_agent()
          self.connect_all()

          candidates = self.add_distractors(
              num_distractors=self.num_dists, all_unique=False
          )
          self.check_objs_reachable()

          target = self._rand_elem(candidates)
          self.instrs = GoToInstr(ObjDesc(target.type, target.color))

          # If requested, open all the doors
          if self.doors_open:
              self.open_all_doors()
  class GoToImpUnlock(RoomGridLevel):
      """

      ## Description

      Go to an object, which may be in a locked room.
      Competencies: Maze, GoTo, ImpUnlock
      No unblocking.

      ## Mission Space

      "go to a/the {color} {type}"

      {color} is the color of the object. Can be "red", "green", "blue", "purple",
      "yellow" or "grey".

      {type} is the type of the object. Can be "ball", "box" or "key".

      ## Action Space

      | Num | Name         | Action            |
      |-----|--------------|-------------------|
      | 0   | left         | Turn left         |
      | 1   | right        | Turn right        |
      | 2   | forward      | Move forward      |
      | 3   | pickup       | Pick up an object |
      | 4   | drop         | Unused            |
      | 5   | toggle       | Unused            |
      | 6   | done         | Unused            |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent goes to the object.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `BabyAI-GoToImpUnlock-v0`

      """

      def gen_mission(self):
          """Generate a level whose goal object sits behind a locked door.

          A random room gets a locked door, the matching key goes to a different
          room, distractors fill every room except the locked one, the agent
          starts outside the locked room, and finally a single object is added to
          the locked room and becomes the goal.
          """
          # Add a locked door to a random room
          locked_i = self._rand_int(0, self.num_cols)
          locked_j = self._rand_int(0, self.num_rows)
          door, _ = self.add_door(locked_i, locked_j, locked=True)
          locked_room = self.get_room(locked_i, locked_j)

          # Add the key to a different room.
          # Room indices are compared by value (==): identity checks (`is`) on
          # integers depend on the interpreter and on the integer type returned
          # by `_rand_int`, so they cannot reliably detect "same room".
          while True:
              key_i = self._rand_int(0, self.num_cols)
              key_j = self._rand_int(0, self.num_rows)
              if key_i == locked_i and key_j == locked_j:
                  continue
              self.add_object(key_i, key_j, "key", door.color)
              break

          self.connect_all()

          # Add distractors to all but the locked room.
          # We do this to speed up the reachability test,
          # which otherwise will reject all levels with
          # objects in the locked room.
          for i in range(self.num_cols):
              for j in range(self.num_rows):
                  if i != locked_i or j != locked_j:
                      self.add_distractors(i, j, num_distractors=2, all_unique=False)

          # The agent must be placed after all the object to respect constraints
          while True:
              self.place_agent()
              start_room = self.room_from_pos(*self.agent_pos)
              # Ensure that we are not placing the agent in the locked room
              # (rooms are objects, so an identity check is appropriate here)
              if start_room is locked_room:
                  continue
              break

          self.check_objs_reachable()

          # Add a single object to the locked room
          # The instruction requires going to an object matching that description
          (obj,) = self.add_distractors(
              locked_i, locked_j, num_distractors=1, all_unique=False
          )
          self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
  class GoToSeq(LevelGen):
      """

      ## Description

      Sequencing of go-to-object commands.

      Competencies: Maze, GoTo, Seq

      No locked room.
      No locations.
      No unblocking.

      ## Mission Space

      "go to a/the {color} {type}" +
      "and go to a/the {color} {type}" +
      ", then go to a/the {color} {type}" +
      "and go to a/the {color} {type}"

      {color} is the color of the object. Can be "red", "green", "blue", "purple",
      "yellow" or "grey".

      {type} is the type of the object. Can be "ball", "box" or "key".

      ## Action Space

      | Num | Name         | Action            |
      |-----|--------------|-------------------|
      | 0   | left         | Turn left         |
      | 1   | right        | Turn right        |
      | 2   | forward      | Move forward      |
      | 3   | pickup       | Pick up an object |
      | 4   | drop         | Unused            |
      | 5   | toggle       | Unused            |
      | 6   | done         | Unused            |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent goes to the object.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `BabyAI-GoToSeq-v0`
      - `BabyAI-GoToSeqS5R2-v0`

      """

      def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
          """Configure the level generator for "goto"-only instructions."""
          super().__init__(
              # Grid layout and object count supplied by the caller.
              num_rows=num_rows,
              num_cols=num_cols,
              room_size=room_size,
              num_dists=num_dists,
              # Fixed restrictions: go-to actions only, with no locked rooms,
              # no location phrases and no unblocking.
              action_kinds=["goto"],
              locked_room_prob=0,
              locations=False,
              unblocking=False,
              **kwargs,
          )
  class GoToRedBlueBall(RoomGridLevel):
      """

      ## Description

      Go to the red ball or to the blue ball.
      There is exactly one red or blue ball, and some distractors.
      The distractors are guaranteed not to be red or blue balls.
      Language is not required to solve this level.

      ## Mission Space

      "go to the {color} ball"

      {color} is the color of the ball. Can be "red" or "blue".

      ## Action Space

      | Num | Name         | Action            |
      |-----|--------------|-------------------|
      | 0   | left         | Turn left         |
      | 1   | right        | Turn right        |
      | 2   | forward      | Move forward      |
      | 3   | pickup       | Pick up an object |
      | 4   | drop         | Unused            |
      | 5   | toggle       | Unused            |
      | 6   | done         | Unused            |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent goes to the ball.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `BabyAI-GoToRedBlueBall-v0`

      """

      def __init__(self, room_size=8, num_dists=7, **kwargs):
          """Store the distractor count and build a single-room (1x1) level."""
          self.num_dists = num_dists
          super().__init__(
              room_size=room_size,
              num_rows=1,
              num_cols=1,
              **kwargs,
          )

      def gen_mission(self):
          """Place distractors, reject samples with a red/blue ball, add the goal ball."""
          self.place_agent()
          distractors = self.add_distractors(
              num_distractors=self.num_dists, all_unique=False
          )

          # Ensure there is only one red or blue ball
          has_conflicting_ball = any(
              item.type == "ball" and item.color in ("blue", "red")
              for item in distractors
          )
          if has_conflicting_ball:
              raise RejectSampling("can only have one blue or red ball")

          ball_color = self._rand_elem(["red", "blue"])
          target, _ = self.add_object(0, 0, "ball", ball_color)

          # Make sure no unblocking is required
          self.check_objs_reachable()

          self.instrs = GoToInstr(ObjDesc(target.type, target.color))
  class GoToDoor(RoomGridLevel):
      """

      ## Description

      Go to a door
      (of a given color, in the current room)
      No distractors, no language variation

      ## Mission Space

      "go to the {color} door"

      {color} is the color of the door. Can be "red", "green", "blue", "purple",
      "yellow" or "grey".

      ## Action Space

      | Num | Name         | Action            |
      |-----|--------------|-------------------|
      | 0   | left         | Turn left         |
      | 1   | right        | Turn right        |
      | 2   | forward      | Move forward      |
      | 3   | pickup       | Pick up an object |
      | 4   | drop         | Unused            |
      | 5   | toggle       | Unused            |
      | 6   | done         | Unused            |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent goes to the door.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `BabyAI-GoToDoor-v0`

      """

      def __init__(self, **kwargs):
          """Build the level with a fixed room size of 7."""
          super().__init__(room_size=7, **kwargs)

      def gen_mission(self):
          """Add four doors to room (1, 1), place the agent there, pick a goal door."""
          # `add_door` returns a (door, position) pair; only the door is kept.
          doors = [self.add_door(1, 1)[0] for _ in range(4)]
          self.place_agent(1, 1)

          chosen_door = self._rand_elem(doors)
          self.instrs = GoToInstr(ObjDesc("door", chosen_door.color))
  class GoToObjDoor(RoomGridLevel):
      """

      ## Description

      Go to an object or door
      (of a given type and color, in the current room)

      ## Mission Space

      "go to the {color} {type}"

      {color} is the color of the object. Can be "red", "green", "blue", "purple",
      "yellow" or "grey".

      {type} is the type of the object. Can be "ball", "box", "key" or "door".

      ## Action Space

      | Num | Name         | Action            |
      |-----|--------------|-------------------|
      | 0   | left         | Turn left         |
      | 1   | right        | Turn right        |
      | 2   | forward      | Move forward      |
      | 3   | pickup       | Pick up an object |
      | 4   | drop         | Unused            |
      | 5   | toggle       | Unused            |
      | 6   | done         | Unused            |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent goes to the object or door.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `BabyAI-GoToObjDoor-v0`

      """

      def __init__(self, **kwargs):
          """Build the level with a fixed room size of 8."""
          super().__init__(room_size=8, **kwargs)

      def gen_mission(self):
          """Fill room (1, 1) with eight objects and four doors, then pick the goal."""
          self.place_agent(1, 1)
          candidates = self.add_distractors(
              1, 1, num_distractors=8, all_unique=False
          )

          # Doors join the same candidate list, so the goal may be a door.
          for _ in range(4):
              new_door, _pos = self.add_door(1, 1)
              candidates.append(new_door)

          self.check_objs_reachable()

          target = self._rand_elem(candidates)
          self.instrs = GoToInstr(ObjDesc(target.type, target.color))
  559. objs.append(door)
  560. self.check_objs_reachable()
  561. obj = self._rand_elem(objs)
  562. self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))