# fourrooms.py
from __future__ import annotations

from minigrid.core.grid import Grid
from minigrid.core.mission import MissionSpace
from minigrid.core.world_object import Goal
from minigrid.minigrid_env import MiniGridEnv
  5. class FourRoomsEnv(MiniGridEnv):
  6. """
  7. ![four-rooms-env](../_static/figures/four-rooms-env.png)
  8. ### Description
  9. Classic four room reinforcement learning environment. The agent must
  10. navigate in a maze composed of four rooms interconnected by 4 gaps in the
  11. walls. To obtain a reward, the agent must reach the green goal square. Both
  12. the agent and the goal square are randomly placed in any of the four rooms.
  13. ### Mission Space
  14. "reach the goal"
  15. ### Action Space
  16. | Num | Name | Action |
  17. |-----|--------------|--------------|
  18. | 0 | left | Turn left |
  19. | 1 | right | Turn right |
  20. | 2 | forward | Move forward |
  21. | 3 | pickup | Unused |
  22. | 4 | drop | Unused |
  23. | 5 | toggle | Unused |
  24. | 6 | done | Unused |
  25. ### Observation Encoding
  26. - Each tile is encoded as a 3 dimensional tuple:
  27. `(OBJECT_IDX, COLOR_IDX, STATE)`
  28. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  29. [minigrid/minigrid.py](minigrid/minigrid.py)
  30. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  31. ### Rewards
  32. A reward of '1' is given for success, and '0' for failure.
  33. ### Termination
  34. The episode ends if any one of the following conditions is met:
  35. 1. The agent reaches the goal.
  36. 2. Timeout (see `max_steps`).
  37. ### Registered Configurations
  38. - `MiniGrid-FourRooms-v0`
  39. """
  40. def __init__(self, agent_pos=None, goal_pos=None, **kwargs):
  41. self._agent_default_pos = agent_pos
  42. self._goal_default_pos = goal_pos
  43. self.size = 19
  44. mission_space = MissionSpace(mission_func=lambda: "reach the goal")
  45. super().__init__(
  46. mission_space=mission_space,
  47. width=self.size,
  48. height=self.size,
  49. max_steps=100,
  50. **kwargs
  51. )
  52. def _gen_grid(self, width, height):
  53. # Create the grid
  54. self.grid = Grid(width, height)
  55. # Generate the surrounding walls
  56. self.grid.horz_wall(0, 0)
  57. self.grid.horz_wall(0, height - 1)
  58. self.grid.vert_wall(0, 0)
  59. self.grid.vert_wall(width - 1, 0)
  60. room_w = width // 2
  61. room_h = height // 2
  62. # For each row of rooms
  63. for j in range(0, 2):
  64. # For each column
  65. for i in range(0, 2):
  66. xL = i * room_w
  67. yT = j * room_h
  68. xR = xL + room_w
  69. yB = yT + room_h
  70. # Bottom wall and door
  71. if i + 1 < 2:
  72. self.grid.vert_wall(xR, yT, room_h)
  73. pos = (xR, self._rand_int(yT + 1, yB))
  74. self.grid.set(*pos, None)
  75. # Bottom wall and door
  76. if j + 1 < 2:
  77. self.grid.horz_wall(xL, yB, room_w)
  78. pos = (self._rand_int(xL + 1, xR), yB)
  79. self.grid.set(*pos, None)
  80. # Randomize the player start position and orientation
  81. if self._agent_default_pos is not None:
  82. self.agent_pos = self._agent_default_pos
  83. self.grid.set(*self._agent_default_pos, None)
  84. # assuming random start direction
  85. self.agent_dir = self._rand_int(0, 4)
  86. else:
  87. self.place_agent()
  88. if self._goal_default_pos is not None:
  89. goal = Goal()
  90. self.put_obj(goal, *self._goal_default_pos)
  91. goal.init_pos, goal.cur_pos = self._goal_default_pos
  92. else:
  93. self.place_obj(Goal())