fourrooms.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. from minigrid.core.grid import Grid
  2. from minigrid.core.mission import MissionSpace
  3. from minigrid.core.world_object import Goal
  4. from minigrid.minigrid_env import MiniGridEnv
  5. class FourRoomsEnv(MiniGridEnv):
  6. """
  7. <p>
  8. <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/four-rooms-env.png" alt="four-rooms-env" width="200px"/>
  9. </p>
  10. ### Description
  11. Classic four room reinforcement learning environment. The agent must
  12. navigate in a maze composed of four rooms interconnected by 4 gaps in the
  13. walls. To obtain a reward, the agent must reach the green goal square. Both
  14. the agent and the goal square are randomly placed in any of the four rooms.
  15. ### Mission Space
  16. "reach the goal"
  17. ### Action Space
  18. | Num | Name | Action |
  19. |-----|--------------|--------------|
  20. | 0 | left | Turn left |
  21. | 1 | right | Turn right |
  22. | 2 | forward | Move forward |
  23. | 3 | pickup | Unused |
  24. | 4 | drop | Unused |
  25. | 5 | toggle | Unused |
  26. | 6 | done | Unused |
  27. ### Observation Encoding
  28. - Each tile is encoded as a 3 dimensional tuple:
  29. `(OBJECT_IDX, COLOR_IDX, STATE)`
  30. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  31. [minigrid/minigrid.py](minigrid/minigrid.py)
  32. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  33. ### Rewards
  34. A reward of '1' is given for success, and '0' for failure.
  35. ### Termination
  36. The episode ends if any one of the following conditions is met:
  37. 1. The agent reaches the goal.
  38. 2. Timeout (see `max_steps`).
  39. ### Registered Configurations
  40. - `MiniGrid-FourRooms-v0`
  41. """
  42. def __init__(self, agent_pos=None, goal_pos=None, max_steps=100, **kwargs):
  43. self._agent_default_pos = agent_pos
  44. self._goal_default_pos = goal_pos
  45. self.size = 19
  46. mission_space = MissionSpace(mission_func=self._gen_mission)
  47. super().__init__(
  48. mission_space=mission_space,
  49. width=self.size,
  50. height=self.size,
  51. max_steps=max_steps,
  52. **kwargs
  53. )
  54. @staticmethod
  55. def _gen_mission():
  56. return "reach the goal"
  57. def _gen_grid(self, width, height):
  58. # Create the grid
  59. self.grid = Grid(width, height)
  60. # Generate the surrounding walls
  61. self.grid.horz_wall(0, 0)
  62. self.grid.horz_wall(0, height - 1)
  63. self.grid.vert_wall(0, 0)
  64. self.grid.vert_wall(width - 1, 0)
  65. room_w = width // 2
  66. room_h = height // 2
  67. # For each row of rooms
  68. for j in range(0, 2):
  69. # For each column
  70. for i in range(0, 2):
  71. xL = i * room_w
  72. yT = j * room_h
  73. xR = xL + room_w
  74. yB = yT + room_h
  75. # Bottom wall and door
  76. if i + 1 < 2:
  77. self.grid.vert_wall(xR, yT, room_h)
  78. pos = (xR, self._rand_int(yT + 1, yB))
  79. self.grid.set(*pos, None)
  80. # Bottom wall and door
  81. if j + 1 < 2:
  82. self.grid.horz_wall(xL, yB, room_w)
  83. pos = (self._rand_int(xL + 1, xR), yB)
  84. self.grid.set(*pos, None)
  85. # Randomize the player start position and orientation
  86. if self._agent_default_pos is not None:
  87. self.agent_pos = self._agent_default_pos
  88. self.grid.set(*self._agent_default_pos, None)
  89. # assuming random start direction
  90. self.agent_dir = self._rand_int(0, 4)
  91. else:
  92. self.place_agent()
  93. if self._goal_default_pos is not None:
  94. goal = Goal()
  95. self.put_obj(goal, *self._goal_default_pos)
  96. goal.init_pos, goal.cur_pos = self._goal_default_pos
  97. else:
  98. self.place_obj(Goal())