# fourrooms.py

from __future__ import annotations

from minigrid.core.grid import Grid
from minigrid.core.mission import MissionSpace
from minigrid.core.world_object import Goal
from minigrid.minigrid_env import MiniGridEnv


  5. class FourRoomsEnv(MiniGridEnv):
  6. """
  7. ### Description
  8. Classic four room reinforcement learning environment. The agent must
  9. navigate in a maze composed of four rooms interconnected by 4 gaps in the
  10. walls. To obtain a reward, the agent must reach the green goal square. Both
  11. the agent and the goal square are randomly placed in any of the four rooms.
  12. ### Mission Space
  13. "reach the goal"
  14. ### Action Space
  15. | Num | Name | Action |
  16. |-----|--------------|--------------|
  17. | 0 | left | Turn left |
  18. | 1 | right | Turn right |
  19. | 2 | forward | Move forward |
  20. | 3 | pickup | Unused |
  21. | 4 | drop | Unused |
  22. | 5 | toggle | Unused |
  23. | 6 | done | Unused |
  24. ### Observation Encoding
  25. - Each tile is encoded as a 3 dimensional tuple:
  26. `(OBJECT_IDX, COLOR_IDX, STATE)`
  27. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  28. [minigrid/minigrid.py](minigrid/minigrid.py)
  29. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  30. ### Rewards
  31. A reward of '1' is given for success, and '0' for failure.
  32. ### Termination
  33. The episode ends if any one of the following conditions is met:
  34. 1. The agent reaches the goal.
  35. 2. Timeout (see `max_steps`).
  36. ### Registered Configurations
  37. - `MiniGrid-FourRooms-v0`
  38. """
  39. def __init__(self, agent_pos=None, goal_pos=None, **kwargs):
  40. self._agent_default_pos = agent_pos
  41. self._goal_default_pos = goal_pos
  42. self.size = 19
  43. mission_space = MissionSpace(mission_func=lambda: "reach the goal")
  44. super().__init__(
  45. mission_space=mission_space,
  46. width=self.size,
  47. height=self.size,
  48. max_steps=100,
  49. **kwargs
  50. )
  51. def _gen_grid(self, width, height):
  52. # Create the grid
  53. self.grid = Grid(width, height)
  54. # Generate the surrounding walls
  55. self.grid.horz_wall(0, 0)
  56. self.grid.horz_wall(0, height - 1)
  57. self.grid.vert_wall(0, 0)
  58. self.grid.vert_wall(width - 1, 0)
  59. room_w = width // 2
  60. room_h = height // 2
  61. # For each row of rooms
  62. for j in range(0, 2):
  63. # For each column
  64. for i in range(0, 2):
  65. xL = i * room_w
  66. yT = j * room_h
  67. xR = xL + room_w
  68. yB = yT + room_h
  69. # Bottom wall and door
  70. if i + 1 < 2:
  71. self.grid.vert_wall(xR, yT, room_h)
  72. pos = (xR, self._rand_int(yT + 1, yB))
  73. self.grid.set(*pos, None)
  74. # Bottom wall and door
  75. if j + 1 < 2:
  76. self.grid.horz_wall(xL, yB, room_w)
  77. pos = (self._rand_int(xL + 1, xR), yB)
  78. self.grid.set(*pos, None)
  79. # Randomize the player start position and orientation
  80. if self._agent_default_pos is not None:
  81. self.agent_pos = self._agent_default_pos
  82. self.grid.set(*self._agent_default_pos, None)
  83. # assuming random start direction
  84. self.agent_dir = self._rand_int(0, 4)
  85. else:
  86. self.place_agent()
  87. if self._goal_default_pos is not None:
  88. goal = Goal()
  89. self.put_obj(goal, *self._goal_default_pos)
  90. goal.init_pos, goal.cur_pos = self._goal_default_pos
  91. else:
  92. self.place_obj(Goal())