fourrooms.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. from gym_minigrid.minigrid import Goal, Grid, MiniGridEnv, MissionSpace
  2. class FourRoomsEnv(MiniGridEnv):
  3. """
  4. ### Description
  5. Classic four room reinforcement learning environment. The agent must
  6. navigate in a maze composed of four rooms interconnected by 4 gaps in the
  7. walls. To obtain a reward, the agent must reach the green goal square. Both
  8. the agent and the goal square are randomly placed in any of the four rooms.
  9. ### Mission Space
  10. "reach the goal"
  11. ### Action Space
  12. | Num | Name | Action |
  13. |-----|--------------|--------------|
  14. | 0 | left | Turn left |
  15. | 1 | right | Turn right |
  16. | 2 | forward | Move forward |
  17. | 3 | pickup | Unused |
  18. | 4 | drop | Unused |
  19. | 5 | toggle | Unused |
  20. | 6 | done | Unused |
  21. ### Observation Encoding
  22. - Each tile is encoded as a 3 dimensional tuple:
  23. `(OBJECT_IDX, COLOR_IDX, STATE)`
  24. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  25. [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
  26. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  27. ### Rewards
  28. A reward of '1' is given for success, and '0' for failure.
  29. ### Termination
  30. The episode ends if any one of the following conditions is met:
  31. 1. The agent reaches the goal.
  32. 2. Timeout (see `max_steps`).
  33. ### Registered Configurations
  34. - `MiniGrid-FourRooms-v0`
  35. """
  36. def __init__(self, agent_pos=None, goal_pos=None, **kwargs):
  37. self._agent_default_pos = agent_pos
  38. self._goal_default_pos = goal_pos
  39. self.size = 19
  40. mission_space = MissionSpace(mission_func=lambda: "reach the goal")
  41. super().__init__(
  42. mission_space=mission_space,
  43. width=self.size,
  44. height=self.size,
  45. max_steps=100,
  46. **kwargs
  47. )
  48. def _gen_grid(self, width, height):
  49. # Create the grid
  50. self.grid = Grid(width, height)
  51. # Generate the surrounding walls
  52. self.grid.horz_wall(0, 0)
  53. self.grid.horz_wall(0, height - 1)
  54. self.grid.vert_wall(0, 0)
  55. self.grid.vert_wall(width - 1, 0)
  56. room_w = width // 2
  57. room_h = height // 2
  58. # For each row of rooms
  59. for j in range(0, 2):
  60. # For each column
  61. for i in range(0, 2):
  62. xL = i * room_w
  63. yT = j * room_h
  64. xR = xL + room_w
  65. yB = yT + room_h
  66. # Bottom wall and door
  67. if i + 1 < 2:
  68. self.grid.vert_wall(xR, yT, room_h)
  69. pos = (xR, self._rand_int(yT + 1, yB))
  70. self.grid.set(*pos, None)
  71. # Bottom wall and door
  72. if j + 1 < 2:
  73. self.grid.horz_wall(xL, yB, room_w)
  74. pos = (self._rand_int(xL + 1, xR), yB)
  75. self.grid.set(*pos, None)
  76. # Randomize the player start position and orientation
  77. if self._agent_default_pos is not None:
  78. self.agent_pos = self._agent_default_pos
  79. self.grid.set(*self._agent_default_pos, None)
  80. # assuming random start direction
  81. self.agent_dir = self._rand_int(0, 4)
  82. else:
  83. self.place_agent()
  84. if self._goal_default_pos is not None:
  85. goal = Goal()
  86. self.put_obj(goal, *self._goal_default_pos)
  87. goal.init_pos, goal.cur_pos = self._goal_default_pos
  88. else:
  89. self.place_obj(Goal())