# empty.py — MiniGrid "Empty" environment definition.
from typing import Optional

from minigrid.core.grid import Grid
from minigrid.core.mission import MissionSpace
from minigrid.core.world_object import Goal
from minigrid.minigrid_env import MiniGridEnv
  6. class EmptyEnv(MiniGridEnv):
  7. """
  8. <p>
  9. <img src="https://raw.githubusercontent.com/Farama-Foundation/Minigrid/master/figures/empty-env.png" alt="dempty-env" width="200px"/>
  10. </p>
  11. ### Description
  12. This environment is an empty room, and the goal of the agent is to reach the
  13. green goal square, which provides a sparse reward. A small penalty is
  14. subtracted for the number of steps to reach the goal. This environment is
  15. useful, with small rooms, to validate that your RL algorithm works
  16. correctly, and with large rooms to experiment with sparse rewards and
  17. exploration. The random variants of the environment have the agent starting
  18. at a random position for each episode, while the regular variants have the
  19. agent always starting in the corner opposite to the goal.
  20. ### Mission Space
  21. "get to the green goal square"
  22. ### Action Space
  23. | Num | Name | Action |
  24. |-----|--------------|--------------|
  25. | 0 | left | Turn left |
  26. | 1 | right | Turn right |
  27. | 2 | forward | Move forward |
  28. | 3 | pickup | Unused |
  29. | 4 | drop | Unused |
  30. | 5 | toggle | Unused |
  31. | 6 | done | Unused |
  32. ### Observation Encoding
  33. - Each tile is encoded as a 3 dimensional tuple:
  34. `(OBJECT_IDX, COLOR_IDX, STATE)`
  35. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  36. [minigrid/minigrid.py](minigrid/minigrid.py)
  37. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  38. ### Rewards
  39. A reward of '1' is given for success, and '0' for failure.
  40. ### Termination
  41. The episode ends if any one of the following conditions is met:
  42. 1. The agent reaches the goal.
  43. 2. Timeout (see `max_steps`).
  44. ### Registered Configurations
  45. - `MiniGrid-Empty-5x5-v0`
  46. - `MiniGrid-Empty-Random-5x5-v0`
  47. - `MiniGrid-Empty-6x6-v0`
  48. - `MiniGrid-Empty-Random-6x6-v0`
  49. - `MiniGrid-Empty-8x8-v0`
  50. - `MiniGrid-Empty-16x16-v0`
  51. """
  52. def __init__(
  53. self,
  54. size=8,
  55. agent_start_pos=(1, 1),
  56. agent_start_dir=0,
  57. max_steps: Optional[int] = None,
  58. **kwargs
  59. ):
  60. self.agent_start_pos = agent_start_pos
  61. self.agent_start_dir = agent_start_dir
  62. mission_space = MissionSpace(mission_func=self._gen_mission)
  63. if max_steps is None:
  64. max_steps = 4 * size**2
  65. super().__init__(
  66. mission_space=mission_space,
  67. grid_size=size,
  68. # Set this to True for maximum speed
  69. see_through_walls=True,
  70. max_steps=max_steps,
  71. **kwargs
  72. )
  73. @staticmethod
  74. def _gen_mission():
  75. return "get to the green goal square"
  76. def _gen_grid(self, width, height):
  77. # Create an empty grid
  78. self.grid = Grid(width, height)
  79. # Generate the surrounding walls
  80. self.grid.wall_rect(0, 0, width, height)
  81. # Place a goal square in the bottom-right corner
  82. self.put_obj(Goal(), width - 2, height - 2)
  83. # Place the agent
  84. if self.agent_start_pos is not None:
  85. self.agent_pos = self.agent_start_pos
  86. self.agent_dir = self.agent_start_dir
  87. else:
  88. self.place_agent()
  89. self.mission = "get to the green goal square"