memory.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. from typing import Optional
  2. import numpy as np
  3. from minigrid.core.actions import Actions
  4. from minigrid.core.grid import Grid
  5. from minigrid.core.mission import MissionSpace
  6. from minigrid.core.world_object import Ball, Key, Wall
  7. from minigrid.minigrid_env import MiniGridEnv
  8. class MemoryEnv(MiniGridEnv):
  9. """
  10. ### Description
  11. This environment is a memory test. The agent starts in a small room where it
  12. sees an object. It then has to go through a narrow hallway which ends in a
  13. split. At each end of the split there is an object, one of which is the same
  14. as the object in the starting room. The agent has to remember the initial
  15. object, and go to the matching object at split.
  16. ### Mission Space
  17. "go to the matching object at the end of the hallway"
  18. ### Action Space
  19. | Num | Name | Action |
  20. |-----|--------------|---------------------------|
  21. | 0 | left | Turn left |
  22. | 1 | right | Turn right |
  23. | 2 | forward | Move forward |
  24. | 3 | pickup | Pick up an object |
  25. | 4 | drop | Unused |
  26. | 5 | toggle | Toggle/activate an object |
  27. | 6 | done | Unused |
  28. ### Observation Encoding
  29. - Each tile is encoded as a 3 dimensional tuple:
  30. `(OBJECT_IDX, COLOR_IDX, STATE)`
  31. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  32. [minigrid/minigrid.py](minigrid/minigrid.py)
  33. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  34. ### Rewards
  35. A reward of '1' is given for success, and '0' for failure.
  36. ### Termination
  37. The episode ends if any one of the following conditions is met:
  38. 1. The agent reaches the correct matching object.
  39. 2. The agent reaches the wrong matching object.
  40. 3. Timeout (see `max_steps`).
  41. ### Registered Configurations
  42. S: size of map SxS.
  43. - `MiniGrid-MemoryS17Random-v0`
  44. - `MiniGrid-MemoryS13Random-v0`
  45. - `MiniGrid-MemoryS13-v0`
  46. - `MiniGrid-MemoryS11-v0`
  47. """
  48. def __init__(
  49. self, size=8, random_length=False, max_steps: Optional[int] = None, **kwargs
  50. ):
  51. self.size = size
  52. self.random_length = random_length
  53. if max_steps is None:
  54. max_steps = 5 * size**2
  55. mission_space = MissionSpace(
  56. mission_func=lambda: "go to the matching object at the end of the hallway"
  57. )
  58. super().__init__(
  59. mission_space=mission_space,
  60. width=size,
  61. height=size,
  62. # Set this to True for maximum speed
  63. see_through_walls=False,
  64. max_steps=max_steps,
  65. **kwargs
  66. )
  67. def _gen_grid(self, width, height):
  68. self.grid = Grid(width, height)
  69. # Generate the surrounding walls
  70. self.grid.horz_wall(0, 0)
  71. self.grid.horz_wall(0, height - 1)
  72. self.grid.vert_wall(0, 0)
  73. self.grid.vert_wall(width - 1, 0)
  74. assert height % 2 == 1
  75. upper_room_wall = height // 2 - 2
  76. lower_room_wall = height // 2 + 2
  77. if self.random_length:
  78. hallway_end = self._rand_int(4, width - 2)
  79. else:
  80. hallway_end = width - 3
  81. # Start room
  82. for i in range(1, 5):
  83. self.grid.set(i, upper_room_wall, Wall())
  84. self.grid.set(i, lower_room_wall, Wall())
  85. self.grid.set(4, upper_room_wall + 1, Wall())
  86. self.grid.set(4, lower_room_wall - 1, Wall())
  87. # Horizontal hallway
  88. for i in range(5, hallway_end):
  89. self.grid.set(i, upper_room_wall + 1, Wall())
  90. self.grid.set(i, lower_room_wall - 1, Wall())
  91. # Vertical hallway
  92. for j in range(0, height):
  93. if j != height // 2:
  94. self.grid.set(hallway_end, j, Wall())
  95. self.grid.set(hallway_end + 2, j, Wall())
  96. # Fix the player's start position and orientation
  97. self.agent_pos = np.array((self._rand_int(1, hallway_end + 1), height // 2))
  98. self.agent_dir = 0
  99. # Place objects
  100. start_room_obj = self._rand_elem([Key, Ball])
  101. self.grid.set(1, height // 2 - 1, start_room_obj("green"))
  102. other_objs = self._rand_elem([[Ball, Key], [Key, Ball]])
  103. pos0 = (hallway_end + 1, height // 2 - 2)
  104. pos1 = (hallway_end + 1, height // 2 + 2)
  105. self.grid.set(*pos0, other_objs[0]("green"))
  106. self.grid.set(*pos1, other_objs[1]("green"))
  107. # Choose the target objects
  108. if start_room_obj == other_objs[0]:
  109. self.success_pos = (pos0[0], pos0[1] + 1)
  110. self.failure_pos = (pos1[0], pos1[1] - 1)
  111. else:
  112. self.success_pos = (pos1[0], pos1[1] - 1)
  113. self.failure_pos = (pos0[0], pos0[1] + 1)
  114. self.mission = "go to the matching object at the end of the hallway"
  115. def step(self, action):
  116. if action == Actions.pickup:
  117. action = Actions.toggle
  118. obs, reward, terminated, truncated, info = super().step(action)
  119. if tuple(self.agent_pos) == self.success_pos:
  120. reward = self._reward()
  121. terminated = True
  122. if tuple(self.agent_pos) == self.failure_pos:
  123. reward = 0
  124. terminated = True
  125. return obs, reward, terminated, truncated, info