lockedroom.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. from minigrid.core.constants import COLOR_NAMES
  2. from minigrid.core.grid import Grid
  3. from minigrid.core.mission import MissionSpace
  4. from minigrid.core.world_object import Door, Goal, Key, Wall
  5. from minigrid.minigrid_env import MiniGridEnv
  6. class LockedRoom:
  7. def __init__(self, top, size, doorPos):
  8. self.top = top
  9. self.size = size
  10. self.doorPos = doorPos
  11. self.color = None
  12. self.locked = False
  13. def rand_pos(self, env):
  14. topX, topY = self.top
  15. sizeX, sizeY = self.size
  16. return env._rand_pos(topX + 1, topX + sizeX - 1, topY + 1, topY + sizeY - 1)
  17. class LockedRoomEnv(MiniGridEnv):
  18. """
  19. ### Description
  20. The environment has six rooms, one of which is locked. The agent receives
  21. a textual mission string as input, telling it which room to go to in order
  22. to get the key that opens the locked room. It then has to go into the locked
  23. room in order to reach the final goal. This environment is extremely
  24. difficult to solve with vanilla reinforcement learning alone.
  25. ### Mission Space
  26. "get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal"
  27. {lockedroom_color}, {keyroom_color}, and {door_color} can be "red", "green",
  28. "blue", "purple", "yellow" or "grey".
  29. ### Action Space
  30. | Num | Name | Action |
  31. |-----|--------------|---------------------------|
  32. | 0 | left | Turn left |
  33. | 1 | right | Turn right |
  34. | 2 | forward | Move forward |
  35. | 3 | pickup | Pick up an object |
  36. | 4 | drop | Unused |
  37. | 5 | toggle | Toggle/activate an object |
  38. | 6 | done | Unused |
  39. ### Observation Encoding
  40. - Each tile is encoded as a 3 dimensional tuple:
  41. `(OBJECT_IDX, COLOR_IDX, STATE)`
  42. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  43. [minigrid/minigrid.py](minigrid/minigrid.py)
  44. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  45. ### Rewards
  46. A reward of '1' is given for success, and '0' for failure.
  47. ### Termination
  48. The episode ends if any one of the following conditions is met:
  49. 1. The agent reaches the goal.
  50. 2. Timeout (see `max_steps`).
  51. ### Registered Configurations
  52. - `MiniGrid-LockedRoom-v0`
  53. """
  54. def __init__(self, size=19, **kwargs):
  55. self.size = size
  56. mission_space = MissionSpace(
  57. mission_func=lambda lockedroom_color, keyroom_color, door_color: f"get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal",
  58. ordered_placeholders=[COLOR_NAMES] * 3,
  59. )
  60. super().__init__(
  61. mission_space=mission_space,
  62. width=size,
  63. height=size,
  64. max_steps=10 * size,
  65. **kwargs,
  66. )
  67. def _gen_grid(self, width, height):
  68. # Create the grid
  69. self.grid = Grid(width, height)
  70. # Generate the surrounding walls
  71. for i in range(0, width):
  72. self.grid.set(i, 0, Wall())
  73. self.grid.set(i, height - 1, Wall())
  74. for j in range(0, height):
  75. self.grid.set(0, j, Wall())
  76. self.grid.set(width - 1, j, Wall())
  77. # Hallway walls
  78. lWallIdx = width // 2 - 2
  79. rWallIdx = width // 2 + 2
  80. for j in range(0, height):
  81. self.grid.set(lWallIdx, j, Wall())
  82. self.grid.set(rWallIdx, j, Wall())
  83. self.rooms = []
  84. # Room splitting walls
  85. for n in range(0, 3):
  86. j = n * (height // 3)
  87. for i in range(0, lWallIdx):
  88. self.grid.set(i, j, Wall())
  89. for i in range(rWallIdx, width):
  90. self.grid.set(i, j, Wall())
  91. roomW = lWallIdx + 1
  92. roomH = height // 3 + 1
  93. self.rooms.append(LockedRoom((0, j), (roomW, roomH), (lWallIdx, j + 3)))
  94. self.rooms.append(
  95. LockedRoom((rWallIdx, j), (roomW, roomH), (rWallIdx, j + 3))
  96. )
  97. # Choose one random room to be locked
  98. lockedRoom = self._rand_elem(self.rooms)
  99. lockedRoom.locked = True
  100. goalPos = lockedRoom.rand_pos(self)
  101. self.grid.set(*goalPos, Goal())
  102. # Assign the door colors
  103. colors = set(COLOR_NAMES)
  104. for room in self.rooms:
  105. color = self._rand_elem(sorted(colors))
  106. colors.remove(color)
  107. room.color = color
  108. if room.locked:
  109. self.grid.set(*room.doorPos, Door(color, is_locked=True))
  110. else:
  111. self.grid.set(*room.doorPos, Door(color))
  112. # Select a random room to contain the key
  113. while True:
  114. keyRoom = self._rand_elem(self.rooms)
  115. if keyRoom != lockedRoom:
  116. break
  117. keyPos = keyRoom.rand_pos(self)
  118. self.grid.set(*keyPos, Key(lockedRoom.color))
  119. # Randomize the player start position and orientation
  120. self.agent_pos = self.place_agent(
  121. top=(lWallIdx, 0), size=(rWallIdx - lWallIdx, height)
  122. )
  123. # Generate the mission string
  124. self.mission = (
  125. "get the %s key from the %s room, "
  126. "unlock the %s door and "
  127. "go to the goal"
  128. ) % (lockedRoom.color, keyRoom.color, lockedRoom.color)