lockedroom.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. from gym_minigrid.minigrid import (
  2. COLOR_NAMES,
  3. Door,
  4. Goal,
  5. Grid,
  6. Key,
  7. MiniGridEnv,
  8. MissionSpace,
  9. Wall,
  10. )
  11. class LockedRoom:
  12. def __init__(self, top, size, doorPos):
  13. self.top = top
  14. self.size = size
  15. self.doorPos = doorPos
  16. self.color = None
  17. self.locked = False
  18. def rand_pos(self, env):
  19. topX, topY = self.top
  20. sizeX, sizeY = self.size
  21. return env._rand_pos(topX + 1, topX + sizeX - 1, topY + 1, topY + sizeY - 1)
  22. class LockedRoomEnv(MiniGridEnv):
  23. """
  24. ### Description
  25. The environment has six rooms, one of which is locked. The agent receives
  26. a textual mission string as input, telling it which room to go to in order
  27. to get the key that opens the locked room. It then has to go into the locked
  28. room in order to reach the final goal. This environment is extremely
  29. difficult to solve with vanilla reinforcement learning alone.
  30. ### Mission Space
  31. "get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal"
  32. {lockedroom_color}, {keyroom_color}, and {door_color} can be "red", "green",
  33. "blue", "purple", "yellow" or "grey".
  34. ### Action Space
  35. | Num | Name | Action |
  36. |-----|--------------|---------------------------|
  37. | 0 | left | Turn left |
  38. | 1 | right | Turn right |
  39. | 2 | forward | Move forward |
  40. | 3 | pickup | Pick up an object |
  41. | 4 | drop | Unused |
  42. | 5 | toggle | Toggle/activate an object |
  43. | 6 | done | Unused |
  44. ### Observation Encoding
  45. - Each tile is encoded as a 3 dimensional tuple:
  46. `(OBJECT_IDX, COLOR_IDX, STATE)`
  47. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  48. [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
  49. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  50. ### Rewards
  51. A reward of '1' is given for success, and '0' for failure.
  52. ### Termination
  53. The episode ends if any one of the following conditions is met:
  54. 1. The agent reaches the goal.
  55. 2. Timeout (see `max_steps`).
  56. ### Registered Configurations
  57. - `MiniGrid-LockedRoom-v0`
  58. """
  59. def __init__(self, size=19, **kwargs):
  60. self.size = size
  61. mission_space = MissionSpace(
  62. mission_func=lambda lockedroom_color, keyroom_color, door_color: f"get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal",
  63. ordered_placeholders=[COLOR_NAMES] * 3,
  64. )
  65. super().__init__(
  66. mission_space=mission_space,
  67. width=size,
  68. height=size,
  69. max_steps=10 * size,
  70. **kwargs,
  71. )
  72. def _gen_grid(self, width, height):
  73. # Create the grid
  74. self.grid = Grid(width, height)
  75. # Generate the surrounding walls
  76. for i in range(0, width):
  77. self.grid.set(i, 0, Wall())
  78. self.grid.set(i, height - 1, Wall())
  79. for j in range(0, height):
  80. self.grid.set(0, j, Wall())
  81. self.grid.set(width - 1, j, Wall())
  82. # Hallway walls
  83. lWallIdx = width // 2 - 2
  84. rWallIdx = width // 2 + 2
  85. for j in range(0, height):
  86. self.grid.set(lWallIdx, j, Wall())
  87. self.grid.set(rWallIdx, j, Wall())
  88. self.rooms = []
  89. # Room splitting walls
  90. for n in range(0, 3):
  91. j = n * (height // 3)
  92. for i in range(0, lWallIdx):
  93. self.grid.set(i, j, Wall())
  94. for i in range(rWallIdx, width):
  95. self.grid.set(i, j, Wall())
  96. roomW = lWallIdx + 1
  97. roomH = height // 3 + 1
  98. self.rooms.append(LockedRoom((0, j), (roomW, roomH), (lWallIdx, j + 3)))
  99. self.rooms.append(
  100. LockedRoom((rWallIdx, j), (roomW, roomH), (rWallIdx, j + 3))
  101. )
  102. # Choose one random room to be locked
  103. lockedRoom = self._rand_elem(self.rooms)
  104. lockedRoom.locked = True
  105. goalPos = lockedRoom.rand_pos(self)
  106. self.grid.set(*goalPos, Goal())
  107. # Assign the door colors
  108. colors = set(COLOR_NAMES)
  109. for room in self.rooms:
  110. color = self._rand_elem(sorted(colors))
  111. colors.remove(color)
  112. room.color = color
  113. if room.locked:
  114. self.grid.set(*room.doorPos, Door(color, is_locked=True))
  115. else:
  116. self.grid.set(*room.doorPos, Door(color))
  117. # Select a random room to contain the key
  118. while True:
  119. keyRoom = self._rand_elem(self.rooms)
  120. if keyRoom != lockedRoom:
  121. break
  122. keyPos = keyRoom.rand_pos(self)
  123. self.grid.set(*keyPos, Key(lockedRoom.color))
  124. # Randomize the player start position and orientation
  125. self.agent_pos = self.place_agent(
  126. top=(lWallIdx, 0), size=(rWallIdx - lWallIdx, height)
  127. )
  128. # Generate the mission string
  129. self.mission = (
  130. "get the %s key from the %s room, "
  131. "unlock the %s door and "
  132. "go to the goal"
  133. ) % (lockedRoom.color, keyRoom.color, lockedRoom.color)