123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- from minigrid.core.constants import COLOR_NAMES
- from minigrid.core.grid import Grid
- from minigrid.core.mission import MissionSpace
- from minigrid.core.world_object import Door, Goal, Key, Wall
- from minigrid.minigrid_env import MiniGridEnv
class LockedRoom:
    """Bookkeeping record for one rectangular room of the locked-room layout.

    The room itself does not draw anything; it only remembers where it sits,
    where its door is, and which colour / lock state it has been given.
    """

    def __init__(self, top, size, doorPos):
        """Store the room geometry.

        Args:
            top: ``(x, y)`` pair; unpacked as the room's origin in ``rand_pos``.
            size: ``(width, height)`` pair; unpacked as the room's extent in
                ``rand_pos``.
            doorPos: ``(x, y)`` pair kept as the location of the room's door.
        """
        self.top = top
        self.size = size
        self.doorPos = doorPos
        # Filled in later by whoever builds the layout.
        self.color = None
        self.locked = False

    def rand_pos(self, env):
        """Ask ``env`` for a random position inside this room.

        The bounds handed to ``env._rand_pos`` are shrunk by one cell on the
        low side and one on the high side of each axis, relative to
        ``top`` / ``top + size``. Whether the upper bounds are inclusive is
        decided by ``env._rand_pos`` itself.

        Args:
            env: object exposing ``_rand_pos(x_low, x_high, y_low, y_high)``.

        Returns:
            Whatever ``env._rand_pos`` returns for those bounds.
        """
        left, upper = self.top
        span_x, span_y = self.size
        return env._rand_pos(
            left + 1,
            left + span_x - 1,
            upper + 1,
            upper + span_y - 1,
        )
class LockedRoomEnv(MiniGridEnv):

    """
    ### Description

    The environment has six rooms, one of which is locked. The agent receives
    a textual mission string as input, telling it which room to go to in order
    to get the key that opens the locked room. It then has to go into the locked
    room in order to reach the final goal. This environment is extremely
    difficult to solve with vanilla reinforcement learning alone.

    ### Mission Space

    "get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal"

    {lockedroom_color}, {keyroom_color}, and {door_color} can be "red", "green",
    "blue", "purple", "yellow" or "grey".

    ### Action Space

    | Num | Name         | Action                    |
    |-----|--------------|---------------------------|
    | 0   | left         | Turn left                 |
    | 1   | right        | Turn right                |
    | 2   | forward      | Move forward              |
    | 3   | pickup       | Pick up an object         |
    | 4   | drop         | Unused                    |
    | 5   | toggle       | Toggle/activate an object |
    | 6   | done         | Unused                    |

    ### Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/core/constants.py](minigrid/core/constants.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ### Rewards

    A reward of '1' is given for success, and '0' for failure.

    ### Termination

    The episode ends if any one of the following conditions is met:

    1. The agent reaches the goal.
    2. Timeout (see `max_steps`).

    ### Registered Configurations

    - `MiniGrid-LockedRoom-v0`

    """

    def __init__(self, size=19, max_steps=None, **kwargs):
        """Build the environment.

        Args:
            size: side length of the square grid (used for both width and
                height).
            max_steps: step budget forwarded to the base class. When omitted
                it defaults to ``10 * size``. Taking it as an explicit
                parameter lets callers override it; previously passing
                ``max_steps`` collided with the hard-coded keyword and raised
                ``TypeError``.
            **kwargs: forwarded unchanged to ``MiniGridEnv.__init__``.
        """
        self.size = size

        if max_steps is None:
            max_steps = 10 * size

        mission_space = MissionSpace(
            mission_func=self._gen_mission,
            ordered_placeholders=[COLOR_NAMES] * 3,
        )
        super().__init__(
            mission_space=mission_space,
            width=size,
            height=size,
            max_steps=max_steps,
            **kwargs,
        )

    @staticmethod
    def _gen_mission(lockedroom_color, keyroom_color, door_color):
        """Render the mission sentence for the given colours.

        Single source of truth for the mission text: used both as the
        ``MissionSpace`` template and to set ``self.mission`` in ``_gen_grid``.
        """
        return (
            f"get the {lockedroom_color} key from the {keyroom_color} room, "
            f"unlock the {door_color} door and go to the goal"
        )

    def _gen_grid(self, width, height):
        """Lay out walls, rooms, doors, goal, key and agent, then set the mission.

        Args:
            width: grid width in cells.
            height: grid height in cells.
        """
        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        for i in range(width):
            self.grid.set(i, 0, Wall())
            self.grid.set(i, height - 1, Wall())
        for j in range(height):
            self.grid.set(0, j, Wall())
            self.grid.set(width - 1, j, Wall())

        # Hallway walls: two full-height columns around the grid centre
        lWallIdx = width // 2 - 2
        rWallIdx = width // 2 + 2
        for j in range(height):
            self.grid.set(lWallIdx, j, Wall())
            self.grid.set(rWallIdx, j, Wall())

        self.rooms = []

        # Room splitting walls: three bands, each yielding one room on the
        # left of the hallway and one on the right
        roomW = lWallIdx + 1
        roomH = height // 3 + 1
        for n in range(3):
            j = n * (height // 3)
            for i in range(lWallIdx):
                self.grid.set(i, j, Wall())
            for i in range(rWallIdx, width):
                self.grid.set(i, j, Wall())

            # Doors sit on the hallway walls, 3 rows below the band's top wall
            # (the middle row of the room for the default size=19).
            self.rooms.append(LockedRoom((0, j), (roomW, roomH), (lWallIdx, j + 3)))
            self.rooms.append(
                LockedRoom((rWallIdx, j), (roomW, roomH), (rWallIdx, j + 3))
            )

        # Choose one random room to be locked; the goal goes inside it
        lockedRoom = self._rand_elem(self.rooms)
        lockedRoom.locked = True
        goalPos = lockedRoom.rand_pos(self)
        self.grid.set(*goalPos, Goal())

        # Assign the door colors without repetition. The pool is kept as a
        # sorted list so every draw sees the same ordered candidates that
        # re-sorting a shrinking set would produce.
        available = sorted(set(COLOR_NAMES))
        for room in self.rooms:
            color = self._rand_elem(available)
            available.remove(color)
            room.color = color
            if room.locked:
                self.grid.set(*room.doorPos, Door(color, is_locked=True))
            else:
                self.grid.set(*room.doorPos, Door(color))

        # Select a random room to contain the key; redraw until it is not the
        # locked room (the redraw loop is kept so the random draw sequence
        # matches the original implementation).
        while True:
            keyRoom = self._rand_elem(self.rooms)
            if keyRoom is not lockedRoom:
                break
        keyPos = keyRoom.rand_pos(self)
        self.grid.set(*keyPos, Key(lockedRoom.color))

        # Randomize the player start position and orientation (inside the
        # hallway region between the two hallway walls)
        self.agent_pos = self.place_agent(
            top=(lWallIdx, 0), size=(rWallIdx - lWallIdx, height)
        )

        # Generate the mission string: the key and the locked door share the
        # locked room's colour
        self.mission = self._gen_mission(
            lockedRoom.color, keyRoom.color, lockedRoom.color
        )
|