putnear.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. from minigrid.core.constants import COLOR_NAMES
  2. from minigrid.core.grid import Grid
  3. from minigrid.core.mission import MissionSpace
  4. from minigrid.core.world_object import Ball, Box, Key
  5. from minigrid.minigrid_env import MiniGridEnv
  6. class PutNearEnv(MiniGridEnv):
  7. """
  8. ### Description
  9. The agent is instructed through a textual string to pick up an object and
  10. place it next to another object. This environment is easy to solve with two
  11. objects, but difficult to solve with more, as it involves both textual
  12. understanding and spatial reasoning involving multiple objects.
  13. ### Mission Space
  14. "put the {move_color} {move_type} near the {target_color} {target_type}"
  15. {move_color} and {target_color} can be "red", "green", "blue", "purple",
  16. "yellow" or "grey".
  17. {move_type} and {target_type} Can be "box", "ball" or "key".
  18. ### Action Space
  19. | Num | Name | Action |
  20. |-----|--------------|-------------------|
  21. | 0 | left | Turn left |
  22. | 1 | right | Turn right |
  23. | 2 | forward | Move forward |
  24. | 3 | pickup | Pick up an object |
  25. | 4 | drop | Drop an object |
  26. | 5 | toggle | Unused |
  27. | 6 | done | Unused |
  28. ### Observation Encoding
  29. - Each tile is encoded as a 3 dimensional tuple:
  30. `(OBJECT_IDX, COLOR_IDX, STATE)`
  31. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  32. [minigrid/minigrid.py](minigrid/minigrid.py)
  33. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  34. ### Rewards
  35. A reward of '1' is given for success, and '0' for failure.
  36. ### Termination
  37. The episode ends if any one of the following conditions is met:
  38. 1. The agent picks up the wrong object.
  39. 2. The agent drop the correct object near the target.
  40. 3. Timeout (see `max_steps`).
  41. ### Registered Configurations
  42. N: number of objects.
  43. - `MiniGrid-PutNear-6x6-N2-v0`
  44. - `MiniGrid-PutNear-8x8-N3-v0`
  45. """
  46. def __init__(self, size=6, numObjs=2, **kwargs):
  47. self.size = size
  48. self.numObjs = numObjs
  49. self.obj_types = ["key", "ball", "box"]
  50. mission_space = MissionSpace(
  51. mission_func=lambda move_color, move_type, target_color, target_type: f"put the {move_color} {move_type} near the {target_color} {target_type}",
  52. ordered_placeholders=[
  53. COLOR_NAMES,
  54. self.obj_types,
  55. COLOR_NAMES,
  56. self.obj_types,
  57. ],
  58. )
  59. super().__init__(
  60. mission_space=mission_space,
  61. width=size,
  62. height=size,
  63. max_steps=5 * size,
  64. # Set this to True for maximum speed
  65. see_through_walls=True,
  66. **kwargs,
  67. )
  68. def _gen_grid(self, width, height):
  69. self.grid = Grid(width, height)
  70. # Generate the surrounding walls
  71. self.grid.horz_wall(0, 0)
  72. self.grid.horz_wall(0, height - 1)
  73. self.grid.vert_wall(0, 0)
  74. self.grid.vert_wall(width - 1, 0)
  75. # Types and colors of objects we can generate
  76. types = ["key", "ball", "box"]
  77. objs = []
  78. objPos = []
  79. def near_obj(env, p1):
  80. for p2 in objPos:
  81. dx = p1[0] - p2[0]
  82. dy = p1[1] - p2[1]
  83. if abs(dx) <= 1 and abs(dy) <= 1:
  84. return True
  85. return False
  86. # Until we have generated all the objects
  87. while len(objs) < self.numObjs:
  88. objType = self._rand_elem(types)
  89. objColor = self._rand_elem(COLOR_NAMES)
  90. # If this object already exists, try again
  91. if (objType, objColor) in objs:
  92. continue
  93. if objType == "key":
  94. obj = Key(objColor)
  95. elif objType == "ball":
  96. obj = Ball(objColor)
  97. elif objType == "box":
  98. obj = Box(objColor)
  99. else:
  100. raise ValueError(
  101. "{} object type given. Object type can only be of values key, ball and box.".format(
  102. objType
  103. )
  104. )
  105. pos = self.place_obj(obj, reject_fn=near_obj)
  106. objs.append((objType, objColor))
  107. objPos.append(pos)
  108. # Randomize the agent start position and orientation
  109. self.place_agent()
  110. # Choose a random object to be moved
  111. objIdx = self._rand_int(0, len(objs))
  112. self.move_type, self.moveColor = objs[objIdx]
  113. self.move_pos = objPos[objIdx]
  114. # Choose a target object (to put the first object next to)
  115. while True:
  116. targetIdx = self._rand_int(0, len(objs))
  117. if targetIdx != objIdx:
  118. break
  119. self.target_type, self.target_color = objs[targetIdx]
  120. self.target_pos = objPos[targetIdx]
  121. self.mission = "put the {} {} near the {} {}".format(
  122. self.moveColor,
  123. self.move_type,
  124. self.target_color,
  125. self.target_type,
  126. )
  127. def step(self, action):
  128. preCarrying = self.carrying
  129. obs, reward, terminated, truncated, info = super().step(action)
  130. u, v = self.dir_vec
  131. ox, oy = (self.agent_pos[0] + u, self.agent_pos[1] + v)
  132. tx, ty = self.target_pos
  133. # If we picked up the wrong object, terminate the episode
  134. if action == self.actions.pickup and self.carrying:
  135. if (
  136. self.carrying.type != self.move_type
  137. or self.carrying.color != self.moveColor
  138. ):
  139. terminated = True
  140. # If successfully dropping an object near the target
  141. if action == self.actions.drop and preCarrying:
  142. if self.grid.get(ox, oy) is preCarrying:
  143. if abs(ox - tx) <= 1 and abs(oy - ty) <= 1:
  144. reward = self._reward()
  145. terminated = True
  146. return obs, reward, terminated, truncated, info