Browse Source

Patch rewards part in the document and fix GoToObjectEnv (#333)

SErAphLi 2 years ago
parent
commit
9ff888e889

+ 11 - 11
minigrid/envs/babyai/goto.py

@@ -44,7 +44,7 @@ class GoToRedBallGrey(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -110,7 +110,7 @@ class GoToRedBall(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -173,7 +173,7 @@ class GoToRedBallNoDists(GoToRedBall):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -230,7 +230,7 @@ class GoToObj(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -297,7 +297,7 @@ class GoToLocal(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -373,7 +373,7 @@ class GoTo(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -463,7 +463,7 @@ class GoToImpUnlock(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -568,7 +568,7 @@ class GoToSeq(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -636,7 +636,7 @@ class GoToRedBlueBall(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -712,7 +712,7 @@ class GoToDoor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -780,7 +780,7 @@ class GoToObjDoor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 5 - 5
minigrid/envs/babyai/open.py

@@ -51,7 +51,7 @@ class Open(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -121,7 +121,7 @@ class OpenRedDoor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -183,7 +183,7 @@ class OpenDoor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -267,7 +267,7 @@ class OpenTwoDoors(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -367,7 +367,7 @@ class OpenDoorsOrder(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 5 - 5
minigrid/envs/babyai/other.py

@@ -64,7 +64,7 @@ class ActionObjDoor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -142,7 +142,7 @@ class FindObjS5(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -211,7 +211,7 @@ class KeyCorridor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -305,7 +305,7 @@ class OneRoomS8(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -372,7 +372,7 @@ class MoveTwoAcross(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 5 - 5
minigrid/envs/babyai/pickup.py

@@ -47,7 +47,7 @@ class Pickup(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -110,7 +110,7 @@ class UnblockPickup(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -180,7 +180,7 @@ class PickupLoc(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -252,7 +252,7 @@ class PickupDist(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -328,7 +328,7 @@ class PickupAbove(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 2 - 2
minigrid/envs/babyai/putnext.py

@@ -47,7 +47,7 @@ class PutNextLocal(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -119,7 +119,7 @@ class PutNext(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 6 - 6
minigrid/envs/babyai/synth.py

@@ -64,7 +64,7 @@ class Synth(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -152,7 +152,7 @@ class SynthLoc(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -257,7 +257,7 @@ class SynthSeq(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -359,7 +359,7 @@ class MiniBossLevel(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -462,7 +462,7 @@ class BossLevel(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -558,7 +558,7 @@ class BossLevelNoUnlock(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 6 - 6
minigrid/envs/babyai/unlock.py

@@ -48,7 +48,7 @@ class Unlock(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -144,7 +144,7 @@ class UnlockLocal(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -207,7 +207,7 @@ class KeyInBox(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -274,7 +274,7 @@ class UnlockPickup(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -349,7 +349,7 @@ class BlockedUnlockPickup(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -422,7 +422,7 @@ class UnlockToUnlock(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/blockedunlockpickup.py

@@ -49,7 +49,7 @@ class BlockedUnlockPickupEnv(RoomGrid):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/crossing.py

@@ -54,7 +54,7 @@ class CrossingEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/distshift.py

@@ -45,7 +45,7 @@ class DistShiftEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/doorkey.py

@@ -42,7 +42,7 @@ class DoorKeyEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/dynamicobstacles.py

@@ -47,7 +47,7 @@ class DynamicObstaclesEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure. A '-1' penalty is
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. A '-1' penalty is
     subtracted if the agent collides with an obstacle.
 
     ## Termination

+ 1 - 1
minigrid/envs/empty.py

@@ -45,7 +45,7 @@ class EmptyEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/fetch.py

@@ -51,7 +51,7 @@ class FetchEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/fourrooms.py

@@ -42,7 +42,7 @@ class FourRoomsEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/gotodoor.py

@@ -46,7 +46,7 @@ class GoToDoorEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 53 - 3
minigrid/envs/gotoobject.py

@@ -9,8 +9,58 @@ from minigrid.minigrid_env import MiniGridEnv
 
 class GoToObjectEnv(MiniGridEnv):
     """
-    Environment in which the agent is instructed to go to a given object
-    named using an English text string
+    ## Description
+
+    This environment is a room with colored objects. The agent
+    receives a textual (mission) string as input, telling it which colored object to go
+    to (e.g. "go to the red key"). It receives a positive reward for performing
+    the `done` action next to the correct object, as indicated in the mission
+    string.
+
+    ## Mission Space
+
+    "go to the {color} {obj_type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+    {obj_type} is the type of the object. Can be "key", "ball", "box".
+
+    ## Action Space
+
+    | Num | Name         | Action               |
+    |-----|--------------|----------------------|
+    | 0   | left         | Turn left            |
+    | 1   | right        | Turn right           |
+    | 2   | forward      | Move forward         |
+    | 3   | pickup       | Unused               |
+    | 4   | drop         | Unused               |
+    | 5   | toggle       | Unused               |
+    | 6   | done         | Done completing task |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent stands next to the correct object and performs the `done` action.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `MiniGrid-GoToObject-6x6-N2-v0`
+    - `MiniGrid-GoToObject-8x8-N2-v0`
+
     """
 
     def __init__(self, size=6, numObjs=2, max_steps: int | None = None, **kwargs):
@@ -104,7 +154,7 @@ class GoToObjectEnv(MiniGridEnv):
 
         # Reward performing the done action next to the target object
         if action == self.actions.done:
-            if abs(ax - tx) <= 1 and abs(ay - ty) <= 1:
+            if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
                 reward = self._reward()
             terminated = True
 

+ 1 - 1
minigrid/envs/keycorridor.py

@@ -49,7 +49,7 @@ class KeyCorridorEnv(RoomGrid):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/lavagap.py

@@ -46,7 +46,7 @@ class LavaGapEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/lockedroom.py

@@ -61,7 +61,7 @@ class LockedRoomEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/memory.py

@@ -46,7 +46,7 @@ class MemoryEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/multiroom.py

@@ -52,7 +52,7 @@ class MultiRoomEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/obstructedmaze.py

@@ -41,7 +41,7 @@ class ObstructedMazeEnv(RoomGrid):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/putnear.py

@@ -48,7 +48,7 @@ class PutNearEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/redbluedoors.py

@@ -42,7 +42,7 @@ class RedBlueDoorEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/unlock.py

@@ -38,7 +38,7 @@ class UnlockEnv(RoomGrid):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

+ 1 - 1
minigrid/envs/unlockpickup.py

@@ -42,7 +42,7 @@ class UnlockPickupEnv(RoomGrid):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination