2 anos atrás · 9ff888e889
--- a/minigrid/envs/babyai/goto.py
+++ b/minigrid/envs/babyai/goto.py
@@ -44,7 +44,7 @@ class GoToRedBallGrey(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -110,7 +110,7 @@ class GoToRedBall(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -173,7 +173,7 @@ class GoToRedBallNoDists(GoToRedBall):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -230,7 +230,7 @@ class GoToObj(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -297,7 +297,7 @@ class GoToLocal(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -373,7 +373,7 @@ class GoTo(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -463,7 +463,7 @@ class GoToImpUnlock(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -568,7 +568,7 @@ class GoToSeq(LevelGen):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -636,7 +636,7 @@ class GoToRedBlueBall(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -712,7 +712,7 @@ class GoToDoor(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -780,7 +780,7 @@ class GoToObjDoor(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/babyai/open.py
+++ b/minigrid/envs/babyai/open.py
@@ -51,7 +51,7 @@ class Open(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -121,7 +121,7 @@ class OpenRedDoor(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -183,7 +183,7 @@ class OpenDoor(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -267,7 +267,7 @@ class OpenTwoDoors(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -367,7 +367,7 @@ class OpenDoorsOrder(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/babyai/other.py
+++ b/minigrid/envs/babyai/other.py
@@ -64,7 +64,7 @@ class ActionObjDoor(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -142,7 +142,7 @@ class FindObjS5(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -211,7 +211,7 @@ class KeyCorridor(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -305,7 +305,7 @@ class OneRoomS8(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -372,7 +372,7 @@ class MoveTwoAcross(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/babyai/pickup.py
+++ b/minigrid/envs/babyai/pickup.py
@@ -47,7 +47,7 @@ class Pickup(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -110,7 +110,7 @@ class UnblockPickup(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -180,7 +180,7 @@ class PickupLoc(LevelGen):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -252,7 +252,7 @@ class PickupDist(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -328,7 +328,7 @@ class PickupAbove(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/babyai/putnext.py
+++ b/minigrid/envs/babyai/putnext.py
@@ -47,7 +47,7 @@ class PutNextLocal(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -119,7 +119,7 @@ class PutNext(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/babyai/synth.py
+++ b/minigrid/envs/babyai/synth.py
@@ -64,7 +64,7 @@ class Synth(LevelGen):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -152,7 +152,7 @@ class SynthLoc(LevelGen):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -257,7 +257,7 @@ class SynthSeq(LevelGen):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -359,7 +359,7 @@ class MiniBossLevel(LevelGen):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -462,7 +462,7 @@ class BossLevel(LevelGen):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -558,7 +558,7 @@ class BossLevelNoUnlock(LevelGen):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/babyai/unlock.py
+++ b/minigrid/envs/babyai/unlock.py
@@ -48,7 +48,7 @@ class Unlock(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -144,7 +144,7 @@ class UnlockLocal(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -207,7 +207,7 @@ class KeyInBox(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -274,7 +274,7 @@ class UnlockPickup(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -349,7 +349,7 @@ class BlockedUnlockPickup(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
@@ -422,7 +422,7 @@ class UnlockToUnlock(RoomGridLevel):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/blockedunlockpickup.py
+++ b/minigrid/envs/blockedunlockpickup.py
@@ -49,7 +49,7 @@ class BlockedUnlockPickupEnv(RoomGrid):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/crossing.py
+++ b/minigrid/envs/crossing.py
@@ -54,7 +54,7 @@ class CrossingEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/distshift.py
+++ b/minigrid/envs/distshift.py
@@ -45,7 +45,7 @@ class DistShiftEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/doorkey.py
+++ b/minigrid/envs/doorkey.py
@@ -42,7 +42,7 @@ class DoorKeyEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/dynamicobstacles.py
+++ b/minigrid/envs/dynamicobstacles.py
@@ -47,7 +47,7 @@ class DynamicObstaclesEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure. A '-1' penalty is
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. A '-1' penalty is
			
 
				     subtracted if the agent collides with an obstacle.
			
 
				 
			
 
				     ## Termination
			
--- a/minigrid/envs/empty.py
+++ b/minigrid/envs/empty.py
@@ -45,7 +45,7 @@ class EmptyEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/fetch.py
+++ b/minigrid/envs/fetch.py
@@ -51,7 +51,7 @@ class FetchEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/fourrooms.py
+++ b/minigrid/envs/fourrooms.py
@@ -42,7 +42,7 @@ class FourRoomsEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/gotodoor.py
+++ b/minigrid/envs/gotodoor.py
@@ -46,7 +46,7 @@ class GoToDoorEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/gotoobject.py
+++ b/minigrid/envs/gotoobject.py
@@ -9,8 +9,58 @@ from minigrid.minigrid_env import MiniGridEnv
 
				 
			
 
				 class GoToObjectEnv(MiniGridEnv):
			
 
				     """
			
 
				-    Environment in which the agent is instructed to go to a given object
			
 
				-    named using an English text string
			
 
				+    ## Description
			
 
				+
			
 
				+    This environment is a room with colored objects. The agent
			
 
				+    receives a textual (mission) string as input, telling it which colored object to go
			
 
				+    to (e.g. "go to the red key"). It receives a positive reward for performing
			
 
				+    the `done` action next to the correct object, as indicated in the mission
			
 
				+    string.
			
 
				+
			
 
				+    ## Mission Space
			
 
				+
			
 
				+    "go to the {color} {obj_type}"
			
 
				+
			
 
				+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
			
 
				+    "yellow" or "grey".
			
 
				+    {obj_type} is the type of the object. Can be "key", "ball", "box".
			
 
				+
			
 
				+    ## Action Space
			
 
				+
			
 
				+    | Num | Name         | Action               |
			
 
				+    |-----|--------------|----------------------|
			
 
				+    | 0   | left         | Turn left            |
			
 
				+    | 1   | right        | Turn right           |
			
 
				+    | 2   | forward      | Move forward         |
			
 
				+    | 3   | pickup       | Unused               |
			
 
				+    | 4   | drop         | Unused               |
			
 
				+    | 5   | toggle       | Unused               |
			
 
				+    | 6   | done         | Done completing task |
			
 
				+
			
 
				+    ## Observation Encoding
			
 
				+
			
 
				+    - Each tile is encoded as a 3 dimensional tuple:
			
 
				+        `(OBJECT_IDX, COLOR_IDX, STATE)`
			
 
				+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
			
 
				+        [minigrid/minigrid.py](minigrid/minigrid.py)
			
 
				+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
			
 
				+
			
 
				+    ## Rewards
			
 
				+
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				+
			
 
				+    ## Termination
			
 
				+
			
 
				+    The episode ends if any one of the following conditions is met:
			
 
				+
			
 
				+    1. The agent stands next to the correct object and performs the `done` action.
			
 
				+    2. Timeout (see `max_steps`).
			
 
				+
			
 
				+    ## Registered Configurations
			
 
				+
			
 
				+    - `MiniGrid-GoToObject-6x6-N2-v0`
			
 
				+    - `MiniGrid-GoToObject-8x8-N2-v0`
			
 
				+
			
 
				     """
			
 
				 
			
 
				     def __init__(self, size=6, numObjs=2, max_steps: int | None = None, **kwargs):
			
@@ -104,7 +154,7 @@ class GoToObjectEnv(MiniGridEnv):
 
				 
			
 
				         # Reward performing the done action next to the target object
			
 
				         if action == self.actions.done:
			
 
				-            if abs(ax - tx) <= 1 and abs(ay - ty) <= 1:
			
 
				+            if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
			
 
				                 reward = self._reward()
			
 
				             terminated = True
			
 
				 
			
--- a/minigrid/envs/keycorridor.py
+++ b/minigrid/envs/keycorridor.py
@@ -49,7 +49,7 @@ class KeyCorridorEnv(RoomGrid):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/lavagap.py
+++ b/minigrid/envs/lavagap.py
@@ -46,7 +46,7 @@ class LavaGapEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/lockedroom.py
+++ b/minigrid/envs/lockedroom.py
@@ -61,7 +61,7 @@ class LockedRoomEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/memory.py
+++ b/minigrid/envs/memory.py
@@ -46,7 +46,7 @@ class MemoryEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/multiroom.py
+++ b/minigrid/envs/multiroom.py
@@ -52,7 +52,7 @@ class MultiRoomEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/obstructedmaze.py
+++ b/minigrid/envs/obstructedmaze.py
@@ -41,7 +41,7 @@ class ObstructedMazeEnv(RoomGrid):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/putnear.py
+++ b/minigrid/envs/putnear.py
@@ -48,7 +48,7 @@ class PutNearEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/redbluedoors.py
+++ b/minigrid/envs/redbluedoors.py
@@ -42,7 +42,7 @@ class RedBlueDoorEnv(MiniGridEnv):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/unlock.py
+++ b/minigrid/envs/unlock.py
@@ -38,7 +38,7 @@ class UnlockEnv(RoomGrid):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination
			
 
				 
			
--- a/minigrid/envs/unlockpickup.py
+++ b/minigrid/envs/unlockpickup.py
@@ -42,7 +42,7 @@ class UnlockPickupEnv(RoomGrid):
 
				 
			
 
				     ## Rewards
			
 
				 
			
 
				-    A reward of '1' is given for success, and '0' for failure.
			
 
				+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
			
 
				 
			
 
				     ## Termination