
Added reward penalty based on number of time steps taken

Maxime Chevalier-Boisvert, 7 years ago
commit c99822121e
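
This commit replaces the flat success reward of 1 with a time-discounted one: the new `_reward()` helper scales the payout by the fraction of the step budget already consumed, so faster completions earn more. A minimal sketch of the schedule, using the formula introduced in `gym_minigrid/minigrid.py` below (the standalone function here is illustrative only):

```python
def _reward(step_count, max_steps):
    # Success reward decays linearly from 1.0 (instant win)
    # down to 0.5 (win on the very last allowed step).
    return 1 - 0.5 * (step_count / max_steps)

print(_reward(0, 100))    # 1.0
print(_reward(50, 100))   # 0.75
print(_reward(100, 100))  # 0.5
```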

gym_minigrid/envs/fetch.py (+2 -2)

@@ -16,7 +16,7 @@ class FetchEnv(MiniGridEnv):
 
         super().__init__(
             grid_size=size,
-            max_steps=5*size,
+            max_steps=5*size**2,
             # Set this to True for maximum speed
             see_through_walls=True
         )
@@ -77,7 +77,7 @@ class FetchEnv(MiniGridEnv):
         if self.carrying:
             if self.carrying.color == self.targetColor and \
                self.carrying.type == self.targetType:
-                reward = 1
+                reward = self._reward()
                 done = True
             else:
                 reward = 0
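
The `max_steps` change (here and in the two envs below) scales the step budget with the grid's area rather than its side length, since the number of cells the agent may have to cover grows quadratically with `size`. A quick illustration of the two budgets for a few sample grid sizes (the sizes themselves are just examples):

```python
# Old linear budget vs. new quadratic budget for sample grid sizes.
for size in (5, 8, 16):
    print(size, 5 * size, 5 * size ** 2)
# 5   25   125
# 8   40   320
# 16  80  1280
```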

gym_minigrid/envs/gotodoor.py (+2 -2)

@@ -15,7 +15,7 @@ class GoToDoorEnv(MiniGridEnv):
 
         super().__init__(
             grid_size=size,
-            max_steps=5*size,
+            max_steps=5*size**2,
             # Set this to True for maximum speed
             see_through_walls=True
         )
@@ -75,7 +75,7 @@ class GoToDoorEnv(MiniGridEnv):
         # Reward performing done action in front of the target door
         if action == self.actions.done:
             if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
-                reward = 1
+                reward = self._reward()
             done = True
 
         return obs, reward, done, info

gym_minigrid/envs/gotoobject.py (+2 -2)

@@ -16,7 +16,7 @@ class GoToObjectEnv(MiniGridEnv):
 
         super().__init__(
             grid_size=size,
-            max_steps=5*size,
+            max_steps=5*size**2,
             # Set this to True for maximum speed
             see_through_walls=True
         )
@@ -78,7 +78,7 @@ class GoToObjectEnv(MiniGridEnv):
         # Reward performing the done action next to the target object
         if action == self.actions.done:
             if abs(ax - tx) <= 1 and abs(ay - ty) <= 1:
-                reward = 1
+                reward = self._reward()
             done = True
 
         return obs, reward, done, info

gym_minigrid/envs/putnear.py (+1 -1)

@@ -106,7 +106,7 @@ class PutNearEnv(MiniGridEnv):
         if action == self.actions.drop and preCarrying:
             if self.grid.get(ox, oy) is preCarrying:
                 if abs(ox - tx) <= 1 and abs(oy - ty) <= 1:
-                    reward = 1
+                    reward = self._reward()
             done = True
 
         return obs, reward, done, info

gym_minigrid/envs/redbluedoors.py (+1 -1)

@@ -60,7 +60,7 @@ class RedBlueDoorEnv(MiniGridEnv):
             self.resolution_state += 1
         elif self.resolution_state == 1 and blue_door_opened:
             self.resolution_state += 1
-            reward = 1
+            reward = self._reward()
             done = True
 
         return obs, reward, done, info

gym_minigrid/minigrid.py (+8 -1)

@@ -785,6 +785,13 @@ class MiniGridEnv(gym.Env):
     def _gen_grid(self, width, height):
         assert False, "_gen_grid needs to be implemented by each environment"
 
+    def _reward(self):
+        """
+        Compute the reward to be given upon success
+        """
+
+        return 1 - 0.5 * (self.step_count / self.max_steps)
+
     def _rand_int(self, low, high):
         """
         Generate random integer in [low,high[
@@ -1014,7 +1021,7 @@ class MiniGridEnv(gym.Env):
                 self.agent_pos = fwd_pos
             if fwd_cell != None and fwd_cell.type == 'goal':
                 done = True
-                reward = 1
+                reward = self._reward()
 
         # Pick up an object
         elif action == self.actions.pickup:
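
With `_reward()` on the base class, the scalar an agent receives on success now encodes how quickly it solved the episode. A hedged usage sketch under the pre-Gymnasium `gym` API shown in the diffs (the env id and the random policy are illustrative, not part of this commit):

```python
import gym
import gym_minigrid  # noqa: F401  (registers the MiniGrid-* envs)

env = gym.make('MiniGrid-Empty-8x8-v0')  # illustrative env id
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())

# On success the reward falls between 0.5 and 1.0; on timeout it stays 0.
print('final reward: %.2f' % reward)
```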

standalone.py (+1 -1)

@@ -67,7 +67,7 @@ def main():
 
         obs, reward, done, info = env.step(action)
 
-        print('step=%s, reward=%s' % (env.step_count, reward))
+        print('step=%s, reward=%.2f' % (env.step_count, reward))
 
         if done:
             print('done!')