
Made reward_range the same for all environments, rewards are in [0, 1]

Maxime Chevalier-Boisvert 7 years ago
commit 146fd10741

+ 6 - 4
README.md

@@ -116,10 +116,12 @@ Actions in the basic environment:
 - Toggle (interact with objects)
 - Wait (noop, do nothing)
 
-By default, sparse rewards for reaching a goal square are provided, but you can
-define your own reward function by creating a class derived from MiniGridEnv. Extending
-the environment with new object types or action should be very easy.
-If you wish to do this, you should take a look at the
+By default, sparse rewards are given for reaching a green goal tile. A
+reward of 1 is given for success, and zero for failure. There is also an
+environment-specific time step limit for completing the task.
+You can define your own reward function by creating a class derived
+from `MiniGridEnv`. Extending the environment with new object types or actions
+should be very easy. If you wish to do this, you should take a look at the
 [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py) source file.
 
 ## Included Environments
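
For context, the README's suggestion can be sketched as follows: subclass an existing environment and post-process the reward returned by step(). This is only an illustration; EmptyEnv and the 0.01 per-step penalty are assumptions made for the example, not part of this commit.

# Sketch: wrap an existing MiniGrid environment and reshape its reward.
# EmptyEnv and the per-step penalty are illustrative assumptions.
from gym_minigrid.envs import EmptyEnv

class PenalizedEmptyEnv(EmptyEnv):
    def step(self, action):
        obs, reward, done, info = super().step(action)
        # Keep the sparse success reward of 1, but charge a small cost per step.
        reward -= 0.01
        return obs, reward, done, info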

+ 0 - 2
gym_minigrid/envs/fetch.py

@@ -21,8 +21,6 @@ class FetchEnv(MiniGridEnv):
             see_through_walls=True
         )
 
-        self.reward_range = (0, 1)
-
     def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
 

+ 0 - 2
gym_minigrid/envs/gotodoor.py

@@ -20,8 +20,6 @@ class GoToDoorEnv(MiniGridEnv):
             see_through_walls=True
         )
 
-        self.reward_range = (0, 1)
-
     def _gen_grid(self, width, height):
         # Create the grid
         self.grid = Grid(width, height)

+ 0 - 2
gym_minigrid/envs/gotoobject.py

@@ -21,8 +21,6 @@ class GoToObjectEnv(MiniGridEnv):
             see_through_walls=True
         )
 
-        self.reward_range = (0, 1)
-
     def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
 

+ 0 - 1
gym_minigrid/envs/playground_v0.py

@@ -9,7 +9,6 @@ class PlaygroundV0(MiniGridEnv):
 
     def __init__(self):
         super().__init__(grid_size=19, max_steps=100)
-        self.reward_range = (0, 1)
 
     def _gen_grid(self, width, height):
         # Create the grid

+ 0 - 2
gym_minigrid/envs/putnear.py

@@ -21,8 +21,6 @@ class PutNearEnv(MiniGridEnv):
             see_through_walls=True
         )
 
-        self.reward_range = (0, 1)
-
     def _gen_grid(self, width, height):
         self.grid = Grid(width, height)
 

+ 2 - 2
gym_minigrid/minigrid.py

@@ -693,7 +693,7 @@ class MiniGridEnv(gym.Env):
         })
 
         # Range of possible rewards
-        self.reward_range = (-1, 1000)
+        self.reward_range = (0, 1)
 
         # Renderer object used to render the whole grid (full-scale)
         self.grid_render = None
@@ -1067,7 +1067,7 @@ class MiniGridEnv(gym.Env):
                 self.agent_pos = fwd_pos
             if fwd_cell != None and fwd_cell.type == 'goal':
                 done = True
-                reward = 1000 - self.step_count
+                reward = 1
 
         # Pick up an object
         elif action == self.actions.pickup:
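
The change above centralizes the reward range: since the base class now declares rewards in [0, 1], the per-environment assignments removed from the files earlier in this commit become redundant. A minimal sketch of that pattern, with all class names other than gym.Env made up for illustration:

import gym

class BaseGridEnv(gym.Env):
    def __init__(self, max_steps=100):
        super().__init__()
        self.max_steps = max_steps
        # Declared once here; every subclass inherits rewards in [0, 1].
        self.reward_range = (0, 1)

class SomeTaskEnv(BaseGridEnv):
    def __init__(self):
        # No need to repeat self.reward_range = (0, 1) in each environment.
        super().__init__(max_steps=50)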

+ 1 - 1
pytorch_rl/main.py

@@ -244,7 +244,7 @@ def main():
             end = time.time()
             total_num_steps = (j + 1) * args.num_processes * args.num_steps
             print(
-                "Updates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}".
+                "Updates {}, num timesteps {}, FPS {}, mean/median reward {:.2f}/{:.2f}, min/max reward {:.2f}/{:.2f}, entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}".
                 format(
                     j,
                     total_num_steps,
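
The extra decimal place matters once rewards are bounded by [0, 1]: with the old one-decimal precision, small mean rewards round to 0.0 and early learning progress is invisible. A quick illustration with made-up values:

# With rewards in [0, 1], one decimal place hides small averages (values are made up).
mean_reward = 0.04
print("{:.1f}".format(mean_reward))   # prints 0.0  (old format rounds the signal away)
print("{:.2f}".format(mean_reward))   # prints 0.04 (new format keeps it visible)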