Browse Source

Modified reward range for fetch environment

Maxime Chevalier-Boisvert 7 years ago
parent
commit
2cdc42ac43
2 changed files with 16 additions and 6 deletions
  1. 4 3
      README.md
  2. 12 3
      gym_minigrid/envs/fetch.py

+ 4 - 3
README.md

@@ -79,7 +79,7 @@ describing the objective the agent should reach to get a reward. Using
 dictionaries makes it easy for you to add additional information to observations
 if you need to, without having to force everything into a single tensor.
 If your RL code expects a tensor for observations, please take a look at
-`FlatObsWrapper` in 
+`FlatObsWrapper` in
 [gym_minigrid/wrappers.py](/gym_minigrid/wrappers.py).
 
 The partially observable view of the environment uses a compact and efficient
@@ -153,8 +153,8 @@ useful to experiment with curiosity or curriculum learning.
 ### Multi-room environment
 
 Registered configurations:
-- `MiniGrid-MultiRoom-N2-S4-v0`
-- `MiniGrid-MultiRoom-N6-v0`
+- `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
+- `MiniGrid-MultiRoom-N6-v0` (six rooms)
 
 <p align="center">
 <img src="/figures/multi-room.gif" width=416 height=424>
@@ -170,6 +170,7 @@ rooms and building a curriculum, the environment can be solved.
 
 Registered configurations:
 - `MiniGrid-Fetch-5x5-N2-v0`
+- `MiniGrid-Fetch-6x6-N2-v0`
 - `MiniGrid-Fetch-8x8-N3-v0`
 
 <p align="center">

+ 12 - 3
gym_minigrid/envs/fetch.py

@@ -14,7 +14,7 @@ class FetchEnv(MiniGridEnv):
     ):
         self.numObjs = numObjs
         super().__init__(gridSize=size, maxSteps=5*size)
-        self.reward_range = (-1000, 1000)
+        self.reward_range = (0, 1)
 
     def _genGrid(self, width, height):
         assert width == height
@@ -85,10 +85,10 @@ class FetchEnv(MiniGridEnv):
         if self.carrying:
             if self.carrying.color == self.targetColor and \
                self.carrying.type == self.targetType:
-                reward = 1000 - self.stepCount
+                reward = 1
                 done = True
             else:
-                reward = -1000
+                reward = 0
                 done = True
 
         return obs, reward, done, info
@@ -97,12 +97,21 @@ class FetchEnv5x5N2(FetchEnv):
     def __init__(self):
         super().__init__(size=5, numObjs=2)
 
+class FetchEnv6x6N2(FetchEnv):
+    def __init__(self):
+        super().__init__(size=6, numObjs=2)
+
 register(
     id='MiniGrid-Fetch-5x5-N2-v0',
     entry_point='gym_minigrid.envs:FetchEnv5x5N2'
 )
 
 register(
+    id='MiniGrid-Fetch-6x6-N2-v0',
+    entry_point='gym_minigrid.envs:FetchEnv6x6N2'
+)
+
+register(
     id='MiniGrid-Fetch-8x8-N3-v0',
     entry_point='gym_minigrid.envs:FetchEnv'
 )