Browse Source

Modified reward range for fetch environment

Maxime Chevalier-Boisvert 7 years ago
parent
commit
2cdc42ac43
2 changed files with 16 additions and 6 deletions
  1. 4 3
      README.md
  2. 12 3
      gym_minigrid/envs/fetch.py

+ 4 - 3
README.md

@@ -79,7 +79,7 @@ describing the objective the agent should reach to get a reward. Using
 dictionaries makes it easy for you to add additional information to observations
 if you need to, without having to force everything into a single tensor.
 If your RL code expects a tensor for observations, please take a look at
-`FlatObsWrapper` in 
+`FlatObsWrapper` in
 [gym_minigrid/wrappers.py](/gym_minigrid/wrappers.py).
 
 The partially observable view of the environment uses a compact and efficient
@@ -153,8 +153,8 @@ useful to experiment with curiosity or curriculum learning.
 ### Multi-room environment
 
 Registered configurations:
-- `MiniGrid-MultiRoom-N2-S4-v0`
-- `MiniGrid-MultiRoom-N6-v0`
+- `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
+- `MiniGrid-MultiRoom-N6-v0` (six rooms)
 
 <p align="center">
 <img src="/figures/multi-room.gif" width=416 height=424>
@@ -170,6 +170,7 @@ rooms and building a curriculum, the environment can be solved.
 
 Registered configurations:
 - `MiniGrid-Fetch-5x5-N2-v0`
+- `MiniGrid-Fetch-6x6-N2-v0`
 - `MiniGrid-Fetch-8x8-N3-v0`
 
 <p align="center">

+ 12 - 3
gym_minigrid/envs/fetch.py

@@ -14,7 +14,7 @@ class FetchEnv(MiniGridEnv):
     ):
         self.numObjs = numObjs
         super().__init__(gridSize=size, maxSteps=5*size)
-        self.reward_range = (-1000, 1000)
+        self.reward_range = (0, 1)
 
     def _genGrid(self, width, height):
         assert width == height
@@ -85,10 +85,10 @@ class FetchEnv(MiniGridEnv):
         if self.carrying:
             if self.carrying.color == self.targetColor and \
                self.carrying.type == self.targetType:
-                reward = 1000 - self.stepCount
+                reward = 1
                 done = True
             else:
-                reward = -1000
+                reward = 0
                 done = True
 
         return obs, reward, done, info
@@ -97,12 +97,21 @@ class FetchEnv5x5N2(FetchEnv):
     def __init__(self):
         super().__init__(size=5, numObjs=2)
 
+class FetchEnv6x6N2(FetchEnv):
+    def __init__(self):
+        super().__init__(size=6, numObjs=2)
+
 register(
     id='MiniGrid-Fetch-5x5-N2-v0',
     entry_point='gym_minigrid.envs:FetchEnv5x5N2'
 )
 
 register(
+    id='MiniGrid-Fetch-6x6-N2-v0',
+    entry_point='gym_minigrid.envs:FetchEnv6x6N2'
+)
+
+register(
     id='MiniGrid-Fetch-8x8-N3-v0',
     entry_point='gym_minigrid.envs:FetchEnv'
 )