8 лет назад · 1a80488ad0
--- a/gym_minigrid/envs/gotodoor.py
+++ b/gym_minigrid/envs/gotodoor.py
@@ -13,7 +13,10 @@ class GoToDoorEnv(MiniGridEnv):
 
				     ):
			
 
				         super().__init__(gridSize=size, maxSteps=10*size)
			
 
				 
			
 
				-        self.reward_range = (-1, self.maxSteps)
			
 
				+        self.reward_range = (-1000, 1000)
			
 
				+
			
 
				+        # Flag determining whether the wait action ends the episode
			
 
				+        self.waitEnds = True
			
 
				 
			
 
				     def _genGrid(self, width, height):
			
 
				         assert width == height
			
@@ -58,8 +61,8 @@ class GoToDoorEnv(MiniGridEnv):
 
				 
			
 
				         # Select a random target door
			
 
				         doorIdx = self._randInt(0, len(doorPos))
			
 
				-        self.targetPos = doorPos[idx]
			
 
				-        self.targetColor = doorColors[idx]
			
 
				+        self.targetPos = doorPos[doorIdx]
			
 
				+        self.targetColor = doorColors[doorIdx]
			
 
				 
			
 
				         # Generate the mission string
			
 
				         self.mission = 'go to the %s door' % self.targetColor
			
@@ -97,12 +100,11 @@ class GoToDoorEnv(MiniGridEnv):
 
				 
			
 
				         # Reward waiting in front of the target door
			
 
				         if action == self.actions.wait:
			
 
				-            if ax == tx and abs(ay - ty) == 1:
			
 
				-                reward = 1
			
 
				-            elif ay == ty and abs(ax - tx) == 1:
			
 
				+            if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
			
 
				                 reward = 1
			
 
				-            #else:
			
 
				-            #    reward = -0.1
			
 
				+            else:
			
 
				+                reward = 0
			
 
				+            done = self.waitEnds
			
 
				 
			
 
				         obs = self._observation(obs)