7 роки тому · 116bfb2d4b
--- a/gym_minigrid/envs/fourroomqa.py
+++ b/gym_minigrid/envs/fourroomqa.py
@@ -30,14 +30,15 @@ class FourRoomQAEnv(MiniGridEnv):
 
				         right = 1
			
 
				         forward = 2
			
 
				         toggle = 3
			
 
				-        say = 4
			
 
				+        wait = 4
			
 
				+        answer = 5
			
 
				 
			
 
				     def __init__(self, size=16):
			
 
				         assert size >= 10
			
 
				         super(FourRoomQAEnv, self).__init__(gridSize=size, maxSteps=8*size)
			
 
				 
			
 
				         # Action enumeration for this environment
			
 
				-        self.actions = MiniGridEnv.Actions
			
 
				+        self.actions = FourRoomQAEnv.Actions
			
 
				 
			
 
				         # TODO: dictionary action_space, to include answer sentence?
			
 
				         # Actions are discrete integer values
			
@@ -45,6 +46,8 @@ class FourRoomQAEnv(MiniGridEnv):
 
				 
			
 
				         # TODO: dictionary observation_space, to include question?
			
 
				 
			
 
				+        self.reward_range = (-1000, 1000)
			
 
				+
			
 
				     def _randPos(self, room, border=1):
			
 
				         return (
			
 
				             self._randInt(
			
@@ -181,8 +184,8 @@ class FourRoomQAEnv(MiniGridEnv):
 
				 
			
 
				         # TODO: how many X in the Y room question type
			
 
				 
			
 
				-        print(self.question)
			
 
				-        print(self.answer)
			
 
				+        #print(self.question)
			
 
				+        #print(self.answer)
			
 
				 
			
 
				         return grid
			
 
				 
			
@@ -203,12 +206,20 @@ class FourRoomQAEnv(MiniGridEnv):
 
				         else:
			
 
				             answer = ''
			
 
				 
			
 
				-        obs, reward, done, info = MiniGridEnv._step(self, action)
			
 
				-
			
 
				-        if answer == self.answer:
			
 
				-            reward = 1000 - self.stepCount
			
 
				+        if action == self.actions.answer:
			
 
				+            # To the superclass, this action behaves like a noop
			
 
				+            obs, reward, done, info = MiniGridEnv._step(self, self.actions.wait)
			
 
				             done = True
			
 
				 
			
 
				+            if answer == self.answer:
			
 
				+                reward = 1000 - self.stepCount
			
 
				+            else:
			
 
				+                reward = -1000
			
 
				+
			
 
				+        else:
			
 
				+            # Let the superclass handle the action
			
 
				+            obs, reward, done, info = MiniGridEnv._step(self, action)
			
 
				+
			
 
				         obs = {
			
 
				             'image': obs,
			
 
				             'question': self.question
			
--- a/gym_minigrid/envs/gotoobject.py
+++ b/gym_minigrid/envs/gotoobject.py
@@ -14,6 +14,7 @@ class GoToObjectEnv(MiniGridEnv):
 
				     ):
			
 
				         self.numObjs = numObjs
			
 
				         super().__init__(gridSize=size, maxSteps=5*size)
			
 
				+
			
 
				         self.reward_range = (-1000, 1000)
			
 
				 
			
 
				     def _genGrid(self, width, height):
			
@@ -34,12 +35,17 @@ class GoToObjectEnv(MiniGridEnv):
 
				         colors = list(COLORS.keys())
			
 
				 
			
 
				         objs = []
			
 
				+        objPos = []
			
 
				 
			
 
				         # For each object to be generated
			
 
				         for i in range(0, self.numObjs):
			
 
				             objType = self._randElem(types)
			
 
				             objColor = self._randElem(colors)
			
 
				 
			
 
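				+            # If this object already exists, skip it (note: `continue` moves on to the next loop iteration rather than retrying, so fewer than numObjs objects may be generated)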
			
 
				+            if (objType, objColor) in objs:
			
 
				+                continue
			
 
				+
			
 
				             if objType == 'key':
			
 
				                 obj = Key(objColor)
			
 
				             elif objType == 'ball':
			
@@ -57,12 +63,13 @@ class GoToObjectEnv(MiniGridEnv):
 
				                     grid.set(*pos, obj)
			
 
				                     break
			
 
				 
			
 
				-            objs.append(obj)
			
 
				+            objs.append((objType, objColor))
			
 
				+            objPos.append(pos)
			
 
				 
			
 
				         # Choose a random object to be picked up
			
 
				-        target = objs[self._randInt(0, len(objs))]
			
 
				-        self.targetType = target.type
			
 
				-        self.targetColor = target.color
			
 
				+        objIdx = self._randInt(0, len(objs))
			
 
				+        self.targetType, self.targetColor = objs[objIdx]
			
 
				+        self.targetPos = objPos[objIdx]
			
 
				 
			
 
				         descStr = '%s %s' % (self.targetColor, self.targetType)
			
 
				 
			
@@ -83,6 +90,7 @@ class GoToObjectEnv(MiniGridEnv):
 
				         """
			
 
				 
			
 
				         self.mission = 'go to the %s' % descStr
			
 
				+        #print(self.mission)
			
 
				 
			
 
				         return grid
			
 
				 
			
@@ -106,14 +114,16 @@ class GoToObjectEnv(MiniGridEnv):
 
				     def _step(self, action):
			
 
				         obs, reward, done, info = MiniGridEnv._step(self, action)
			
 
				 
			
 
				-        #if self.carrying:
			
 
				-        #    if self.carrying.color == self.targetColor and \
			
 
				-        #       self.carrying.type == self.targetType:
			
 
				-        #        reward = 1000 - self.stepCount
			
 
				-        #        done = True
			
 
				-        #    else:
			
 
				-        #        reward = -1000
			
 
				-        #        done = True
			
 
				+        ax, ay = self.agentPos
			
 
				+        tx, ty = self.targetPos
			
 
				+
			
 
				+        # Reward being next to the object
			
 
				+        # Double the reward for waiting next to the object
			
 
				+        if abs(ax - tx) <= 1 and abs(ay - ty) <= 1:
			
 
				+            if action == self.actions.wait:
			
 
				+                reward = 2
			
 
				+            else:
			
 
				+                reward = 1
			
 
				 
			
 
				         obs = self._observation(obs)
			
 
				 
			
--- a/gym_minigrid/minigrid.py
+++ b/gym_minigrid/minigrid.py
@@ -497,7 +497,10 @@ class MiniGridEnv(gym.Env):
 
				         left = 0
			
 
				         right = 1
			
 
				         forward = 2
			
 
				+        # Toggle/pick up/activate object
			
 
				         toggle = 3
			
 
				+        # Wait/stay put/do nothing
			
 
				+        wait = 4
			
 
				 
			
 
				     def __init__(self, gridSize=16, maxSteps=100):
			
 
				         # Action enumeration for this environment
			
@@ -699,6 +702,10 @@ class MiniGridEnv(gym.Env):
 
				             elif cell:
			
 
				                 cell.toggle(self, objPos)
			
 
				 
			
 
				+        # Wait/do nothing
			
 
				+        elif action == self.actions.wait:
			
 
				+            pass
			
 
				+
			
 
				         else:
			
 
				             assert False, "unknown action"