
Modified environments so they all produce observations in a dict

Maxime Chevalier-Boisvert, 7 years ago · commit 25fe4664fa

gym_minigrid/envs/doorkey.py (+2 -0)

@@ -53,6 +53,8 @@ class DoorKeyEnv(MiniGridEnv):
             grid.set(*pos, Key('yellow'))
             break
 
+        self.mission = "use the key to open the door and then get to the goal"
+
         return grid
 
 class DoorKeyEnv5x5(DoorKeyEnv):
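
With the mission stored on the environment, the string now comes back inside every observation. A minimal sketch of what that looks like at reset time, constructing the class directly (assuming DoorKeyEnv5x5 takes no constructor arguments):

    from gym_minigrid.envs.doorkey import DoorKeyEnv5x5

    env = DoorKeyEnv5x5()
    obs = env.reset()

    # obs is now a dict rather than a raw image array
    print(obs['image'].shape)
    print(obs['mission'])  # "use the key to open the door and then get to the goal"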

gym_minigrid/envs/empty.py (+2 -0)

@@ -24,6 +24,8 @@ class EmptyEnv(MiniGridEnv):
         # Place a goal in the bottom-right corner
         grid.set(width - 2, height - 2, Goal())
 
+        self.mission = "get to the green goal square"
+
         return grid
 
 class EmptyEnv6x6(EmptyEnv):
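
Since the Dict observation space is now declared once in the base class (see the gym_minigrid/minigrid.py hunk below), every environment reports it. A quick inspection sketch, assuming EmptyEnv's default constructor:

    from gym_minigrid.envs.empty import EmptyEnv

    env = EmptyEnv()
    print(env.observation_space)
    # A spaces.Dict wrapping the image Box; the exact image
    # shape is determined by OBS_ARRAY_SIZE in minigrid.py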

gym_minigrid/envs/fetch.py (+0 -23)

@@ -14,11 +14,6 @@ class FetchEnv(MiniGridEnv):
     ):
         self.numObjs = numObjs
         super().__init__(gridSize=size, maxSteps=5*size)
-
-        self.observation_space = spaces.Dict({
-            'image': self.observation_space
-        })
-
         self.reward_range = (-1000, 1000)
 
     def _genGrid(self, width, height):
@@ -84,22 +79,6 @@ class FetchEnv(MiniGridEnv):
 
         return grid
 
-    def _observation(self, obs):
-        """
-        Encode observations
-        """
-
-        obs = {
-            'image': obs,
-            'mission': self.mission
-        }
-
-        return obs
-
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-        return self._observation(obs)
-
     def step(self, action):
         obs, reward, done, info = MiniGridEnv.step(self, action)
 
@@ -112,8 +91,6 @@ class FetchEnv(MiniGridEnv):
                 reward = -1000
                 done = True
 
-        obs = self._observation(obs)
-
         return obs, reward, done, info
 
 class FetchEnv5x5N2(FetchEnv):
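
With the base class producing the dict, the _observation/reset boilerplate deleted above (and removed again below in gotodoor.py, gotoobject.py, putnear.py and lockedroom.py) is no longer needed. Agents that still expect a bare image can recover the old behavior with a small observation wrapper; a hypothetical sketch (note that older gym versions name the hook _observation rather than observation):

    import gym

    class ImageOnlyWrapper(gym.ObservationWrapper):
        """Hypothetical wrapper stripping the dict back down to the image."""

        def __init__(self, env):
            super().__init__(env)
            # Expose the inner 'image' Box as the observation space
            self.observation_space = env.observation_space.spaces['image']

        def observation(self, obs):
            return obs['image']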

gym_minigrid/envs/fourroomqa.py (+3 -20)

@@ -44,8 +44,6 @@ class FourRoomQAEnv(MiniGridEnv):
         # Actions are discrete integer values
         self.action_space = spaces.Discrete(len(self.actions))
 
-        # TODO: dictionary observation_space, to include question?
-
         self.reward_range = (-1000, 1000)
 
     def _randPos(self, room, border=1):
@@ -179,26 +177,16 @@ class FourRoomQAEnv(MiniGridEnv):
 
         # TODO: identify unique objects
 
-        self.question = "Are there any %ss in the %s room?" % (objType, room.color)
+        self.mission = "Are there any %ss in the %s room?" % (objType, room.color)
         self.answer = "yes" if count > 0 else "no"
 
         # TODO: how many X in the Y room question type
 
-        #print(self.question)
+        #print(self.mission)
         #print(self.answer)
 
         return grid
 
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-
-        obs = {
-            'image': obs,
-            'question': self.question
-        }
-
-        return obs
-
     def step(self, action):
         if isinstance(action, dict):
             answer = action['answer']
@@ -211,7 +199,7 @@ class FourRoomQAEnv(MiniGridEnv):
             obs, reward, done, info = MiniGridEnv.step(self, self.actions.wait)
             done = True
 
-            if answer == self.answer:
+            if answer == self.mission:
                 reward = 1000 - self.stepCount
             else:
                 reward = -1000
@@ -220,11 +208,6 @@ class FourRoomQAEnv(MiniGridEnv):
             # Let the superclass handle the action
             obs, reward, done, info = MiniGridEnv.step(self, action)
 
-        obs = {
-            'image': obs,
-            'question': self.question
-        }
-
         return obs, reward, done, info
 
 register(
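
The QA environment keeps its dict-valued actions; the question simply travels under the 'mission' key now. A sketch of one exchange, assuming the default constructor (the full action-dict protocol is not shown in this hunk):

    from gym_minigrid.envs.fourroomqa import FourRoomQAEnv

    env = FourRoomQAEnv()
    obs = env.reset()
    print(obs['mission'])  # e.g. "Are there any keys in the red room?"

    # Supplying an answer ends the episode
    obs, reward, done, info = env.step({'answer': 'yes'})

Note that the reward check above now compares the answer against self.mission, the question text; since self.answer still holds the "yes"/"no" ground truth, that comparison presumably still means self.answer.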

gym_minigrid/envs/gotodoor.py (+0 -23)

@@ -14,11 +14,6 @@ class GoToDoorEnv(MiniGridEnv):
     ):
         assert size >= 5
         super().__init__(gridSize=size, maxSteps=10*size)
-
-        self.observation_space = spaces.Dict({
-            'image': self.observation_space
-        })
-
         self.reward_range = (-1, 1)
 
     def _genGrid(self, width, height):
@@ -75,22 +70,6 @@ class GoToDoorEnv(MiniGridEnv):
 
         return grid
 
-    def _observation(self, obs):
-        """
-        Encode observations
-        """
-
-        obs = {
-            'image': obs,
-            'mission': self.mission
-        }
-
-        return obs
-
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-        return self._observation(obs)
-
     def step(self, action):
         obs, reward, done, info = MiniGridEnv.step(self, action)
 
@@ -107,8 +86,6 @@ class GoToDoorEnv(MiniGridEnv):
                 reward = 1
             done = True
 
-        obs = self._observation(obs)
-
         return obs, reward, done, info
 
 class GoToDoor8x8Env(GoToDoorEnv):

gym_minigrid/envs/gotoobject.py (+0 -23)

@@ -14,11 +14,6 @@ class GoToObjectEnv(MiniGridEnv):
     ):
         self.numObjs = numObjs
         super().__init__(gridSize=size, maxSteps=5*size)
-
-        self.observation_space = spaces.Dict({
-            'image': self.observation_space
-        })
-
         self.reward_range = (-1, 1)
 
     def _genGrid(self, width, height):
@@ -83,22 +78,6 @@ class GoToObjectEnv(MiniGridEnv):
 
         return grid
 
-    def _observation(self, obs):
-        """
-        Encode observations
-        """
-
-        obs = {
-            'image': obs,
-            'mission': self.mission
-        }
-
-        return obs
-
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-        return self._observation(obs)
-
     def step(self, action):
         obs, reward, done, info = MiniGridEnv.step(self, action)
 
@@ -115,8 +94,6 @@ class GoToObjectEnv(MiniGridEnv):
                 reward = 1
             done = True
 
-        obs = self._observation(obs)
-
         return obs, reward, done, info
 
 class GotoEnv8x8N2(GoToObjectEnv):

gym_minigrid/envs/lockedroom.py (+0 -17)

@@ -123,25 +123,8 @@ class LockedRoom(MiniGridEnv):
 
         return grid
 
-    def _observation(self, obs):
-        """
-        Encode observations
-        """
-
-        obs = {
-            'image': obs,
-            'mission': self.mission
-        }
-
-        return obs
-
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-        return self._observation(obs)
-
     def step(self, action):
         obs, reward, done, info = MiniGridEnv.step(self, action)
-        obs = self._observation(obs)
         return obs, reward, done, info
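
With the wrapping removed, the remaining step override here is a pure pass-through to MiniGridEnv.step and could be dropped entirely in a follow-up.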
 
 register(

gym_minigrid/envs/multiroom.py (+2 -0)

@@ -130,6 +130,8 @@ class MultiRoomEnv(MiniGridEnv):
                 grid.set(*self.goalPos, Goal())
                 break
 
+        self.mission = 'traverse the rooms to get to the goal'
+
         return grid
 
     def _placeRoom(

gym_minigrid/envs/putnear.py (+0 -23)

@@ -14,11 +14,6 @@ class PutNearEnv(MiniGridEnv):
     ):
         self.numObjs = numObjs
         super().__init__(gridSize=size, maxSteps=5*size)
-
-        self.observation_space = spaces.Dict({
-            'image': self.observation_space
-        })
-
         self.reward_range = (-1, 1)
 
     def _genGrid(self, width, height):
@@ -99,22 +94,6 @@ class PutNearEnv(MiniGridEnv):
 
         return grid
 
-    def _observation(self, obs):
-        """
-        Encode observations
-        """
-
-        obs = {
-            'image': obs,
-            'mission': self.mission
-        }
-
-        return obs
-
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-        return self._observation(obs)
-
     def step(self, action):
         preCarrying = self.carrying
 
@@ -138,8 +117,6 @@ class PutNearEnv(MiniGridEnv):
                         reward = 1
                 done = True
 
-        obs = self._observation(obs)
-
         return obs, reward, done, info
 
 class PutNear8x8N3(PutNearEnv):

gym_minigrid/minigrid.py (+15 -2)

@@ -527,13 +527,17 @@ class MiniGridEnv(gym.Env):
         # Actions are discrete integer values
         self.action_space = spaces.Discrete(len(self.actions))
 
-        # The observations are RGB images
+        # Observations are dictionaries containing an
+        # encoding of the grid and a textual 'mission' string
         self.observation_space = spaces.Box(
             low=0,
             high=255,
             shape=OBS_ARRAY_SIZE,
             dtype='uint8'
         )
+        self.observation_space = spaces.Dict({
+            'image': self.observation_space
+        })
 
         # Range of possible rewards
         self.reward_range = (-1, 1000)
@@ -745,7 +749,16 @@ class MiniGridEnv(gym.Env):
             grid.set(*agentPos, None)
 
         # Encode the partially observable view into a numpy array
-        obs = grid.encode()
+        image = grid.encode()
+
+        assert hasattr(self, 'mission'), "environments must define a textual mission string"
+
+        # Observations are dictionaries with both an image
+        # and a textual mission string
+        obs = {
+            'image': image,
+            'mission': self.mission
+        }
 
         return obs
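
The new assert makes the mission string part of the environment contract: every subclass must set self.mission before _genGrid returns, exactly as the per-environment hunks above do. A minimal sketch of a conforming environment (the class is hypothetical; Grid.wallRect is assumed for the boundary walls):

    from gym_minigrid.minigrid import MiniGridEnv, Grid, Goal

    class MyGoalEnv(MiniGridEnv):
        """Hypothetical environment illustrating the new contract."""

        def __init__(self):
            super().__init__(gridSize=8, maxSteps=40)

        def _genGrid(self, width, height):
            grid = Grid(width, height)
            grid.wallRect(0, 0, width, height)  # surrounding walls
            grid.set(width - 2, height - 2, Goal())

            # Required: without this, the assert above fires on reset
            self.mission = "get to the green goal square"

            return grid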
 

run_tests.py (+2 -2)

@@ -43,7 +43,7 @@ for envName in envList:
         obs, reward, done, info = env.step(action)
 
         # Test observation encode/decode roundtrip
-        img = obs if type(obs) is np.ndarray else obs['image']
+        img = obs['image']
         grid = Grid.decode(img)
         img2 = grid.encode()
         assert np.array_equal(img2, img)
@@ -76,7 +76,7 @@ env.reset()
 for i in range(0, 200):
     action = random.randint(0, env.action_space.n - 1)
     obs, reward, done, info = env.step(action)
-    goalVisible = ('green', 'goal') in Grid.decode(obs)
+    goalVisible = ('green', 'goal') in Grid.decode(obs['image'])
     assert env.agentSees(*goalPos) == goalVisible
     if done:
         env.reset()
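
With every environment on the same format, a couple of extra assertions could pin the dict shape down directly; a possible addition to the test loop:

    # Sanity-check the new observation format
    assert isinstance(obs, dict)
    assert 'image' in obs and 'mission' in obs
    assert isinstance(obs['mission'], str)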