
Modified environments so they all produce observations in a dict

Maxime Chevalier-Boisvert, 7 years ago · commit 25fe4664fa

gym_minigrid/envs/doorkey.py (+2 -0)

@@ -53,6 +53,8 @@ class DoorKeyEnv(MiniGridEnv):
             grid.set(*pos, Key('yellow'))
             break
 
+        self.mission = "use the key to open the door and then get to the goal"
+
         return grid
 
 class DoorKeyEnv5x5(DoorKeyEnv):
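
With the mission stored on the environment, the string now comes back inside every observation. A minimal sketch of what that looks like at reset time, constructing the class directly (assuming DoorKeyEnv5x5 takes no constructor arguments):

    from gym_minigrid.envs.doorkey import DoorKeyEnv5x5

    env = DoorKeyEnv5x5()
    obs = env.reset()

    # obs is now a dict rather than a raw image array
    print(obs['image'].shape)
    print(obs['mission'])  # "use the key to open the door and then get to the goal"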

gym_minigrid/envs/empty.py (+2 -0)

@@ -24,6 +24,8 @@ class EmptyEnv(MiniGridEnv):
         # Place a goal in the bottom-right corner
         grid.set(width - 2, height - 2, Goal())
 
+        self.mission = "get to the green goal square"
+
         return grid
 
 class EmptyEnv6x6(EmptyEnv):
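
Since the Dict observation space is now declared once in the base class (see the gym_minigrid/minigrid.py hunk below), every environment reports it. A quick inspection sketch, assuming EmptyEnv's default constructor:

    from gym_minigrid.envs.empty import EmptyEnv

    env = EmptyEnv()
    print(env.observation_space)
    # A spaces.Dict wrapping the image Box; the exact image
    # shape is determined by OBS_ARRAY_SIZE in minigrid.py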

gym_minigrid/envs/fetch.py (+0 -23)

@@ -14,11 +14,6 @@ class FetchEnv(MiniGridEnv):
     ):
         self.numObjs = numObjs
         super().__init__(gridSize=size, maxSteps=5*size)
-
-        self.observation_space = spaces.Dict({
-            'image': self.observation_space
-        })
-
         self.reward_range = (-1000, 1000)
 
     def _genGrid(self, width, height):
@@ -84,22 +79,6 @@ class FetchEnv(MiniGridEnv):
 
         return grid
 
-    def _observation(self, obs):
-        """
-        Encode observations
-        """
-
-        obs = {
-            'image': obs,
-            'mission': self.mission
-        }
-
-        return obs
-
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-        return self._observation(obs)
-
     def step(self, action):
         obs, reward, done, info = MiniGridEnv.step(self, action)
 
@@ -112,8 +91,6 @@ class FetchEnv(MiniGridEnv):
                 reward = -1000
                 done = True
 
-        obs = self._observation(obs)
-
         return obs, reward, done, info
 
 class FetchEnv5x5N2(FetchEnv):
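
With the base class producing the dict, the _observation/reset boilerplate deleted above (and removed again below in gotodoor.py, gotoobject.py, putnear.py and lockedroom.py) is no longer needed. Agents that still expect a bare image can recover the old behavior with a small observation wrapper; a hypothetical sketch (note that older gym versions name the hook _observation rather than observation):

    import gym

    class ImageOnlyWrapper(gym.ObservationWrapper):
        """Hypothetical wrapper stripping the dict back down to the image."""

        def __init__(self, env):
            super().__init__(env)
            # Expose the inner 'image' Box as the observation space
            self.observation_space = env.observation_space.spaces['image']

        def observation(self, obs):
            return obs['image']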

gym_minigrid/envs/fourroomqa.py (+3 -20)

@@ -44,8 +44,6 @@ class FourRoomQAEnv(MiniGridEnv):
         # Actions are discrete integer values
         self.action_space = spaces.Discrete(len(self.actions))
 
-        # TODO: dictionary observation_space, to include question?
-
         self.reward_range = (-1000, 1000)
 
     def _randPos(self, room, border=1):
@@ -179,26 +177,16 @@ class FourRoomQAEnv(MiniGridEnv):
 
         # TODO: identify unique objects
 
-        self.question = "Are there any %ss in the %s room?" % (objType, room.color)
+        self.mission = "Are there any %ss in the %s room?" % (objType, room.color)
         self.answer = "yes" if count > 0 else "no"
 
         # TODO: how many X in the Y room question type
 
-        #print(self.question)
+        #print(self.mission)
         #print(self.answer)
 
         return grid
 
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-
-        obs = {
-            'image': obs,
-            'question': self.question
-        }
-
-        return obs
-
     def step(self, action):
         if isinstance(action, dict):
             answer = action['answer']
@@ -211,7 +199,7 @@ class FourRoomQAEnv(MiniGridEnv):
             obs, reward, done, info = MiniGridEnv.step(self, self.actions.wait)
             done = True
 
-            if answer == self.answer:
+            if answer == self.mission:
                 reward = 1000 - self.stepCount
             else:
                 reward = -1000
@@ -220,11 +208,6 @@ class FourRoomQAEnv(MiniGridEnv):
             # Let the superclass handle the action
             obs, reward, done, info = MiniGridEnv.step(self, action)
 
-        obs = {
-            'image': obs,
-            'question': self.question
-        }
-
         return obs, reward, done, info
 
 register(
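
The QA environment keeps its dict-valued actions; the question simply travels under the 'mission' key now. A sketch of one exchange, assuming the default constructor (the full action-dict protocol is not shown in this hunk):

    from gym_minigrid.envs.fourroomqa import FourRoomQAEnv

    env = FourRoomQAEnv()
    obs = env.reset()
    print(obs['mission'])  # e.g. "Are there any keys in the red room?"

    # Supplying an answer ends the episode
    obs, reward, done, info = env.step({'answer': 'yes'})

Note that the reward check above now compares the answer against self.mission, the question text; since self.answer still holds the "yes"/"no" ground truth, that comparison presumably still means self.answer.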

gym_minigrid/envs/gotodoor.py (+0 -23)

@@ -14,11 +14,6 @@ class GoToDoorEnv(MiniGridEnv):
     ):
         assert size >= 5
         super().__init__(gridSize=size, maxSteps=10*size)
-
-        self.observation_space = spaces.Dict({
-            'image': self.observation_space
-        })
-
         self.reward_range = (-1, 1)
 
     def _genGrid(self, width, height):
@@ -75,22 +70,6 @@ class GoToDoorEnv(MiniGridEnv):
 
         return grid
 
-    def _observation(self, obs):
-        """
-        Encode observations
-        """
-
-        obs = {
-            'image': obs,
-            'mission': self.mission
-        }
-
-        return obs
-
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-        return self._observation(obs)
-
     def step(self, action):
         obs, reward, done, info = MiniGridEnv.step(self, action)
 
@@ -107,8 +86,6 @@ class GoToDoorEnv(MiniGridEnv):
                 reward = 1
             done = True
 
-        obs = self._observation(obs)
-
         return obs, reward, done, info
 
 class GoToDoor8x8Env(GoToDoorEnv):

gym_minigrid/envs/gotoobject.py (+0 -23)

@@ -14,11 +14,6 @@ class GoToObjectEnv(MiniGridEnv):
     ):
         self.numObjs = numObjs
         super().__init__(gridSize=size, maxSteps=5*size)
-
-        self.observation_space = spaces.Dict({
-            'image': self.observation_space
-        })
-
         self.reward_range = (-1, 1)
 
     def _genGrid(self, width, height):
@@ -83,22 +78,6 @@ class GoToObjectEnv(MiniGridEnv):
 
         return grid
 
-    def _observation(self, obs):
-        """
-        Encode observations
-        """
-
-        obs = {
-            'image': obs,
-            'mission': self.mission
-        }
-
-        return obs
-
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-        return self._observation(obs)
-
     def step(self, action):
         obs, reward, done, info = MiniGridEnv.step(self, action)
 
@@ -115,8 +94,6 @@ class GoToObjectEnv(MiniGridEnv):
                 reward = 1
             done = True
 
-        obs = self._observation(obs)
-
         return obs, reward, done, info
 
 class GotoEnv8x8N2(GoToObjectEnv):

gym_minigrid/envs/lockedroom.py (+0 -17)

@@ -123,25 +123,8 @@ class LockedRoom(MiniGridEnv):
 
         return grid
 
-    def _observation(self, obs):
-        """
-        Encode observations
-        """
-
-        obs = {
-            'image': obs,
-            'mission': self.mission
-        }
-
-        return obs
-
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-        return self._observation(obs)
-
     def step(self, action):
         obs, reward, done, info = MiniGridEnv.step(self, action)
-        obs = self._observation(obs)
         return obs, reward, done, info
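
With the wrapping removed, the remaining step override here is a pure pass-through to MiniGridEnv.step and could be dropped entirely in a follow-up.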
 
 register(

gym_minigrid/envs/multiroom.py (+2 -0)

@@ -130,6 +130,8 @@ class MultiRoomEnv(MiniGridEnv):
                 grid.set(*self.goalPos, Goal())
                 break
 
+        self.mission = 'traverse the rooms to get to the goal'
+
         return grid
 
     def _placeRoom(

gym_minigrid/envs/putnear.py (+0 -23)

@@ -14,11 +14,6 @@ class PutNearEnv(MiniGridEnv):
     ):
         self.numObjs = numObjs
         super().__init__(gridSize=size, maxSteps=5*size)
-
-        self.observation_space = spaces.Dict({
-            'image': self.observation_space
-        })
-
         self.reward_range = (-1, 1)
 
     def _genGrid(self, width, height):
@@ -99,22 +94,6 @@ class PutNearEnv(MiniGridEnv):
 
         return grid
 
-    def _observation(self, obs):
-        """
-        Encode observations
-        """
-
-        obs = {
-            'image': obs,
-            'mission': self.mission
-        }
-
-        return obs
-
-    def reset(self):
-        obs = MiniGridEnv.reset(self)
-        return self._observation(obs)
-
     def step(self, action):
         preCarrying = self.carrying
 
@@ -138,8 +117,6 @@ class PutNearEnv(MiniGridEnv):
                         reward = 1
                 done = True
 
-        obs = self._observation(obs)
-
         return obs, reward, done, info
 
 class PutNear8x8N3(PutNearEnv):

gym_minigrid/minigrid.py (+15 -2)

@@ -527,13 +527,17 @@ class MiniGridEnv(gym.Env):
         # Actions are discrete integer values
         self.action_space = spaces.Discrete(len(self.actions))
 
-        # The observations are RGB images
+        # Observations are dictionaries containing an
+        # encoding of the grid and a textual 'mission' string
         self.observation_space = spaces.Box(
             low=0,
             high=255,
             shape=OBS_ARRAY_SIZE,
             dtype='uint8'
         )
+        self.observation_space = spaces.Dict({
+            'image': self.observation_space
+        })
 
         # Range of possible rewards
         self.reward_range = (-1, 1000)
@@ -745,7 +749,16 @@ class MiniGridEnv(gym.Env):
             grid.set(*agentPos, None)
 
         # Encode the partially observable view into a numpy array
-        obs = grid.encode()
+        image = grid.encode()
+
+        assert hasattr(self, 'mission'), "environments must define a textual mission string"
+
+        # Observations are dictionaries with both an image
+        # and a textual mission string
+        obs = {
+            'image': image,
+            'mission': self.mission
+        }
 
         return obs
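
The new assert makes the mission string part of the environment contract: every subclass must set self.mission before _genGrid returns, exactly as the per-environment hunks above do. A minimal sketch of a conforming environment (the class is hypothetical; Grid.wallRect is assumed for the boundary walls):

    from gym_minigrid.minigrid import MiniGridEnv, Grid, Goal

    class MyGoalEnv(MiniGridEnv):
        """Hypothetical environment illustrating the new contract."""

        def __init__(self):
            super().__init__(gridSize=8, maxSteps=40)

        def _genGrid(self, width, height):
            grid = Grid(width, height)
            grid.wallRect(0, 0, width, height)  # surrounding walls
            grid.set(width - 2, height - 2, Goal())

            # Required: without this, the assert above fires on reset
            self.mission = "get to the green goal square"

            return grid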
 

run_tests.py (+2 -2)

@@ -43,7 +43,7 @@ for envName in envList:
         obs, reward, done, info = env.step(action)
 
         # Test observation encode/decode roundtrip
-        img = obs if type(obs) is np.ndarray else obs['image']
+        img = obs['image']
         grid = Grid.decode(img)
         img2 = grid.encode()
         assert np.array_equal(img2, img)
@@ -76,7 +76,7 @@ env.reset()
 for i in range(0, 200):
     action = random.randint(0, env.action_space.n - 1)
     obs, reward, done, info = env.step(action)
-    goalVisible = ('green', 'goal') in Grid.decode(obs)
+    goalVisible = ('green', 'goal') in Grid.decode(obs['image'])
     assert env.agentSees(*goalPos) == goalVisible
     if done:
         env.reset()
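
With every environment on the same format, a couple of extra assertions could pin the dict shape down directly; a possible addition to the test loop:

    # Sanity-check the new observation format
    assert isinstance(obs, dict)
    assert 'image' in obs and 'mission' in obs
    assert isinstance(obs['mission'], str)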