3 سال پیش · d20e9133b3
--- a/gym_minigrid/wrappers.py
+++ b/gym_minigrid/wrappers.py
@@ -7,7 +7,8 @@ import gym
 
																 from gym import error, spaces, utils
															
 
																 from .minigrid import OBJECT_TO_IDX, COLOR_TO_IDX, STATE_TO_IDX, Goal
															
 
																-class ReseedWrapper(gym.core.Wrapper):
															
 
																+
															
 
																+class ReseedWrapper(gym.Wrapper):
															
 
																     """
															
 
																     Wrapper to always regenerate an environment with the same set of seeds.
															
 
																     This can be used to force an environment to always keep the same
															
@@ -28,7 +29,8 @@ class ReseedWrapper(gym.core.Wrapper):
 
																         obs, reward, done, info = self.env.step(action)
															
 
																         return obs, reward, done, info
															
 
																-class ActionBonus(gym.core.Wrapper):
															
 
																+
															
 
																+class ActionBonus(gym.Wrapper):
															
 
																     """
															
 
																     Wrapper which adds an exploration bonus.
															
 
																     This is a reward to encourage exploration of less
															
@@ -62,7 +64,8 @@ class ActionBonus(gym.core.Wrapper):
 
																     def reset(self, **kwargs):
															
 
																         return self.env.reset(**kwargs)
															
 
																-class StateBonus(gym.core.Wrapper):
															
 
																+
															
 
																+class StateBonus(gym.Wrapper):
															
 
																     """
															
 
																     Adds an exploration bonus based on which positions
															
 
																     are visited on the grid.
															
@@ -97,7 +100,8 @@ class StateBonus(gym.core.Wrapper):
 
																     def reset(self, **kwargs):
															
 
																         return self.env.reset(**kwargs)
															
 
																-class ImgObsWrapper(gym.core.ObservationWrapper):
															
 
																+
															
 
																+class ImgObsWrapper(gym.ObservationWrapper):
															
 
																     """
															
 
																     Use the image as the only observation output, no language/mission.
															
 
																     """
															
@@ -109,7 +113,8 @@ class ImgObsWrapper(gym.core.ObservationWrapper):
 
																     def observation(self, obs):
															
 
																         return obs['image']
															
 
																-class OneHotPartialObsWrapper(gym.core.ObservationWrapper):
															
 
																+
															
 
																+class OneHotPartialObsWrapper(gym.ObservationWrapper):
															
 
																     """
															
 
																     Wrapper to get a one-hot encoding of a partially observable
															
 
																     agent view as observation.
															
@@ -131,11 +136,13 @@ class OneHotPartialObsWrapper(gym.core.ObservationWrapper):
 
																             shape=(obs_shape[0], obs_shape[1], num_bits),
															
 
																             dtype='uint8'
															
 
																         )
															
 
																-        self.observation_space = spaces.Dict({**self.observation_space, 'image':new_image_space})
															
 
																+        self.observation_space = spaces.Dict(
															
 
																+            {**self.observation_space, 'image': new_image_space})
															
 
																     def observation(self, obs):
															
 
																         img = obs['image']
															
 
																-        out = np.zeros(self.observation_space.spaces['image'].shape, dtype='uint8')
															
 
																+        out = np.zeros(
															
 
																+            self.observation_space.spaces['image'].shape, dtype='uint8')
															
 
																         for i in range(img.shape[0]):
															
 
																             for j in range(img.shape[1]):
															
@@ -152,7 +159,8 @@ class OneHotPartialObsWrapper(gym.core.ObservationWrapper):
 
																             'image': out
															
 
																         }
															
 
																-class RGBImgObsWrapper(gym.core.ObservationWrapper):
															
 
																+
															
 
																+class RGBImgObsWrapper(gym.ObservationWrapper):
															
 
																     """
															
 
																     Wrapper to use fully observable RGB image as observation,
															
 
																     This can be used to have the agent solve the gridworld in pixel space.
															
@@ -171,7 +179,8 @@ class RGBImgObsWrapper(gym.core.ObservationWrapper):
 
																             dtype='uint8'
															
 
																         )
															
 
																-        self.observation_space = spaces.Dict({**self.observation_space, 'image':new_image_space})
															
 
																+        self.observation_space = spaces.Dict(
															
 
																+            {**self.observation_space, 'image': new_image_space})
															
 
																     def observation(self, obs):
															
 
																         env = self.unwrapped
															
@@ -188,7 +197,7 @@ class RGBImgObsWrapper(gym.core.ObservationWrapper):
 
																         }
															
 
																-class RGBImgPartialObsWrapper(gym.core.ObservationWrapper):
															
 
																+class RGBImgPartialObsWrapper(gym.ObservationWrapper):
															
 
																     """
															
 
																     Wrapper to use partially observable RGB image as observation.
															
 
																     This can be used to have the agent solve the gridworld in pixel space.
															
@@ -207,7 +216,8 @@ class RGBImgPartialObsWrapper(gym.core.ObservationWrapper):
 
																             dtype='uint8'
															
 
																         )
															
 
																-        self.observation_space = spaces.Dict({**self.observation_space, 'image':new_image_space})
															
 
																+        self.observation_space = spaces.Dict(
															
 
																+            {**self.observation_space, 'image': new_image_space})
															
 
																     def observation(self, obs):
															
 
																         env = self.unwrapped
															
@@ -222,7 +232,8 @@ class RGBImgPartialObsWrapper(gym.core.ObservationWrapper):
 
																             'image': rgb_img_partial
															
 
																         }
															
 
																-class FullyObsWrapper(gym.core.ObservationWrapper):
															
 
																+
															
 
																+class FullyObsWrapper(gym.ObservationWrapper):
															
 
																     """
															
 
																     Fully observable gridworld using a compact grid encoding
															
 
																     """
															
@@ -237,7 +248,8 @@ class FullyObsWrapper(gym.core.ObservationWrapper):
 
																             dtype='uint8'
															
 
																         )
															
 
																-        self.observation_space = spaces.Dict({**self.observation_space, 'image':new_image_space})
															
 
																+        self.observation_space = spaces.Dict(
															
 
																+            {**self.observation_space, 'image': new_image_space})
															
 
																     def observation(self, obs):
															
 
																         env = self.unwrapped
															
@@ -253,7 +265,8 @@ class FullyObsWrapper(gym.core.ObservationWrapper):
 
																             'image': full_grid
															
 
																         }
															
 
																-class DictObservationSpaceWrapper(gym.core.ObservationWrapper):
															
 
																+
															
 
																+class DictObservationSpaceWrapper(gym.ObservationWrapper):
															
 
																     """
															
 
																     Use a Dict Observation Space encoding images, missions, and directions
															
 
																     """
															
@@ -268,7 +281,7 @@ class DictObservationSpaceWrapper(gym.core.ObservationWrapper):
 
																         if word_dict is None:
															
 
																             word_dict = DictObservationSpaceWrapper.get_minigrid_words()
															
 
																-            
															
 
																+
															
 
																         self.max_words_in_mission = max_words_in_mission
															
 
																         self.word_dict = word_dict
															
@@ -282,24 +295,23 @@ class DictObservationSpaceWrapper(gym.core.ObservationWrapper):
 
																             'image': image_observation_space,
															
 
																             'direction': spaces.Discrete(4),
															
 
																             'mission': spaces.MultiDiscrete([len(self.word_dict.keys())]
															
 
																-             * max_words_in_mission)
															
 
																+                                            * max_words_in_mission)
															
 
																         })
															
 
																     @staticmethod
															
 
																     def get_minigrid_words():
															
 
																         colors = ['red', 'green', 'blue', 'yellow', 'purple', 'grey']
															
 
																         objects = ['unseen', 'empty', 'wall', 'floor', 'box', 'key', 'ball',
															
 
																-        'door', 'goal', 'agent', 'lava']
															
 
																+                   'door', 'goal', 'agent', 'lava']
															
 
																         verbs = ['pick', 'avoid', 'get', 'find', 'put',
															
 
																-                'use', 'open', 'go', 'fetch',
															
 
																-                'reach', 'unlock', 'traverse']
															
 
																+                 'use', 'open', 'go', 'fetch',
															
 
																+                 'reach', 'unlock', 'traverse']
															
 
																         extra_words = ['up', 'the', 'a', 'at', ',', 'square',
															
 
																-                    'and', 'then', 'to', 'of', 'rooms', 'near',
															
 
																-                    'opening', 'must', 'you', 'matching', 'end',
															
 
																-                    'hallway', 'object', 'from', 'room']
															
 
																-
															
 
																+                       'and', 'then', 'to', 'of', 'rooms', 'near',
															
 
																+                       'opening', 'must', 'you', 'matching', 'end',
															
 
																+                       'hallway', 'object', 'from', 'room']
															
 
																         all_words = colors + objects + verbs + extra_words
															
 
																         assert len(all_words) == len(set(all_words))
															
@@ -310,22 +322,25 @@ class DictObservationSpaceWrapper(gym.core.ObservationWrapper):
 
																         Convert a string to a list of indices.
															
 
																         """
															
 
																         indices = []
															
 
																-        string = string.replace(',', ' , ')  # adding space before and after commas
															
 
																+        # adding space before and after commas
															
 
																+        string = string.replace(',', ' , ')
															
 
																         for word in string.split():
															
 
																             if word in self.word_dict.keys():
															
 
																                 indices.append(self.word_dict[word] + offset)
															
 
																             else:
															
 
																                 raise ValueError('Unknown word: {}'.format(word))
															
 
																         return indices
															
 
																-        
															
 
																+
															
 
																     def observation(self, obs):
															
 
																         obs['mission'] = self.string_to_indices(obs['mission'])
															
 
																         assert len(obs['mission']) < self.max_words_in_mission
															
 
																-        obs['mission'] += [0] * (self.max_words_in_mission - len(obs['mission']))
															
 
																+        obs['mission'] += [0] * \
															
 
																+            (self.max_words_in_mission - len(obs['mission']))
															
 
																         return obs
															
 
																-        
															
 
																-class FlatObsWrapper(gym.core.ObservationWrapper):
															
 
																+
															
 
																+
															
 
																+class FlatObsWrapper(gym.ObservationWrapper):
															
 
																     """
															
 
																     Encode mission strings using a one-hot scheme,
															
 
																     and combine these with observed images into one flat array
															
@@ -356,10 +371,12 @@ class FlatObsWrapper(gym.core.ObservationWrapper):
 
																         # Cache the last-encoded mission string
															
 
																         if mission != self.cachedStr:
															
 
																-            assert len(mission) <= self.maxStrLen, 'mission string too long ({} chars)'.format(len(mission))
															
 
																+            assert len(mission) <= self.maxStrLen, 'mission string too long ({} chars)'.format(
															
 
																+                len(mission))
															
 
																             mission = mission.lower()
															
 
																-            strArray = np.zeros(shape=(self.maxStrLen, self.numCharCodes), dtype='float32')
															
 
																+            strArray = np.zeros(
															
 
																+                shape=(self.maxStrLen, self.numCharCodes), dtype='float32')
															
 
																             for idx, ch in enumerate(mission):
															
 
																                 if ch >= 'a' and ch <= 'z':
															
@@ -376,7 +393,8 @@ class FlatObsWrapper(gym.core.ObservationWrapper):
 
																         return obs
															
 
																-class ViewSizeWrapper(gym.core.Wrapper):
															
 
																+
															
 
																+class ViewSizeWrapper(gym.Wrapper):
															
 
																     """
															
 
																     Wrapper to customize the agent field of view size.
															
 
																     This cannot be used with fully observable wrappers.
															
@@ -399,7 +417,8 @@ class ViewSizeWrapper(gym.core.Wrapper):
 
																         )
															
 
																         # Override the environment's observation space
															
 
																-        self.observation_space = spaces.Dict({**self.observation_space, 'image':new_image_space})
															
 
																+        self.observation_space = spaces.Dict(
															
 
																+            {**self.observation_space, 'image': new_image_space})
															
 
																     def observation(self, obs):
															
 
																         env = self.unwrapped
															
@@ -409,18 +428,19 @@ class ViewSizeWrapper(gym.core.Wrapper):
 
																         # Encode the partially observable view into a numpy array
															
 
																         image = grid.encode(vis_mask)
															
 
																-
															
 
																         return {
															
 
																             **obs,
															
 
																             'image': image
															
 
																         }
															
 
																-class DirectionObsWrapper(gym.core.ObservationWrapper):
															
 
																+
															
 
																+class DirectionObsWrapper(gym.ObservationWrapper):
															
 
																     """
															
 
																     Provides the slope/angular direction to the goal with the observations as modeled by (y2 - y1) / (x2 - x1)
															
 
																     type = {slope , angle}
															
 
																     """
															
 
																-    def __init__(self, env,type='slope'):
															
 
																+
															
 
																+    def __init__(self, env, type='slope'):
															
 
																         super().__init__(env)
															
 
																         self.goal_position = None
															
 
																         self.type = type
															
@@ -428,17 +448,23 @@ class DirectionObsWrapper(gym.core.ObservationWrapper):
 
																     def reset(self):
															
 
																         obs = self.env.reset()
															
 
																         if not self.goal_position:
															
 
																-            self.goal_position = [x for x,y in enumerate(self.grid.grid) if isinstance(y,(Goal) ) ]
															
 
																-            if len(self.goal_position) >= 1: # in case there are multiple goals , needs to be handled for other env types
															
 
																-                self.goal_position = (int(self.goal_position[0]/self.height) , self.goal_position[0]%self.width)
															
 
																+            self.goal_position = [x for x, y in enumerate(
															
 
																+                self.grid.grid) if isinstance(y, (Goal))]
															
 
																+            # only the first goal is used; multiple goals need to be handled for other env types
															
 
																+            if len(self.goal_position) >= 1:
															
 
																+                self.goal_position = (
															
 
																+                    int(self.goal_position[0]/self.height), self.goal_position[0] % self.width)
															
 
																         return obs
															
 
																     def observation(self, obs):
															
 
																-        slope = np.divide( self.goal_position[1] - self.agent_pos[1] ,  self.goal_position[0] - self.agent_pos[0])
															
 
																-        obs['goal_direction'] = np.arctan( slope ) if self.type == 'angle' else slope
															
 
																+        slope = np.divide(
															
 
																+            self.goal_position[1] - self.agent_pos[1],  self.goal_position[0] - self.agent_pos[0])
															
 
																+        obs['goal_direction'] = np.arctan(
															
 
																+            slope) if self.type == 'angle' else slope
															
 
																         return obs
															
 
																-class SymbolicObsWrapper(gym.core.ObservationWrapper):
															
 
																+
															
 
																+class SymbolicObsWrapper(gym.ObservationWrapper):
															
 
																     """
															
 
																     Fully observable grid with a symbolic state representation.
															
 
																     The symbol is a triple of (X, Y, IDX), where X and Y are
															
@@ -454,7 +480,8 @@ class SymbolicObsWrapper(gym.core.ObservationWrapper):
 
																             shape=(self.env.width, self.env.height, 3),  # number of cells
															
 
																             dtype="uint8",
															
 
																         )
															
 
																-        self.observation_space = spaces.Dict({**self.observation_space, 'image':new_image_space})
															
 
																+        self.observation_space = spaces.Dict(
															
 
																+            {**self.observation_space, 'image': new_image_space})
															
 
																     def observation(self, obs):
															
 
																         objects = np.array(