|
@@ -5,11 +5,12 @@ from functools import reduce
|
|
|
import gym
|
|
|
import numpy as np
|
|
|
from gym import spaces
|
|
|
+from gym.core import ObservationWrapper, Wrapper
|
|
|
|
|
|
from gym_minigrid.minigrid import COLOR_TO_IDX, OBJECT_TO_IDX, STATE_TO_IDX, Goal
|
|
|
|
|
|
|
|
|
-class ReseedWrapper(gym.Wrapper):
|
|
|
+class ReseedWrapper(Wrapper):
|
|
|
"""
|
|
|
Wrapper to always regenerate an environment with the same set of seeds.
|
|
|
This can be used to force an environment to always keep the same
|
|
@@ -31,7 +32,7 @@ class ReseedWrapper(gym.Wrapper):
|
|
|
return obs, reward, done, info
|
|
|
|
|
|
|
|
|
-class ActionBonus(gym.Wrapper):
|
|
|
+class ActionBonus(Wrapper):
|
|
|
"""
|
|
|
Wrapper which adds an exploration bonus.
|
|
|
This is a reward to encourage exploration of less
|
|
@@ -66,7 +67,7 @@ class ActionBonus(gym.Wrapper):
|
|
|
return self.env.reset(**kwargs)
|
|
|
|
|
|
|
|
|
-class StateBonus(gym.Wrapper):
|
|
|
+class StateBonus(Wrapper):
|
|
|
"""
|
|
|
Adds an exploration bonus based on which positions
|
|
|
are visited on the grid.
|
|
@@ -102,7 +103,7 @@ class StateBonus(gym.Wrapper):
|
|
|
return self.env.reset(**kwargs)
|
|
|
|
|
|
|
|
|
-class ImgObsWrapper(gym.ObservationWrapper):
|
|
|
+class ImgObsWrapper(ObservationWrapper):
|
|
|
"""
|
|
|
Use the image as the only observation output, no language/mission.
|
|
|
"""
|
|
@@ -115,7 +116,7 @@ class ImgObsWrapper(gym.ObservationWrapper):
|
|
|
return obs["image"]
|
|
|
|
|
|
|
|
|
-class OneHotPartialObsWrapper(gym.ObservationWrapper):
|
|
|
+class OneHotPartialObsWrapper(ObservationWrapper):
|
|
|
"""
|
|
|
Wrapper to get a one-hot encoding of a partially observable
|
|
|
agent view as observation.
|
|
@@ -155,7 +156,7 @@ class OneHotPartialObsWrapper(gym.ObservationWrapper):
|
|
|
return {**obs, "image": out}
|
|
|
|
|
|
|
|
|
-class RGBImgObsWrapper(gym.ObservationWrapper):
|
|
|
+class RGBImgObsWrapper(ObservationWrapper):
|
|
|
"""
|
|
|
Wrapper to use fully observable RGB image as observation,
|
|
|
This can be used to have the agent to solve the gridworld in pixel space.
|
|
@@ -187,7 +188,7 @@ class RGBImgObsWrapper(gym.ObservationWrapper):
|
|
|
return {**obs, "image": rgb_img}
|
|
|
|
|
|
|
|
|
-class RGBImgPartialObsWrapper(gym.ObservationWrapper):
|
|
|
+class RGBImgPartialObsWrapper(ObservationWrapper):
|
|
|
"""
|
|
|
Wrapper to use partially observable RGB image as observation.
|
|
|
This can be used to have the agent to solve the gridworld in pixel space.
|
|
@@ -218,7 +219,7 @@ class RGBImgPartialObsWrapper(gym.ObservationWrapper):
|
|
|
return {**obs, "image": rgb_img_partial}
|
|
|
|
|
|
|
|
|
-class FullyObsWrapper(gym.ObservationWrapper):
|
|
|
+class FullyObsWrapper(ObservationWrapper):
|
|
|
"""
|
|
|
Fully observable gridworld using a compact grid encoding
|
|
|
"""
|
|
@@ -247,7 +248,7 @@ class FullyObsWrapper(gym.ObservationWrapper):
|
|
|
return {**obs, "image": full_grid}
|
|
|
|
|
|
|
|
|
-class DictObservationSpaceWrapper(gym.ObservationWrapper):
|
|
|
+class DictObservationSpaceWrapper(ObservationWrapper):
|
|
|
"""
|
|
|
Transforms the observation space (that has a textual component) to a fully numerical observation space,
|
|
|
where the textual instructions are replaced by arrays representing the indices of each word in a fixed vocabulary.
|
|
@@ -365,7 +366,7 @@ class DictObservationSpaceWrapper(gym.ObservationWrapper):
|
|
|
return obs
|
|
|
|
|
|
|
|
|
-class FlatObsWrapper(gym.ObservationWrapper):
|
|
|
+class FlatObsWrapper(ObservationWrapper):
|
|
|
"""
|
|
|
Encode mission strings using a one-hot scheme,
|
|
|
and combine these with observed images into one flat array
|
|
@@ -424,7 +425,7 @@ class FlatObsWrapper(gym.ObservationWrapper):
|
|
|
return obs
|
|
|
|
|
|
|
|
|
-class ViewSizeWrapper(gym.Wrapper):
|
|
|
+class ViewSizeWrapper(Wrapper):
|
|
|
"""
|
|
|
Wrapper to customize the agent field of view size.
|
|
|
This cannot be used with fully observable wrappers.
|
|
@@ -459,7 +460,7 @@ class ViewSizeWrapper(gym.Wrapper):
|
|
|
return {**obs, "image": image}
|
|
|
|
|
|
|
|
|
-class DirectionObsWrapper(gym.ObservationWrapper):
|
|
|
+class DirectionObsWrapper(ObservationWrapper):
|
|
|
"""
|
|
|
Provides the slope/angular direction to the goal with the observations as modeled by (y2 - y2 )/( x2 - x1)
|
|
|
type = {slope , angle}
|
|
@@ -493,7 +494,7 @@ class DirectionObsWrapper(gym.ObservationWrapper):
|
|
|
return obs
|
|
|
|
|
|
|
|
|
-class SymbolicObsWrapper(gym.ObservationWrapper):
|
|
|
+class SymbolicObsWrapper(ObservationWrapper):
|
|
|
"""
|
|
|
Fully observable grid with a symbolic state representation.
|
|
|
The symbol is a triple of (X, Y, IDX), where X and Y are
|