8 年之前 · 23d1b6b98d
--- a/README.md
+++ b/README.md
@@ -95,6 +95,8 @@ and with large rooms to experiment with sparse rewards.
 
				 ### Door & key environment
			
 
				 
			
 
				 Registered configurations:
			
 
				+- `MiniGrid-DoorKey-5x5-v0`
			
 
				+- `MiniGrid-DoorKey-6x6-v0`
			
 
				 - `MiniGrid-DoorKey-8x8-v0`
			
 
				 - `MiniGrid-DoorKey-16x16-v0`
			
 
				 
			
--- a/gym_minigrid/envs/simple_envs.py
+++ b/gym_minigrid/envs/simple_envs.py
@@ -46,7 +46,7 @@ class DoorKeyEnv(MiniGridEnv):
 
				         gridSz = width
			
 
				 
			
 
				         # Create a vertical splitting wall
			
 
				-        splitIdx = self._randInt(2, gridSz-3)
			
 
				+        splitIdx = self._randInt(2, gridSz-2)
			
 
				         for i in range(0, gridSz):
			
 
				             grid.set(splitIdx, i, Wall())
			
 
				 
			
@@ -61,11 +61,29 @@ class DoorKeyEnv(MiniGridEnv):
 
				 
			
 
				         return grid
			
 
				 
			
 
				+class DoorKeyEnv5x5(DoorKeyEnv):
			
 
				+    def __init__(self):
			
 
				+        super().__init__(size=5)
			
 
				+
			
 
				+class DoorKeyEnv6x6(DoorKeyEnv):
			
 
				+    def __init__(self):
			
 
				+        super().__init__(size=6)
			
 
				+
			
 
				 class DoorKeyEnv16x16(DoorKeyEnv):
			
 
				     def __init__(self):
			
 
				         super().__init__(size=16)
			
 
				 
			
 
				 register(
			
 
				+    id='MiniGrid-DoorKey-5x5-v0',
			
 
				+    entry_point='gym_minigrid.envs:DoorKeyEnv5x5'
			
 
				+)
			
 
				+
			
 
				+register(
			
 
				+    id='MiniGrid-DoorKey-6x6-v0',
			
 
				+    entry_point='gym_minigrid.envs:DoorKeyEnv6x6'
			
 
				+)
			
 
				+
			
 
				+register(
			
 
				     id='MiniGrid-DoorKey-8x8-v0',
			
 
				     entry_point='gym_minigrid.envs:DoorKeyEnv'
			
 
				 )
			
--- a/gym_minigrid/wrappers.py
+++ b/gym_minigrid/wrappers.py
@@ -1,8 +1,7 @@
 
				 import math
			
 
				 import gym
			
 
				 
			
 
				-class ExplBonus(gym.core.Wrapper):
			
 
				-
			
 
				+class ActionBonus(gym.core.Wrapper):
			
 
				     """
			
 
				     Wrapper which adds an exploration bonus.
			
 
				     This is a reward to encourage exploration of less
			
@@ -34,3 +33,37 @@ class ExplBonus(gym.core.Wrapper):
 
				         reward += bonus
			
 
				 
			
 
				         return obs, reward, done, info
			
 
				+
			
 
				+class StateBonus(gym.core.Wrapper):
			
 
				+    """
			
 
				+    Adds an exploration bonus based on which positions
			
 
				+    are visited on the grid.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, env):
			
 
				+        super().__init__(env)
			
 
				+        self.counts = {}
			
 
				+
			
 
				+    def _step(self, action):
			
 
				+
			
 
				+        obs, reward, done, info = self.env.step(action)
			
 
				+
			
 
				+        # Tuple based on which we index the counts
			
 
				+        # We use the position after an update
			
 
				+        env = self.unwrapped
			
 
				+        tup = (env.agentPos)
			
 
				+
			
 
				+        # Get the count for this key
			
 
				+        preCnt = 0
			
 
				+        if tup in self.counts:
			
 
				+            preCnt = self.counts[tup]
			
 
				+
			
 
				+        # Update the count for this key
			
 
				+        newCnt = preCnt + 1
			
 
				+        self.counts[tup] = newCnt
			
 
				+
			
 
				+        bonus = 1 / math.sqrt(newCnt)
			
 
				+
			
 
				+        reward += bonus
			
 
				+
			
 
				+        return obs, reward, done, info