浏览代码

Implemented count-based exploration system (intrinsic motivation)

Maxime Chevalier-Boisvert 7 年之前
父节点
当前提交
f2824c7687
共有 3 个文件被更改,包括 55 次插入9 次删除
  1. 2 1
      README.md
  2. 17 8
      gym_minigrid/envs/simple_envs.py
  3. 36 0
      gym_minigrid/wrappers.py

+ 2 - 1
README.md

@@ -78,8 +78,9 @@ The environments listed below are implemented in the [gym_minigrid/envs](/gym_mi
 ### Empty environment
 
 Registered configurations:
-- `MiniGrid-Empty-8x8-v0`
 - `MiniGrid-Empty-6x6-v0`
+- `MiniGrid-Empty-8x8-v0`
+- `MiniGrid-Empty-15x15-v0`
 
 <p align="center">
 <img src="/figures/empty-env.png">

+ 17 - 8
gym_minigrid/envs/simple_envs.py

@@ -7,11 +7,20 @@ class EmptyEnv(MiniGridEnv):
     """
 
     def __init__(self, size=8):
-        super(EmptyEnv, self).__init__(gridSize=size, maxSteps=2 * size)
+        super().__init__(gridSize=size, maxSteps=3 * size)
 
 class EmptyEnv6x6(EmptyEnv):
     def __init__(self):
-        super(EmptyEnv6x6, self).__init__(size=6)
+        super().__init__(size=6)
+
+class EmptyEnv15x15(EmptyEnv):
+    def __init__(self):
+        super().__init__(size=15)
+
+register(
+    id='MiniGrid-Empty-6x6-v0',
+    entry_point='gym_minigrid.envs:EmptyEnv6x6'
+)
 
 register(
     id='MiniGrid-Empty-8x8-v0',
@@ -19,8 +28,8 @@ register(
 )
 
 register(
-    id='MiniGrid-Empty-6x6-v0',
-    entry_point='gym_minigrid.envs:EmptyEnv6x6'
+    id='MiniGrid-Empty-15x15-v0',
+    entry_point='gym_minigrid.envs:EmptyEnv15x15'
 )
 
 class DoorKeyEnv(MiniGridEnv):
@@ -29,10 +38,10 @@ class DoorKeyEnv(MiniGridEnv):
     """
 
     def __init__(self, size=8):
-        super(DoorKeyEnv, self).__init__(gridSize=size, maxSteps=4 * size)
+        super().__init__(gridSize=size, maxSteps=4 * size)
 
     def _genGrid(self, width, height):
-        grid = super(DoorKeyEnv, self)._genGrid(width, height)
+        grid = super()._genGrid(width, height)
         assert width == height
         gridSz = width
 
@@ -43,7 +52,7 @@ class DoorKeyEnv(MiniGridEnv):
 
         # Place a door in the wall
         doorIdx = self._randInt(1, gridSz-2)
-        grid.set(splitIdx, doorIdx, Door('yellow'))
+        grid.set(splitIdx, doorIdx, LockedDoor('yellow'))
 
         # Place a key on the left side
         #keyIdx = self._randInt(1 + gridSz // 2, gridSz-2)
@@ -54,7 +63,7 @@ class DoorKeyEnv(MiniGridEnv):
 
 class DoorKeyEnv16x16(DoorKeyEnv):
     def __init__(self):
-        super(DoorKeyEnv16x16, self).__init__(size=16)
+        super().__init__(size=16)
 
 register(
     id='MiniGrid-DoorKey-8x8-v0',

+ 36 - 0
gym_minigrid/wrappers.py

@@ -0,0 +1,36 @@
+import math
+import gym
+
+class ExplBonus(gym.core.Wrapper):
+
+    """
+    Wrapper which adds an exploration bonus.
+    This is a reward to encourage exploration of less
+    visited (state,action) pairs.
+    """
+
+    def __init__(self, env):
+        super().__init__(env)
+        # Maps (agentPos, agentDir, action) -> number of times that key has
+        # been seen. Nothing in this class clears the dict, so the counts
+        # accumulate for as long as the wrapper instance is alive.
+        self.counts = {}
+
+    # Forward the action to the wrapped env, then add a count-based bonus
+    # to the reward it returned. obs, done and info are passed through
+    # unchanged.
+    def _step(self, action):
+
+        obs, reward, done, info = self.env.step(action)
+
+        # The key is built from the innermost (unwrapped) env's attributes.
+        # NOTE(review): these are read after the wrapped step() call above,
+        # so they presumably describe the state reached by the action rather
+        # than the state it was taken from -- confirm which one is intended.
+        # Assumes env.agentPos is hashable (e.g. a tuple) -- TODO confirm.
+        env = self.unwrapped
+        tup = (env.agentPos, env.agentDir, action)
+
+        # Get the count for this (s,a) pair
+        # (0 when the key has not been seen before)
+        preCnt = 0
+        if tup in self.counts:
+            preCnt = self.counts[tup]
+
+        # Update the count for this (s,a) pair
+        newCnt = preCnt + 1
+        self.counts[tup] = newCnt
+
+        # newCnt is at least 1 here, so the division is always defined.
+        # The bonus is 1.0 the first time a key is seen and shrinks as
+        # 1/sqrt(count) on later occurrences.
+        bonus = 1 / math.sqrt(newCnt)
+
+        reward += bonus
+
+        return obs, reward, done, info