Sfoglia il codice sorgente

Added smaller DoorKey environments, exploration bonus wrapper

Maxime Chevalier-Boisvert 7 anni fa
parent
commit
23d1b6b98d
3 file modificati con 56 aggiunte e 3 eliminazioni
  1. 2 0
      README.md
  2. 19 1
      gym_minigrid/envs/simple_envs.py
  3. 35 2
      gym_minigrid/wrappers.py

+ 2 - 0
README.md

@@ -95,6 +95,8 @@ and with large rooms to experiment with sparse rewards.
 ### Door & key environment
 
 Registered configurations:
+- `MiniGrid-DoorKey-5x5-v0`
+- `MiniGrid-DoorKey-6x6-v0`
 - `MiniGrid-DoorKey-8x8-v0`
 - `MiniGrid-DoorKey-16x16-v0`
 

+ 19 - 1
gym_minigrid/envs/simple_envs.py

@@ -46,7 +46,7 @@ class DoorKeyEnv(MiniGridEnv):
         gridSz = width
 
         # Create a vertical splitting wall
-        splitIdx = self._randInt(2, gridSz-3)
+        splitIdx = self._randInt(2, gridSz-2)
         for i in range(0, gridSz):
             grid.set(splitIdx, i, Wall())
 
@@ -61,11 +61,29 @@ class DoorKeyEnv(MiniGridEnv):
 
         return grid
 
+class DoorKeyEnv5x5(DoorKeyEnv):
+    def __init__(self):
+        super().__init__(size=5)
+
+class DoorKeyEnv6x6(DoorKeyEnv):
+    def __init__(self):
+        super().__init__(size=6)
+
 class DoorKeyEnv16x16(DoorKeyEnv):
     def __init__(self):
         super().__init__(size=16)
 
 register(
+    id='MiniGrid-DoorKey-5x5-v0',
+    entry_point='gym_minigrid.envs:DoorKeyEnv5x5'
+)
+
+register(
+    id='MiniGrid-DoorKey-6x6-v0',
+    entry_point='gym_minigrid.envs:DoorKeyEnv6x6'
+)
+
+register(
     id='MiniGrid-DoorKey-8x8-v0',
     entry_point='gym_minigrid.envs:DoorKeyEnv'
 )

+ 35 - 2
gym_minigrid/wrappers.py

@@ -1,8 +1,7 @@
 import math
 import gym
 
-class ExplBonus(gym.core.Wrapper):
-
+class ActionBonus(gym.core.Wrapper):
     """
     Wrapper which adds an exploration bonus.
     This is a reward to encourage exploration of less
@@ -34,3 +33,37 @@ class ExplBonus(gym.core.Wrapper):
         reward += bonus
 
         return obs, reward, done, info
+
+class StateBonus(gym.core.Wrapper):
+    """
+    Adds an exploration bonus based on which positions
+    are visited on the grid: each step adds 1 / sqrt(n) to the
+    reward, where n counts visits to the agent's new position.
+    """
+
+    def __init__(self, env):
+        super().__init__(env)
+        # Maps a position key to the number of times it was seen
+        self.counts = {}
+
+    def _step(self, action):
+
+        obs, reward, done, info = self.env.step(action)
+
+        # Key on which we index the counts
+        # We use the position after an update
+        # Parentheses alone do not make a tuple; tuple() keeps the
+        # key hashable even if agentPos is a list or an array
+        env = self.unwrapped
+        tup = tuple(env.agentPos)
+
+        # Increment the visit count for this key
+        newCnt = self.counts.get(tup, 0) + 1
+        self.counts[tup] = newCnt
+
+        # Bonus shrinks as the position is visited more often
+        bonus = 1 / math.sqrt(newCnt)
+
+        reward += bonus
+
+        return obs, reward, done, info