Bladeren bron

Completed implementation of goto env

Maxime Chevalier-Boisvert 7 jaren geleden
bovenliggende
commit
116bfb2d4b
3 gewijzigde bestanden met toevoegingen van 48 en 20 verwijderingen
  1. + 19 - 8
      gym_minigrid/envs/fourroomqa.py
  2. + 22 - 12
      gym_minigrid/envs/gotoobject.py
  3. + 7 - 0
      gym_minigrid/minigrid.py

+ 19 - 8
gym_minigrid/envs/fourroomqa.py

@@ -30,14 +30,15 @@ class FourRoomQAEnv(MiniGridEnv):
         right = 1
         forward = 2
         toggle = 3
-        say = 4
+        wait = 4
+        answer = 5
 
     def __init__(self, size=16):
         assert size >= 10
         super(FourRoomQAEnv, self).__init__(gridSize=size, maxSteps=8*size)
 
         # Action enumeration for this environment
-        self.actions = MiniGridEnv.Actions
+        self.actions = FourRoomQAEnv.Actions
 
         # TODO: dictionary action_space, to include answer sentence?
         # Actions are discrete integer values
@@ -45,6 +46,8 @@ class FourRoomQAEnv(MiniGridEnv):
 
         # TODO: dictionary observation_space, to include question?
 
+        self.reward_range = (-1000, 1000)
+
     def _randPos(self, room, border=1):
         return (
             self._randInt(
@@ -181,8 +184,8 @@ class FourRoomQAEnv(MiniGridEnv):
 
         # TODO: how many X in the Y room question type
 
-        print(self.question)
-        print(self.answer)
+        #print(self.question)
+        #print(self.answer)
 
         return grid
 
@@ -203,12 +206,20 @@ class FourRoomQAEnv(MiniGridEnv):
         else:
             answer = ''
 
-        obs, reward, done, info = MiniGridEnv._step(self, action)
-
-        if answer == self.answer:
-            reward = 1000 - self.stepCount
+        if action == self.actions.answer:
+            # To the superclass, this action behaves like a noop
+            obs, reward, done, info = MiniGridEnv._step(self, self.actions.wait)
             done = True
 
+            if answer == self.answer:
+                reward = 1000 - self.stepCount
+            else:
+                reward = -1000
+
+        else:
+            # Let the superclass handle the action
+            obs, reward, done, info = MiniGridEnv._step(self, action)
+
         obs = {
             'image': obs,
             'question': self.question

+ 22 - 12
gym_minigrid/envs/gotoobject.py

@@ -14,6 +14,7 @@ class GoToObjectEnv(MiniGridEnv):
     ):
         self.numObjs = numObjs
         super().__init__(gridSize=size, maxSteps=5*size)
+
         self.reward_range = (-1000, 1000)
 
     def _genGrid(self, width, height):
@@ -34,12 +35,17 @@ class GoToObjectEnv(MiniGridEnv):
         colors = list(COLORS.keys())
 
         objs = []
+        objPos = []
 
         # For each object to be generated
         for i in range(0, self.numObjs):
             objType = self._randElem(types)
             objColor = self._randElem(colors)
 
+            # If this object already exists, try again
+            if (objType, objColor) in objs:
+                continue
+
             if objType == 'key':
                 obj = Key(objColor)
             elif objType == 'ball':
@@ -57,12 +63,13 @@ class GoToObjectEnv(MiniGridEnv):
                     grid.set(*pos, obj)
                     break
 
-            objs.append(obj)
+            objs.append((objType, objColor))
+            objPos.append(pos)
 
         # Choose a random object to be picked up
-        target = objs[self._randInt(0, len(objs))]
-        self.targetType = target.type
-        self.targetColor = target.color
+        objIdx = self._randInt(0, len(objs))
+        self.targetType, self.targetColor = objs[objIdx]
+        self.targetPos = objPos[objIdx]
 
         descStr = '%s %s' % (self.targetColor, self.targetType)
 
@@ -83,6 +90,7 @@ class GoToObjectEnv(MiniGridEnv):
         """
 
         self.mission = 'go to the %s' % descStr
+        #print(self.mission)
 
         return grid
 
@@ -106,14 +114,16 @@ class GoToObjectEnv(MiniGridEnv):
     def _step(self, action):
         obs, reward, done, info = MiniGridEnv._step(self, action)
 
-        #if self.carrying:
-        #    if self.carrying.color == self.targetColor and \
-        #       self.carrying.type == self.targetType:
-        #        reward = 1000 - self.stepCount
-        #        done = True
-        #    else:
-        #        reward = -1000
-        #        done = True
+        ax, ay = self.agentPos
+        tx, ty = self.targetPos
+
+        # Reward being next to the object
+        # Double reward waiting next to the object
+        if abs(ax - tx) <= 1 and abs(ay - ty) <= 1:
+            if action == self.actions.wait:
+                reward = 2
+            else:
+                reward = 1
 
         obs = self._observation(obs)
 

+ 7 - 0
gym_minigrid/minigrid.py

@@ -497,7 +497,10 @@ class MiniGridEnv(gym.Env):
         left = 0
         right = 1
         forward = 2
+        # Toggle/pick up/activate object
         toggle = 3
+        # Wait/stay put/do nothing
+        wait = 4
 
     def __init__(self, gridSize=16, maxSteps=100):
         # Action enumeration for this environment
@@ -699,6 +702,10 @@ class MiniGridEnv(gym.Env):
             elif cell:
                 cell.toggle(self, objPos)
 
+        # Wait/do nothing
+        elif action == self.actions.wait:
+            pass
+
         else:
             assert False, "unknown action"