فهرست منبع

Fixed issues with dynamic obstacles env

Maxime Chevalier-Boisvert 5 سال پیش
والد
کامیت
97bca6e172
1 فایل تغییر یافته به همراه 24 افزوده شده و 16 حذف شده
  1. 24 16
      gym_minigrid/envs/dynamicobstacles.py

+ 24 - 16
gym_minigrid/envs/dynamicobstacles.py

@@ -12,12 +12,10 @@ class DynamicObstaclesEnv(MiniGridEnv):
             size=8,
             agent_start_pos=(1, 1),
             agent_start_dir=0,
-            n_obstacles=4,
-            show_obstacles=True
+            n_obstacles=4
     ):
         self.agent_start_pos = agent_start_pos
         self.agent_start_dir = agent_start_dir
-        self.show_obstacles = show_obstacles
 
         # Reduce obstacles if there are too many
         if n_obstacles <= size/2 + 1:
@@ -52,11 +50,11 @@ class DynamicObstaclesEnv(MiniGridEnv):
             self.place_agent()
 
         # Place obstacles
-        if self.show_obstacles:
-            self.obstacles = []
-            for i_obst in range(self.n_obstacles):
-                self.obstacles.append(Ball())
-                self.place_obj(self.obstacles[i_obst], max_tries=100)
+        self.obstacles = []
+        for i_obst in range(self.n_obstacles):
+            self.obstacles.append(Ball())
+            self.place_obj(self.obstacles[i_obst], max_tries=100)
+
         self.mission = "get to the green goal square"
 
     def step(self, action):
@@ -64,18 +62,28 @@ class DynamicObstaclesEnv(MiniGridEnv):
         if action >= self.action_space.n:
             action = 0
 
+        # Check if there is an obstacle in front of the agent
+        front_cell = self.grid.get(*self.front_pos)
+        not_clear = front_cell and front_cell.type != 'goal'
+
         obs, reward, done, info = MiniGridEnv.step(self, action)
 
+        # If the agent tries to walk over an obstacle
+        if action == self.actions.forward and not_clear:
+            reward = -1
+            done = True
+            return obs, reward, done, info
+
         # Update obstacle positions
-        if self.show_obstacles:
-            for i_obst in range(len(self.obstacles)):
-                old_pos = self.obstacles[i_obst].cur_pos
-                top = tuple(map(add, old_pos, (-1, -1)))
-                self.grid.set(*old_pos, None)
+        for i_obst in range(len(self.obstacles)):
+            old_pos = self.obstacles[i_obst].cur_pos
+            top = tuple(map(add, old_pos, (-1, -1)))
+
+            try:
                 self.place_obj(self.obstacles[i_obst], top=top, size=(3,3), max_tries=100)
-                if np.array_equal(self.obstacles[i_obst].cur_pos, self.agent_pos):
-                    reward = -1
-                    done = True
+                self.grid.set(*old_pos, None)
+            except:
+                pass
 
         return obs, reward, done, info