| 
															
																@@ -12,12 +12,10 @@ class DynamicObstaclesEnv(MiniGridEnv): 
															 | 
														
													
												
													
														
															| 
															 | 
															
																             size=8, 
															 | 
															
															 | 
															
																             size=8, 
															 | 
														
													
												
													
														
															| 
															 | 
															
																             agent_start_pos=(1, 1), 
															 | 
															
															 | 
															
																             agent_start_pos=(1, 1), 
															 | 
														
													
												
													
														
															| 
															 | 
															
																             agent_start_dir=0, 
															 | 
															
															 | 
															
																             agent_start_dir=0, 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-            n_obstacles=4, 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-            show_obstacles=True 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+            n_obstacles=4 
															 | 
														
													
												
													
														
															| 
															 | 
															
																     ): 
															 | 
															
															 | 
															
																     ): 
															 | 
														
													
												
													
														
															| 
															 | 
															
																         self.agent_start_pos = agent_start_pos 
															 | 
															
															 | 
															
																         self.agent_start_pos = agent_start_pos 
															 | 
														
													
												
													
														
															| 
															 | 
															
																         self.agent_start_dir = agent_start_dir 
															 | 
															
															 | 
															
																         self.agent_start_dir = agent_start_dir 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-        self.show_obstacles = show_obstacles 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																  
															 | 
															
															 | 
															
																  
															 | 
														
													
												
													
														
															| 
															 | 
															
																         # Reduce obstacles if there are too many 
															 | 
															
															 | 
															
																         # Reduce obstacles if there are too many 
															 | 
														
													
												
													
														
															| 
															 | 
															
																         if n_obstacles <= size/2 + 1: 
															 | 
															
															 | 
															
																         if n_obstacles <= size/2 + 1: 
															 | 
														
													
												
											
												
													
														
															 | 
															
																@@ -52,11 +50,11 @@ class DynamicObstaclesEnv(MiniGridEnv): 
															 | 
														
													
												
													
														
															| 
															 | 
															
																             self.place_agent() 
															 | 
															
															 | 
															
																             self.place_agent() 
															 | 
														
													
												
													
														
															| 
															 | 
															
																  
															 | 
															
															 | 
															
																  
															 | 
														
													
												
													
														
															| 
															 | 
															
																         # Place obstacles 
															 | 
															
															 | 
															
																         # Place obstacles 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-        if self.show_obstacles: 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-            self.obstacles = [] 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-            for i_obst in range(self.n_obstacles): 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-                self.obstacles.append(Ball()) 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-                self.place_obj(self.obstacles[i_obst], max_tries=100) 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+        self.obstacles = [] 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+        for i_obst in range(self.n_obstacles): 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+            self.obstacles.append(Ball()) 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+            self.place_obj(self.obstacles[i_obst], max_tries=100) 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+ 
															 | 
														
													
												
													
														
															| 
															 | 
															
																         self.mission = "get to the green goal square" 
															 | 
															
															 | 
															
																         self.mission = "get to the green goal square" 
															 | 
														
													
												
													
														
															| 
															 | 
															
																  
															 | 
															
															 | 
															
																  
															 | 
														
													
												
													
														
															| 
															 | 
															
																     def step(self, action): 
															 | 
															
															 | 
															
																     def step(self, action): 
															 | 
														
													
												
											
												
													
														
															 | 
															
																@@ -64,18 +62,28 @@ class DynamicObstaclesEnv(MiniGridEnv): 
															 | 
														
													
												
													
														
															| 
															 | 
															
																         if action >= self.action_space.n: 
															 | 
															
															 | 
															
																         if action >= self.action_space.n: 
															 | 
														
													
												
													
														
															| 
															 | 
															
																             action = 0 
															 | 
															
															 | 
															
																             action = 0 
															 | 
														
													
												
													
														
															| 
															 | 
															
																  
															 | 
															
															 | 
															
																  
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+        # Check if there is an obstacle in front of the agent 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+        front_cell = self.grid.get(*self.front_pos) 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+        not_clear = front_cell and front_cell.type != 'goal' 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+ 
															 | 
														
													
												
													
														
															| 
															 | 
															
																         obs, reward, done, info = MiniGridEnv.step(self, action) 
															 | 
															
															 | 
															
																         obs, reward, done, info = MiniGridEnv.step(self, action) 
															 | 
														
													
												
													
														
															| 
															 | 
															
																  
															 | 
															
															 | 
															
																  
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+        # If the agent tries to walk over an obstacle 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+        if action == self.actions.forward and not_clear: 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+            reward = -1 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+            done = True 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+            return obs, reward, done, info 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+ 
															 | 
														
													
												
													
														
															| 
															 | 
															
																         # Update obstacle positions 
															 | 
															
															 | 
															
																         # Update obstacle positions 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-        if self.show_obstacles: 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-            for i_obst in range(len(self.obstacles)): 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-                old_pos = self.obstacles[i_obst].cur_pos 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-                top = tuple(map(add, old_pos, (-1, -1))) 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-                self.grid.set(*old_pos, None) 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+        for i_obst in range(len(self.obstacles)): 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+            old_pos = self.obstacles[i_obst].cur_pos 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+            top = tuple(map(add, old_pos, (-1, -1))) 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+ 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+            try: 
															 | 
														
													
												
													
														
															| 
															 | 
															
																                 self.place_obj(self.obstacles[i_obst], top=top, size=(3,3), max_tries=100) 
															 | 
															
															 | 
															
																                 self.place_obj(self.obstacles[i_obst], top=top, size=(3,3), max_tries=100) 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-                if np.array_equal(self.obstacles[i_obst].cur_pos, self.agent_pos): 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-                    reward = -1 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																-                    done = True 
															 | 
															
															 | 
															
																 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+                self.grid.set(*old_pos, None) 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+            except: 
															 | 
														
													
												
													
														
															| 
															 | 
															
																 
															 | 
															
															 | 
															
																+                pass 
															 | 
														
													
												
													
														
															| 
															 | 
															
																  
															 | 
															
															 | 
															
																  
															 | 
														
													
												
													
														
															| 
															 | 
															
																         return obs, reward, done, info 
															 | 
															
															 | 
															
																         return obs, reward, done, info 
															 | 
														
													
												
													
														
															| 
															 | 
															
																  
															 | 
															
															 | 
															
																  
															 |