| 
					
				 | 
			
			
				@@ -12,12 +12,10 @@ class DynamicObstaclesEnv(MiniGridEnv): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             size=8, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             agent_start_pos=(1, 1), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             agent_start_dir=0, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            n_obstacles=4, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            show_obstacles=True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            n_obstacles=4 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     ): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.agent_start_pos = agent_start_pos 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.agent_start_dir = agent_start_dir 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        self.show_obstacles = show_obstacles 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # Reduce obstacles if there are too many 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if n_obstacles <= size/2 + 1: 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -52,11 +50,11 @@ class DynamicObstaclesEnv(MiniGridEnv): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             self.place_agent() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # Place obstacles 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if self.show_obstacles: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            self.obstacles = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            for i_obst in range(self.n_obstacles): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                self.obstacles.append(Ball()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                self.place_obj(self.obstacles[i_obst], max_tries=100) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.obstacles = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for i_obst in range(self.n_obstacles): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.obstacles.append(Ball()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.place_obj(self.obstacles[i_obst], max_tries=100) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.mission = "get to the green goal square" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     def step(self, action): 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -64,18 +62,28 @@ class DynamicObstaclesEnv(MiniGridEnv): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if action >= self.action_space.n: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             action = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # Check if there is an obstacle in front of the agent 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        front_cell = self.grid.get(*self.front_pos) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        not_clear = front_cell and front_cell.type != 'goal' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         obs, reward, done, info = MiniGridEnv.step(self, action) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # If the agent tries to walk over an obstacle 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if action == self.actions.forward and not_clear: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            reward = -1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            done = True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return obs, reward, done, info 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # Update obstacle positions 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if self.show_obstacles: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            for i_obst in range(len(self.obstacles)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                old_pos = self.obstacles[i_obst].cur_pos 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                top = tuple(map(add, old_pos, (-1, -1))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                self.grid.set(*old_pos, None) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for i_obst in range(len(self.obstacles)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            old_pos = self.obstacles[i_obst].cur_pos 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            top = tuple(map(add, old_pos, (-1, -1))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 self.place_obj(self.obstacles[i_obst], top=top, size=(3,3), max_tries=100) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if np.array_equal(self.obstacles[i_obst].cur_pos, self.agent_pos): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    reward = -1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    done = True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.grid.set(*old_pos, None) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            except: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                pass 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return obs, reward, done, info 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 |