| 
					
				 | 
			
			
				@@ -56,7 +56,13 @@ def main(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         viz = Visdom() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         win = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    envs = [make_env(args.env_name, args.seed, i, args.log_dir) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    paramSteps = [5,6,7,8,9,10,11,12,13,14,15,16] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    roomSize = paramSteps[0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    paramSteps = paramSteps[1:] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    envs = [make_env(args.env_name, args.seed, i, args.log_dir, roomSize) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 for i in range(args.num_processes)] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if args.num_processes > 1: 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -250,6 +256,9 @@ def main(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if j % args.log_interval == 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             end = time.time() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             total_num_steps = (j + 1) * args.num_processes * args.num_steps 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print('roomSize=%s' % roomSize) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             print("Updates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}". 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 format(j, total_num_steps, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                        int(total_num_steps / (end - start)), 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -258,6 +267,21 @@ def main(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                        final_rewards.min(), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                        final_rewards.max(), dist_entropy.data[0], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                        value_loss.data[0], action_loss.data[0])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            #print(final_rewards.min()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if final_rewards.min() > 950 and len(paramSteps) > 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                roomSize = paramSteps[0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                paramSteps = paramSteps[1:] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                envs.close() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                envs = [make_env(args.env_name, args.seed, i, args.log_dir, roomSize) for i in range(args.num_processes)] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                envs = SubprocVecEnv(envs) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                obs = envs.reset() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                update_current_obs(obs) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # Reset the rewards so the `final_rewards.min() > 950` check starts from zero for the new roomSize 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                final_rewards = torch.zeros([args.num_processes, 1]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if args.vis and j % args.vis_interval == 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 # Sometimes monitor doesn't properly flush the outputs 
			 |