|
@@ -56,7 +56,13 @@ def main():
|
|
|
viz = Visdom()
|
|
|
win = None
|
|
|
|
|
|
- envs = [make_env(args.env_name, args.seed, i, args.log_dir)
|
|
|
+
|
|
|
+ paramSteps = [5,6,7,8,9,10,11,12,13,14,15,16]
|
|
|
+
|
|
|
+ roomSize = paramSteps[0]
|
|
|
+ paramSteps = paramSteps[1:]
|
|
|
+
|
|
|
+ envs = [make_env(args.env_name, args.seed, i, args.log_dir, roomSize)
|
|
|
for i in range(args.num_processes)]
|
|
|
|
|
|
if args.num_processes > 1:
|
|
@@ -250,6 +256,9 @@ def main():
|
|
|
if j % args.log_interval == 0:
|
|
|
end = time.time()
|
|
|
total_num_steps = (j + 1) * args.num_processes * args.num_steps
|
|
|
+
|
|
|
+ print('roomSize=%s' % roomSize)
|
|
|
+
|
|
|
print("Updates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}".
|
|
|
format(j, total_num_steps,
|
|
|
int(total_num_steps / (end - start)),
|
|
@@ -258,6 +267,21 @@ def main():
|
|
|
final_rewards.min(),
|
|
|
final_rewards.max(), dist_entropy.data[0],
|
|
|
value_loss.data[0], action_loss.data[0]))
|
|
|
+
|
|
|
+ #print(final_rewards.min())
|
|
|
+ if final_rewards.min() > 950 and len(paramSteps) > 0:
|
|
|
+ roomSize = paramSteps[0]
|
|
|
+ paramSteps = paramSteps[1:]
|
|
|
+
|
|
|
+ envs.close()
|
|
|
+ envs = [make_env(args.env_name, args.seed, i, args.log_dir, roomSize) for i in range(args.num_processes)]
|
|
|
+ envs = SubprocVecEnv(envs)
|
|
|
+ obs = envs.reset()
|
|
|
+ update_current_obs(obs)
|
|
|
+
|
|
|
+ # Reset the rewards
|
|
|
+ final_rewards = torch.zeros([args.num_processes, 1])
|
|
|
+
|
|
|
if args.vis and j % args.vis_interval == 0:
|
|
|
try:
|
|
|
# Sometimes monitor doesn't properly flush the outputs
|