
Moved rl code to pytorch-rl. Fixed warnings. Fixed issue w/ flat obs.

Maxime Chevalier-Boisvert 7 years ago
parent
commit
80b3178610

+ 6 - 6
README.md

@@ -28,10 +28,10 @@ pip3 install -e .
 ```
 
 Optionally, if you wish to use the reinforcement learning code included
-under [/basicrl](/basicrl), you can install its dependencies as follows:
+under [/pytorch-rl](/pytorch-rl), you can install its dependencies as follows:
 
 ```
-cd basicrl
+cd pytorch-rl
 
 # PyTorch
 conda install pytorch torchvision -c soumith
@@ -49,7 +49,7 @@ cd ..
 pip3 install -r requirements.txt
 ```
 
-Note: the basicrl code is a custom fork of [this repository](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr),
+Note: the pytorch-rl code is a custom fork of [this repository](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr),
 which was modified to work with this environment.
 
 ## Basic Usage
@@ -66,17 +66,17 @@ The environment being run can be selected with the `--env-name` option, eg:
 ./standalone.py --env-name MiniGrid-Empty-8x8-v0
 ```
 
-Basic reinforcement learning code is provided in the `basicrl` subdirectory.
+Basic reinforcement learning code is provided in the `pytorch-rl` subdirectory.
 You can perform training using the ACKTR algorithm with:
 
 ```
-python3 basicrl/main.py --env-name MiniGrid-Empty-6x6-v0 --no-vis --num-processes 32 --algo acktr
+python3 pytorch-rl/main.py --env-name MiniGrid-Empty-6x6-v0 --no-vis --num-processes 32 --algo acktr
 ```
 
 You can view the result of training using the `enjoy.py` script:
 
 ```
-python3 basicrl/enjoy.py --env-name MiniGrid-Empty-6x6-v0 --load-dir ./trained_models/acktr
+python3 pytorch-rl/enjoy.py --env-name MiniGrid-Empty-6x6-v0 --load-dir ./trained_models/acktr
 ```
 
 ## Design

basicrl/LICENSE → pytorch-rl/LICENSE


basicrl/README.md → pytorch-rl/README.md


basicrl/arguments.py → pytorch-rl/arguments.py


+ 3 - 3
basicrl/distributions.py

@@ -19,7 +19,7 @@ class Categorical(nn.Module):
     def sample(self, x, deterministic):
         x = self(x)
 
-        probs = F.softmax(x)
+        probs = F.softmax(x, dim=1)
         if deterministic is False:
             action = probs.multinomial()
         else:
@@ -29,8 +29,8 @@ class Categorical(nn.Module):
     def logprobs_and_entropy(self, x, actions):
         x = self(x)
 
-        log_probs = F.log_softmax(x)
-        probs = F.softmax(x)
+        log_probs = F.log_softmax(x, dim=1)
+        probs = F.softmax(x, dim=1)
 
         action_log_probs = log_probs.gather(1, actions)
 

basicrl/enjoy.py → pytorch-rl/enjoy.py


basicrl/envs.py → pytorch-rl/envs.py


basicrl/imgs/a2c_beamrider.png → pytorch-rl/imgs/a2c_beamrider.png


basicrl/imgs/a2c_breakout.png → pytorch-rl/imgs/a2c_breakout.png


basicrl/imgs/a2c_qbert.png → pytorch-rl/imgs/a2c_qbert.png


basicrl/imgs/a2c_seaquest.png → pytorch-rl/imgs/a2c_seaquest.png


basicrl/imgs/acktr_beamrider.png → pytorch-rl/imgs/acktr_beamrider.png


basicrl/imgs/acktr_breakout.png → pytorch-rl/imgs/acktr_breakout.png


basicrl/imgs/acktr_qbert.png → pytorch-rl/imgs/acktr_qbert.png


basicrl/imgs/acktr_seaquest.png → pytorch-rl/imgs/acktr_seaquest.png


basicrl/imgs/ppo_halfcheetah.png → pytorch-rl/imgs/ppo_halfcheetah.png


basicrl/imgs/ppo_hopper.png → pytorch-rl/imgs/ppo_hopper.png


basicrl/imgs/ppo_reacher.png → pytorch-rl/imgs/ppo_reacher.png


basicrl/imgs/ppo_walker.png → pytorch-rl/imgs/ppo_walker.png


basicrl/kfac.py → pytorch-rl/kfac.py


+ 12 - 2
basicrl/main.py

@@ -64,8 +64,10 @@ def main():
     else:
         envs = DummyVecEnv(envs)
 
-    if len(envs.observation_space.shape) == 1:
-        envs = VecNormalize(envs)
+    # Maxime: commented this out because it very much changes the behavior
+    # of the code for seemingly arbitrary reasons
+    #if len(envs.observation_space.shape) == 1:
+    #    envs = VecNormalize(envs)
 
     obs_shape = envs.observation_space.shape
     obs_shape = (obs_shape[0] * args.num_stack, *obs_shape[1:])
@@ -79,6 +81,14 @@ def main():
             "Recurrent policy is not implemented for the MLP controller"
         actor_critic = MLPPolicy(obs_numel, envs.action_space)
 
+    # Maxime: log some info about the model and its size
+    modelSize = 0
+    for p in actor_critic.parameters():
+        pSize = reduce(operator.mul, p.size(), 1)
+        modelSize += pSize
+    print(str(actor_critic))
+    print('Total model size: %d' % modelSize)
+
     if envs.action_space.__class__.__name__ == "Discrete":
         action_shape = 1
     else:
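
The model-size logging added above relies on `reduce` and `operator.mul`, which assumes `from functools import reduce` and `import operator` are available elsewhere in `main.py` (they are not shown in this hunk). Here is a self-contained sketch of the same counting logic, using a hypothetical stand-in model rather than the repository's `MLPPolicy`:

```
import operator
from functools import reduce

import torch.nn as nn

# Toy stand-in model with illustrative layer sizes; the repository
# constructs an MLPPolicy at this point instead.
actor_critic = nn.Sequential(nn.Linear(147, 64), nn.Tanh(), nn.Linear(64, 7))

# Same counting logic as the lines added to main.py: multiply out each
# parameter's shape and sum over all parameters.
model_size = 0
for p in actor_critic.parameters():
    p_size = reduce(operator.mul, p.size(), 1)
    model_size += p_size

print(str(actor_critic))
print('Total model size: %d' % model_size)
# Equivalent shortcut: sum(p.numel() for p in actor_critic.parameters())
```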

basicrl/model.py → pytorch-rl/model.py


basicrl/requirements.txt → pytorch-rl/requirements.txt


basicrl/storage.py → pytorch-rl/storage.py


basicrl/utils.py → pytorch-rl/utils.py


basicrl/visualize.py → pytorch-rl/visualize.py