浏览代码

Fixed basicrl code

Maxime Chevalier-Boisvert 7 年之前
父节点
当前提交
8fc72cda9f
共有 5 个文件被更改，包括 11 次插入和 12 次删除
  1. 2 0
      .gitignore
  2. 7 1
      README.md
  3. 1 1
      basicrl/enjoy.py
  4. 0 9
      basicrl/envs.py
  5. 1 1
      gym_minigrid/envs/simple_envs.py

+ 2 - 0
.gitignore

@@ -1,2 +1,4 @@
 *.pyc
 *__pycache__
+*egg-info
+trained_models

+ 7 - 1
README.md

@@ -24,7 +24,7 @@ pip3 install -e .
 
 ## Usage
 
-To run the standalone UI application:
+To run the standalone UI application, which allows you to manually control the agent with the arrow keys:
 
 ```
 ./standalone.py
@@ -44,3 +44,9 @@ You can perform training using the ACKTR algorithm with:
 ```
 python3 basicrl/main.py --env-name MiniGrid-Empty-8x8-v0 --no-vis --num-processes 32 --algo acktr
 ```
+
+You can view the result of training using the `enjoy.py` script:
+
+```
+python3 basicrl/enjoy.py --env-name MiniGrid-Empty-8x8-v0 --load-dir ./trained_models/acktr
+```

+ 1 - 1
basicrl/enjoy.py

@@ -27,7 +27,7 @@ parser.add_argument('--load-dir', default='./trained_models/',
 args = parser.parse_args()
 
 
-env = make_env(args.env_name, args.seed, 0, None, 5, 7)
+env = make_env(args.env_name, args.seed, 0, None)
 env = DummyVecEnv([env])
 
 actor_critic, ob_rms = \

+ 0 - 9
basicrl/envs.py

@@ -17,13 +17,6 @@ try:
 except:
     pass
 
-class ScaleActions(gym.ActionWrapper):
-    def __init__(self, env=None):
-        super(ScaleActions, self).__init__(env)
-
-    def _step(self, action):
-        action = (numpy.tanh(action) + 1) / 2 * (self.action_space.high - self.action_space.low) + self.action_space.low
-        return self.env.step(action)
 
 def make_env(env_id, seed, rank, log_dir):
     def _thunk():
@@ -41,8 +34,6 @@ def make_env(env_id, seed, rank, log_dir):
         if len(obs_shape) == 3 and obs_shape[2] == 3:
             env = WrapPyTorch(env)
 
-        #env = ScaleActions(env)
-
         return env
 
     return _thunk

+ 1 - 1
gym_minigrid/envs/simple_envs.py

@@ -15,7 +15,7 @@ class EmptyEnv6x6(EmptyEnv):
 
 register(
     id='MiniGrid-Empty-8x8-v0',
-    entry_point='gym_MiniGrid.envs:EmptyEnv',
+    entry_point='gym_minigrid.envs:EmptyEnv',
     reward_threshold=1000.0
 )