test_envs.py
import gym
import numpy as np
import pytest
from gym.envs.registration import EnvSpec
from gym.utils.env_checker import check_env

from gym_minigrid.minigrid import Grid
from tests.utils import all_testing_env_specs, assert_equals
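
# check_env reports problems through gym's logger, which wraps each warning in
# yellow ANSI escape codes; the whitelist below therefore stores the fully
# formatted strings so they can be compared verbatim against captured messages.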
CHECK_ENV_IGNORE_WARNINGS = [
    f"\x1b[33mWARN: {message}\x1b[0m"
    for message in [
        "A Box observation space minimum value is -infinity. This is probably too low.",
        "A Box observation space maximum value is -infinity. This is probably too high.",
        "For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information.",
        "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.",
        "Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.",
        "Core environment is written in old step API which returns one bool instead of two. It is recommended to rewrite the environment with new step API. ",
    ]
]
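

# Run every registered environment through gym's env_checker and fail on any
# warning that is not in the whitelist above.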
@pytest.mark.parametrize(
    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
)
def test_env(spec):
    # Capture warnings
    env = spec.make(disable_env_checker=True).unwrapped

    # Test if the environment adheres to the Gym API; a bare pytest.warns()
    # captures every warning raised during the check.
    with pytest.warns() as warnings:
        check_env(env)

    for warning in warnings.list:
        if warning.message.args[0] not in CHECK_ENV_IGNORE_WARNINGS:
            raise gym.error.Error(f"Unexpected warning: {warning.message}")


# Note that this precludes running this test in multiple threads.
# However, we probably already can't do multithreading due to some environments.
SEED = 0
NUM_STEPS = 50


@pytest.mark.parametrize(
    "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs]
)
def test_env_determinism_rollout(env_spec: EnvSpec):
    """Run a rollout with two environments and assert equality.

    This test runs a rollout of NUM_STEPS steps with two environments
    initialized with the same seed and asserts that:

    - observations after the first reset are the same
    - the same actions are sampled by the two envs
    - observations are contained in the observation space
    - obs, rew, done, and info are equal between the two envs
    """
    # Don't check rollout equality if it's a nondeterministic environment.
    if env_spec.nondeterministic is True:
        return

    env_1 = env_spec.make(disable_env_checker=True)
    env_2 = env_spec.make(disable_env_checker=True)

    initial_obs_1 = env_1.reset(seed=SEED)
    initial_obs_2 = env_2.reset(seed=SEED)
    assert_equals(initial_obs_1, initial_obs_2)

    env_1.action_space.seed(SEED)

    for time_step in range(NUM_STEPS):
        # We don't evaluate the determinism of actions
        action = env_1.action_space.sample()

        obs_1, rew_1, done_1, info_1 = env_1.step(action)
        obs_2, rew_2, done_2, info_2 = env_2.step(action)

        assert_equals(obs_1, obs_2, f"[{time_step}] ")
        assert env_1.observation_space.contains(
            obs_1
        )  # obs_2 verified by previous assertion

        assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}"
        assert done_1 == done_2, f"[{time_step}] done 1={done_1}, done 2={done_2}"
        assert_equals(info_1, info_2, f"[{time_step}] ")

        if done_1:  # done_2 verified by previous assertion
            env_1.reset(seed=SEED)
            env_2.reset(seed=SEED)

    env_1.close()
    env_2.close()


@pytest.mark.parametrize(
    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
)
def test_render_modes(spec):
    env = spec.make()

    for mode in env.metadata.get("render_modes", []):
        # "human" mode opens an interactive window, so it is skipped here.
        if mode != "human":
            new_env = spec.make(render_mode=mode)

            new_env.reset()
            new_env.step(new_env.action_space.sample())
            new_env.render()


@pytest.mark.parametrize("env_id", ["MiniGrid-DoorKey-6x6-v0"])
def test_agent_sees_method(env_id):
    env = gym.make(env_id)
    goal_pos = (env.grid.width - 2, env.grid.height - 2)

    # Test the "in" operator on grid objects
    assert ("green", "goal") in env.grid
    assert ("blue", "key") not in env.grid

    # Test the env.agent_sees() function
    env.reset()
    for i in range(0, 500):
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)

        grid, _ = Grid.decode(obs["image"])
        goal_visible = ("green", "goal") in grid

        agent_sees_goal = env.agent_sees(*goal_pos)
        assert agent_sees_goal == goal_visible

        if done:
            env.reset()

    env.close()
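

# Kept for reference only: pytest does not collect this function because its
# name does not start with "test_".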
@pytest.mark.parametrize(
    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
)
def old_run_test(env_spec):
    # Load the gym environment
    env = env_spec.make()
    env.max_steps = min(env.max_steps, 200)
    env.reset()
    env.render()

    # Verify that the same seed always produces the same environment
    for i in range(0, 5):
        seed = 1337 + i

        _ = env.reset(seed=seed)
        grid1 = env.grid

        _ = env.reset(seed=seed)
        grid2 = env.grid

        assert grid1 == grid2

    env.reset()

    # Run for a few episodes
    num_episodes = 0
    while num_episodes < 5:
        # Pick a random action
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)

        # Validate the agent position
        assert env.agent_pos[0] < env.width
        assert env.agent_pos[1] < env.height

        # Test observation encode/decode roundtrip
        img = obs["image"]
        grid, vis_mask = Grid.decode(img)
        img2 = grid.encode(vis_mask=vis_mask)
        assert np.array_equal(img, img2)

        # Test the env to string function
        str(env)

        # Check that the reward is within the specified range
        assert reward >= env.reward_range[0], reward
        assert reward <= env.reward_range[1], reward

        if done:
            num_episodes += 1
            env.reset()

        env.render()

    # Test the close method
    env.close()
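

# This test uses render_mode="human", which opens a window; it assumes a
# display is available and is mainly useful for manual runs.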
@pytest.mark.parametrize("env_id", ["MiniGrid-Empty-8x8-v0"])
def test_interactive_mode(env_id):
    env = gym.make(env_id, render_mode="human")
    env.reset()

    for i in range(0, 100):
        print(f"step {i}")

        # Pick a random action
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)

    # Test the close method
    env.close()
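

if __name__ == "__main__":
    # Convenience entry point, not part of the pytest suite: a minimal sketch
    # for a quick manual smoke check. The environment id is just an example.
    test_interactive_mode("MiniGrid-Empty-8x8-v0")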