|
@@ -6,8 +6,77 @@ from gym_minigrid.minigrid import Goal, Grid, Lava, MiniGridEnv, MissionSpace
|
|
|
|
|
|
|
|
|
class CrossingEnv(MiniGridEnv):
|
|
|
+
|
|
|
"""
|
|
|
- Environment with wall or lava obstacles, sparse reward.
|
|
|
+ ### Description
|
|
|
+
|
|
|
+ Depending on the `obstacle_type` parameter:
|
|
|
+ - `Lava` - The agent has to reach the green goal square on the other corner
|
|
|
+ of the room while avoiding rivers of deadly lava which terminate the
|
|
|
+ episode in failure. Each lava stream runs across the room either
|
|
|
+ horizontally or vertically, and has a single crossing point which can be
|
|
|
+ safely used. Luckily, a path to the goal is guaranteed to exist. This
|
|
|
+ environment is useful for studying safety and safe exploration.
|
|
|
+ - otherwise - Similar to the `LavaCrossing` environment, the agent has to
|
|
|
+ reach the green goal square on the other corner of the room, however
|
|
|
+ lava is replaced by walls. This MDP is therefore much easier and may be
|
|
|
+ useful for quickly testing your algorithms.
|
|
|
+
|
|
|
+ ### Mission Space
|
|
|
+ Depending on the `obstacle_type` parameter:
|
|
|
+ - `Lava` - "avoid the lava and get to the green goal square"
|
|
|
+ - otherwise - "find the opening and get to the green goal square"
|
|
|
+
|
|
|
+ ### Action Space
|
|
|
+
|
|
|
+ | Num | Name | Action |
|
|
|
+ |-----|--------------|--------------|
|
|
|
+ | 0 | left | Turn left |
|
|
|
+ | 1 | right | Turn right |
|
|
|
+ | 2 | forward | Move forward |
|
|
|
+ | 3 | pickup | Unused |
|
|
|
+ | 4 | drop | Unused |
|
|
|
+ | 5 | toggle | Unused |
|
|
|
+ | 6 | done | Unused |
|
|
|
+
|
|
|
+ ### Observation Encoding
|
|
|
+
|
|
|
+ - Each tile is encoded as a 3-dimensional tuple:
|
|
|
+ `(OBJECT_IDX, COLOR_IDX, STATE)`
|
|
|
+ - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
|
|
|
+ [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
|
|
|
+ - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
|
|
|
+
|
|
|
+ ### Rewards
|
|
|
+
|
|
|
+ A reward of '1' is given for success, and '0' for failure.
|
|
|
+
|
|
|
+ ### Termination
|
|
|
+
|
|
|
+ The episode ends if any one of the following conditions is met:
|
|
|
+
|
|
|
+ 1. The agent reaches the goal.
|
|
|
+ 2. The agent falls into lava.
|
|
|
+ 3. Timeout (see `max_steps`).
|
|
|
+
|
|
|
+ ### Registered Configurations
|
|
|
+
|
|
|
+ S: size of the map SxS.
|
|
|
+ N: number of valid crossings across lava or walls from the starting position
|
|
|
+ to the goal
|
|
|
+
|
|
|
+ - `Lava` :
|
|
|
+ - `MiniGrid-LavaCrossingS9N1-v0`
|
|
|
+ - `MiniGrid-LavaCrossingS9N2-v0`
|
|
|
+ - `MiniGrid-LavaCrossingS9N3-v0`
|
|
|
+ - `MiniGrid-LavaCrossingS11N5-v0`
|
|
|
+
|
|
|
+ - otherwise :
|
|
|
+ - `MiniGrid-SimpleCrossingS9N1-v0`
|
|
|
+ - `MiniGrid-SimpleCrossingS9N2-v0`
|
|
|
+ - `MiniGrid-SimpleCrossingS9N3-v0`
|
|
|
+ - `MiniGrid-SimpleCrossingS11N5-v0`
|
|
|
+
|
|
|
"""
|
|
|
|
|
|
def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, **kwargs):
|