Browse Source

Add BabyAI levels (#253)

Co-authored-by: Mark Towers <mark.m.towers@gmail.com>
saleml 2 years ago
parent
commit
b2e382a145

+ 564 - 0
minigrid/__init__.py

@@ -505,3 +505,567 @@ def register_minigrid_envs():
         id="MiniGrid-UnlockPickup-v0",
         id="MiniGrid-UnlockPickup-v0",
         entry_point="minigrid.envs:UnlockPickupEnv",
         entry_point="minigrid.envs:UnlockPickupEnv",
     )
     )
+
+    # BabyAI - Language based levels - GoTo
+    # ----------------------------------------
+
+    register(
+        id="BabyAI-GoToRedBallGrey-v0",
+        entry_point="minigrid.envs.babyai:GoToRedBallGrey",
+    )
+
+    register(
+        id="BabyAI-GoToRedBall-v0",
+        entry_point="minigrid.envs.babyai:GoToRedBall",
+    )
+
+    register(
+        id="BabyAI-GoToRedBallNoDists-v0",
+        entry_point="minigrid.envs.babyai:GoToRedBallNoDists",
+    )
+
+    register(
+        id="BabyAI-GoToObj-v0",
+        entry_point="minigrid.envs.babyai:GoToObj",
+    )
+
+    register(
+        id="BabyAI-GoToObjS4-v0",
+        entry_point="minigrid.envs.babyai:GoToObj",
+        kwargs={"room_size": 4},
+    )
+
+    register(
+        id="BabyAI-GoToObjS6-v0",
+        entry_point="minigrid.envs.babyai:GoToObj",
+        # The "S6" suffix denotes the room size (cf. "S4" -> room_size 4)
+        kwargs={"room_size": 6},
+    )
+
+    register(
+        id="BabyAI-GoToLocal-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+    )
+
+    register(
+        id="BabyAI-GoToLocalS5N2-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 5, "num_dists": 2},
+    )
+
+    register(
+        id="BabyAI-GoToLocalS6N2-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 6, "num_dists": 2},
+    )
+
+    register(
+        id="BabyAI-GoToLocalS6N3-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 6, "num_dists": 3},
+    )
+
+    register(
+        id="BabyAI-GoToLocalS6N4-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 6, "num_dists": 4},
+    )
+
+    register(
+        id="BabyAI-GoToLocalS7N4-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 7, "num_dists": 4},
+    )
+
+    register(
+        id="BabyAI-GoToLocalS7N5-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 7, "num_dists": 5},
+    )
+
+    register(
+        id="BabyAI-GoToLocalS8N2-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 8, "num_dists": 2},
+    )
+
+    register(
+        id="BabyAI-GoToLocalS8N3-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 8, "num_dists": 3},
+    )
+
+    register(
+        id="BabyAI-GoToLocalS8N4-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 8, "num_dists": 4},
+    )
+
+    register(
+        id="BabyAI-GoToLocalS8N5-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 8, "num_dists": 5},
+    )
+
+    register(
+        id="BabyAI-GoToLocalS8N6-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 8, "num_dists": 6},
+    )
+
+    register(
+        id="BabyAI-GoToLocalS8N7-v0",
+        entry_point="minigrid.envs.babyai:GoToLocal",
+        kwargs={"room_size": 8, "num_dists": 7},
+    )
+
+    register(
+        id="BabyAI-GoTo-v0",
+        entry_point="minigrid.envs.babyai:GoTo",
+    )
+
+    register(
+        id="BabyAI-GoToOpen-v0",
+        entry_point="minigrid.envs.babyai:GoTo",
+        kwargs={"doors_open": True},
+    )
+
+    register(
+        id="BabyAI-GoToObjMaze-v0",
+        entry_point="minigrid.envs.babyai:GoTo",
+        kwargs={"num_dists": 1, "doors_open": False},
+    )
+
+    register(
+        id="BabyAI-GoToObjMazeOpen-v0",
+        entry_point="minigrid.envs.babyai:GoTo",
+        kwargs={"num_dists": 1, "doors_open": True},
+    )
+
+    register(
+        id="BabyAI-GoToObjMazeS4R2-v0",
+        entry_point="minigrid.envs.babyai:GoTo",
+        kwargs={"num_dists": 1, "room_size": 4, "num_rows": 2, "num_cols": 2},
+    )
+
+    register(
+        id="BabyAI-GoToObjMazeS4-v0",
+        entry_point="minigrid.envs.babyai:GoTo",
+        kwargs={"num_dists": 1, "room_size": 4},
+    )
+
+    register(
+        id="BabyAI-GoToObjMazeS5-v0",
+        entry_point="minigrid.envs.babyai:GoTo",
+        kwargs={"num_dists": 1, "room_size": 5},
+    )
+
+    register(
+        id="BabyAI-GoToObjMazeS6-v0",
+        entry_point="minigrid.envs.babyai:GoTo",
+        kwargs={"num_dists": 1, "room_size": 6},
+    )
+
+    register(
+        id="BabyAI-GoToObjMazeS7-v0",
+        entry_point="minigrid.envs.babyai:GoTo",
+        kwargs={"num_dists": 1, "room_size": 7},
+    )
+
+    register(
+        id="BabyAI-GoToImpUnlock-v0",
+        entry_point="minigrid.envs.babyai:GoToImpUnlock",
+    )
+
+    register(
+        id="BabyAI-GoToSeq-v0",
+        entry_point="minigrid.envs.babyai:GoToSeq",
+    )
+
+    register(
+        id="BabyAI-GoToSeqS5R2-v0",
+        entry_point="minigrid.envs.babyai:GoToSeq",
+        kwargs={"room_size": 5, "num_rows": 2, "num_cols": 2, "num_dists": 4},
+    )
+
+    register(
+        id="BabyAI-GoToRedBlueBall-v0",
+        entry_point="minigrid.envs.babyai:GoToRedBlueBall",
+    )
+
+    register(
+        id="BabyAI-GoToDoor-v0",
+        entry_point="minigrid.envs.babyai:GoToDoor",
+    )
+
+    register(
+        id="BabyAI-GoToObjDoor-v0",
+        entry_point="minigrid.envs.babyai:GoToObjDoor",
+    )
+
+    # BabyAI - Language based levels - Open
+    # ----------------------------------------
+
+    register(
+        id="BabyAI-Open-v0",
+        entry_point="minigrid.envs.babyai:Open",
+    )
+
+    register(
+        id="BabyAI-OpenRedDoor-v0",
+        entry_point="minigrid.envs.babyai:OpenRedDoor",
+    )
+
+    register(
+        id="BabyAI-OpenDoor-v0",
+        entry_point="minigrid.envs.babyai:OpenDoor",
+    )
+
+    register(
+        id="BabyAI-OpenDoorDebug-v0",
+        entry_point="minigrid.envs.babyai:OpenDoor",
+        kwargs={"debug": True, "select_by": None},
+    )
+
+    register(
+        id="BabyAI-OpenDoorColor-v0",
+        entry_point="minigrid.envs.babyai:OpenDoor",
+        kwargs={"select_by": "color"},
+    )
+
+    register(
+        id="BabyAI-OpenDoorLoc-v0",
+        entry_point="minigrid.envs.babyai:OpenDoor",
+        kwargs={"select_by": "loc"},
+    )
+
+    register(
+        id="BabyAI-OpenTwoDoors-v0",
+        entry_point="minigrid.envs.babyai:OpenTwoDoors",
+    )
+
+    register(
+        id="BabyAI-OpenRedBlueDoors-v0",
+        entry_point="minigrid.envs.babyai:OpenTwoDoors",
+        kwargs={"first_color": "red", "second_color": "blue"},
+    )
+
+    register(
+        id="BabyAI-OpenRedBlueDoorsDebug-v0",
+        entry_point="minigrid.envs.babyai:OpenTwoDoors",
+        kwargs={
+            "first_color": "red",
+            "second_color": "blue",
+            "strict": True,
+        },
+    )
+
+    register(
+        id="BabyAI-OpenDoorsOrderN2-v0",
+        entry_point="minigrid.envs.babyai:OpenDoorsOrder",
+        kwargs={"num_doors": 2},
+    )
+
+    register(
+        id="BabyAI-OpenDoorsOrderN4-v0",
+        entry_point="minigrid.envs.babyai:OpenDoorsOrder",
+        kwargs={"num_doors": 4},
+    )
+
+    register(
+        id="BabyAI-OpenDoorsOrderN2Debug-v0",
+        entry_point="minigrid.envs.babyai:OpenDoorsOrder",
+        kwargs={"debug": True, "num_doors": 2},
+    )
+
+    register(
+        id="BabyAI-OpenDoorsOrderN4Debug-v0",
+        entry_point="minigrid.envs.babyai:OpenDoorsOrder",
+        kwargs={"debug": True, "num_doors": 4},
+    )
+
+    # BabyAI - Language based levels - Pickup
+    # ----------------------------------------
+
+    register(
+        id="BabyAI-Pickup-v0",
+        entry_point="minigrid.envs.babyai:Pickup",
+    )
+
+    register(
+        id="BabyAI-UnblockPickup-v0",
+        entry_point="minigrid.envs.babyai:UnblockPickup",
+    )
+
+    register(
+        id="BabyAI-PickupLoc-v0",
+        entry_point="minigrid.envs.babyai:PickupLoc",
+    )
+
+    register(
+        id="BabyAI-PickupDist-v0",
+        entry_point="minigrid.envs.babyai:PickupDist",
+    )
+
+    register(
+        id="BabyAI-PickupDistDebug-v0",
+        entry_point="minigrid.envs.babyai:PickupDist",
+        kwargs={"debug": True},
+    )
+
+    register(
+        id="BabyAI-PickupAbove-v0",
+        entry_point="minigrid.envs.babyai:PickupAbove",
+    )
+
+    # BabyAI - Language based levels - PutNext
+    # ----------------------------------------
+
+    register(
+        id="BabyAI-PutNextLocal-v0",
+        entry_point="minigrid.envs.babyai:PutNextLocal",
+    )
+
+    register(
+        id="BabyAI-PutNextLocalS5N3-v0",
+        entry_point="minigrid.envs.babyai:PutNextLocal",
+        kwargs={"room_size": 5, "num_objs": 3},
+    )
+
+    register(
+        id="BabyAI-PutNextLocalS6N4-v0",
+        entry_point="minigrid.envs.babyai:PutNextLocal",
+        kwargs={"room_size": 6, "num_objs": 4},
+    )
+
+    register(
+        id="BabyAI-PutNextS4N1-v0",
+        entry_point="minigrid.envs.babyai:PutNext",
+        kwargs={"room_size": 4, "objs_per_room": 1},
+    )
+
+    register(
+        id="BabyAI-PutNextS5N2-v0",
+        entry_point="minigrid.envs.babyai:PutNext",
+        kwargs={"room_size": 5, "objs_per_room": 2},
+    )
+
+    register(
+        id="BabyAI-PutNextS5N1-v0",
+        entry_point="minigrid.envs.babyai:PutNext",
+        kwargs={"room_size": 5, "objs_per_room": 1},
+    )
+
+    register(
+        id="BabyAI-PutNextS6N3-v0",
+        entry_point="minigrid.envs.babyai:PutNext",
+        kwargs={"room_size": 6, "objs_per_room": 3},
+    )
+
+    register(
+        id="BabyAI-PutNextS7N4-v0",
+        entry_point="minigrid.envs.babyai:PutNext",
+        kwargs={"room_size": 7, "objs_per_room": 4},
+    )
+
+    register(
+        id="BabyAI-PutNextS5N2Carrying-v0",
+        entry_point="minigrid.envs.babyai:PutNext",
+        kwargs={"room_size": 5, "objs_per_room": 2, "start_carrying": True},
+    )
+
+    register(
+        id="BabyAI-PutNextS6N3Carrying-v0",
+        entry_point="minigrid.envs.babyai:PutNext",
+        kwargs={"room_size": 6, "objs_per_room": 3, "start_carrying": True},
+    )
+
+    register(
+        id="BabyAI-PutNextS7N4Carrying-v0",
+        entry_point="minigrid.envs.babyai:PutNext",
+        kwargs={"room_size": 7, "objs_per_room": 4, "start_carrying": True},
+    )
+
+    # BabyAI - Language based levels - Unlock
+    # ----------------------------------------
+
+    register(
+        id="BabyAI-Unlock-v0",
+        entry_point="minigrid.envs.babyai:Unlock",
+    )
+
+    register(
+        id="BabyAI-UnlockLocal-v0",
+        entry_point="minigrid.envs.babyai:UnlockLocal",
+    )
+
+    register(
+        id="BabyAI-UnlockLocalDist-v0",
+        entry_point="minigrid.envs.babyai:UnlockLocal",
+        kwargs={"distractors": True},
+    )
+
+    register(
+        id="BabyAI-KeyInBox-v0",
+        entry_point="minigrid.envs.babyai:KeyInBox",
+    )
+
+    register(
+        id="BabyAI-UnlockPickup-v0",
+        entry_point="minigrid.envs.babyai:UnlockPickup",
+    )
+
+    register(
+        id="BabyAI-UnlockPickupDist-v0",
+        entry_point="minigrid.envs.babyai:UnlockPickup",
+        kwargs={"distractors": True},
+    )
+
+    register(
+        id="BabyAI-BlockedUnlockPickup-v0",
+        entry_point="minigrid.envs.babyai:BlockedUnlockPickup",
+    )
+
+    register(
+        id="BabyAI-UnlockToUnlock-v0",
+        entry_point="minigrid.envs.babyai:UnlockToUnlock",
+    )
+
+    # BabyAI - Language based levels - Other
+    # ----------------------------------------
+
+    register(
+        id="BabyAI-ActionObjDoor-v0",
+        entry_point="minigrid.envs.babyai:ActionObjDoor",
+    )
+
+    register(
+        id="BabyAI-FindObjS5-v0",
+        entry_point="minigrid.envs.babyai:FindObjS5",
+    )
+
+    register(
+        id="BabyAI-FindObjS6-v0",
+        entry_point="minigrid.envs.babyai:FindObjS5",
+        kwargs={"room_size": 6},
+    )
+
+    register(
+        id="BabyAI-FindObjS7-v0",
+        entry_point="minigrid.envs.babyai:FindObjS5",
+        kwargs={"room_size": 7},
+    )
+
+    register(
+        id="BabyAI-KeyCorridor-v0",
+        entry_point="minigrid.envs.babyai:KeyCorridor",
+    )
+
+    register(
+        id="BabyAI-KeyCorridorS3R1-v0",
+        entry_point="minigrid.envs.babyai:KeyCorridor",
+        kwargs={"room_size": 3, "num_rows": 1},
+    )
+
+    register(
+        id="BabyAI-KeyCorridorS3R2-v0",
+        entry_point="minigrid.envs.babyai:KeyCorridor",
+        kwargs={"room_size": 3, "num_rows": 2},
+    )
+
+    register(
+        id="BabyAI-KeyCorridorS3R3-v0",
+        entry_point="minigrid.envs.babyai:KeyCorridor",
+        kwargs={"room_size": 3, "num_rows": 3},
+    )
+
+    register(
+        id="BabyAI-KeyCorridorS4R3-v0",
+        entry_point="minigrid.envs.babyai:KeyCorridor",
+        kwargs={"room_size": 4, "num_rows": 3},
+    )
+
+    register(
+        id="BabyAI-KeyCorridorS5R3-v0",
+        entry_point="minigrid.envs.babyai:KeyCorridor",
+        kwargs={"room_size": 5, "num_rows": 3},
+    )
+
+    register(
+        id="BabyAI-KeyCorridorS6R3-v0",
+        entry_point="minigrid.envs.babyai:KeyCorridor",
+        kwargs={"room_size": 6, "num_rows": 3},
+    )
+
+    register(
+        id="BabyAI-OneRoomS8-v0",
+        entry_point="minigrid.envs.babyai:OneRoomS8",
+    )
+
+    register(
+        id="BabyAI-OneRoomS12-v0",
+        entry_point="minigrid.envs.babyai:OneRoomS8",
+        kwargs={"room_size": 12},
+    )
+
+    register(
+        id="BabyAI-OneRoomS16-v0",
+        entry_point="minigrid.envs.babyai:OneRoomS8",
+        kwargs={"room_size": 16},
+    )
+
+    register(
+        id="BabyAI-OneRoomS20-v0",
+        entry_point="minigrid.envs.babyai:OneRoomS8",
+        kwargs={"room_size": 20},
+    )
+
+    register(
+        id="BabyAI-MoveTwoAcrossS5N2-v0",
+        entry_point="minigrid.envs.babyai:MoveTwoAcross",
+        kwargs={"room_size": 5, "objs_per_room": 2},
+    )
+
+    register(
+        id="BabyAI-MoveTwoAcrossS8N9-v0",
+        entry_point="minigrid.envs.babyai:MoveTwoAcross",
+        kwargs={"room_size": 8, "objs_per_room": 9},
+    )
+
+    # BabyAI - Language based levels - Synth
+    # ----------------------------------------
+
+    register(
+        id="BabyAI-Synth-v0",
+        entry_point="minigrid.envs.babyai:Synth",
+    )
+
+    register(
+        id="BabyAI-SynthS5R2-v0",
+        entry_point="minigrid.envs.babyai:Synth",
+        kwargs={"room_size": 5, "num_rows": 2},
+    )
+
+    register(
+        id="BabyAI-SynthLoc-v0",
+        entry_point="minigrid.envs.babyai:SynthLoc",
+    )
+
+    register(
+        id="BabyAI-SynthSeq-v0",
+        entry_point="minigrid.envs.babyai:SynthSeq",
+    )
+
+    register(
+        id="BabyAI-MiniBossLevel-v0",
+        entry_point="minigrid.envs.babyai:MiniBossLevel",
+    )
+
+    register(
+        id="BabyAI-BossLevel-v0",
+        entry_point="minigrid.envs.babyai:BossLevel",
+    )
+
+    register(
+        id="BabyAI-BossLevelNoUnlock-v0",
+        entry_point="minigrid.envs.babyai:BossLevelNoUnlock",
+    )

+ 51 - 0
minigrid/envs/babyai/__init__.py

@@ -0,0 +1,51 @@
+from minigrid.envs.babyai.goto import (
+    GoTo,
+    GoToDoor,
+    GoToImpUnlock,
+    GoToLocal,
+    GoToObj,
+    GoToObjDoor,
+    GoToRedBall,
+    GoToRedBallGrey,
+    GoToRedBallNoDists,
+    GoToRedBlueBall,
+    GoToSeq,
+)
+from minigrid.envs.babyai.open import (
+    Open,
+    OpenDoor,
+    OpenDoorsOrder,
+    OpenRedDoor,
+    OpenTwoDoors,
+)
+from minigrid.envs.babyai.other import (
+    ActionObjDoor,
+    FindObjS5,
+    KeyCorridor,
+    MoveTwoAcross,
+    OneRoomS8,
+)
+from minigrid.envs.babyai.pickup import (
+    Pickup,
+    PickupAbove,
+    PickupDist,
+    PickupLoc,
+    UnblockPickup,
+)
+from minigrid.envs.babyai.putnext import PutNext, PutNextLocal
+from minigrid.envs.babyai.synth import (
+    BossLevel,
+    BossLevelNoUnlock,
+    MiniBossLevel,
+    Synth,
+    SynthLoc,
+    SynthSeq,
+)
+from minigrid.envs.babyai.unlock import (
+    BlockedUnlockPickup,
+    KeyInBox,
+    Unlock,
+    UnlockLocal,
+    UnlockPickup,
+    UnlockToUnlock,
+)

+ 0 - 0
minigrid/envs/babyai/core/__init__.py


+ 208 - 0
minigrid/envs/babyai/core/levelgen.py

@@ -0,0 +1,208 @@
+"""
+Copied and adapted from https://github.com/mila-iqia/babyai
+"""
+from minigrid.core.constants import COLOR_NAMES
+from minigrid.core.roomgrid import Room
+from minigrid.envs.babyai.core.roomgrid_level import RoomGridLevel
+from minigrid.envs.babyai.core.verifier import (
+    LOC_NAMES,
+    OBJ_TYPES,
+    OBJ_TYPES_NOT_DOOR,
+    AfterInstr,
+    AndInstr,
+    BeforeInstr,
+    GoToInstr,
+    ObjDesc,
+    OpenInstr,
+    PickupInstr,
+    PutNextInstr,
+)
+
+
+class LevelGen(RoomGridLevel):
+    """
+    Level generator which attempts to produce every possible sentence in
+    the baby language as an instruction.
+    """
+
+    def __init__(
+        self,
+        room_size=8,
+        num_rows=3,
+        num_cols=3,
+        num_dists=18,
+        locked_room_prob=0.5,
+        locations=True,
+        unblocking=True,
+        implicit_unlock=True,
+        action_kinds=["goto", "pickup", "open", "putnext"],
+        instr_kinds=["action", "and", "seq"],
+        **kwargs
+    ):
+        """
+        Configure the level generator
+
+        room_size, num_rows, num_cols: forwarded to the parent constructor
+        num_dists: number of distractor objects added by gen_mission
+        locked_room_prob: probability that gen_mission adds a locked room
+        locations: whether rand_obj may attach a location to a description
+        unblocking: if False, gen_mission checks that every object is
+            reachable without moving anything
+        implicit_unlock: if False, rand_obj only accepts descriptions that
+            match at least one object outside the locked room
+        action_kinds: action names rand_instr may choose from
+        instr_kinds: instruction kinds rand_instr may choose from
+        kwargs: forwarded unchanged to the parent constructor
+        """
+        self.num_dists = num_dists
+        self.locked_room_prob = locked_room_prob
+        self.locations = locations
+        self.unblocking = unblocking
+        self.implicit_unlock = implicit_unlock
+
+        # The default lists in the signature are created once and shared by
+        # every call; store copies so that mutating one instance's settings
+        # can never leak into the defaults or into the caller's list
+        self.action_kinds = list(action_kinds)
+        self.instr_kinds = list(instr_kinds)
+
+        # No locked room until add_locked_room() picks one
+        self.locked_room = None
+
+        # NOTE(review): attributes are set before calling the parent
+        # constructor, presumably because it may already trigger grid
+        # generation -- keep this ordering unless confirmed otherwise
+        super().__init__(
+            room_size=room_size, num_rows=num_rows, num_cols=num_cols, **kwargs
+        )
+
+    def gen_mission(self):
+        """
+        Populate the maze and sample a random instruction for it
+        """
+        # A locked room is added with probability locked_room_prob
+        wants_locked_room = self._rand_float(0, 1) < self.locked_room_prob
+        if wants_locked_room:
+            self.add_locked_room()
+
+        self.connect_all()
+        self.add_distractors(num_distractors=self.num_dists, all_unique=False)
+
+        # Objects are placed first; the agent is then re-placed for as long
+        # as it lands inside the locked room
+        self.place_agent()
+        while self.room_from_pos(*self.agent_pos) is self.locked_room:
+            self.place_agent()
+
+        # With unblocking disabled, every object has to be reachable
+        # without moving anything out of the way
+        if not self.unblocking:
+            self.check_objs_reachable()
+
+        # Sample the instructions for this mission
+        self.instrs = self.rand_instr(
+            action_kinds=self.action_kinds, instr_kinds=self.instr_kinds
+        )
+
+    def add_locked_room(self):
+        """
+        Pick a random room, lock one of its interior doors and place the
+        matching key in a different room
+        """
+        # Retry until the sampled door slot faces a neighboring room
+        # (a missing neighbor means the wall is external)
+        while True:
+            col = self._rand_int(0, self.num_cols)
+            row = self._rand_int(0, self.num_rows)
+            door_idx = self._rand_int(0, 4)
+            self.locked_room = self.get_room(col, row)
+
+            if self.locked_room.neighbors[door_idx] is not None:
+                door, _ = self.add_door(col, row, door_idx, locked=True)
+                break
+
+        # Retry until the sampled room for the key is not the locked one
+        while True:
+            col = self._rand_int(0, self.num_cols)
+            row = self._rand_int(0, self.num_rows)
+
+            if self.get_room(col, row) is not self.locked_room:
+                self.add_object(col, row, "key", door.color)
+                break
+
+    def rand_obj(self, types=OBJ_TYPES, colors=COLOR_NAMES, max_tries=100):
+        """
+        Sample a random object descriptor that matches at least one object
+
+        Raises RecursionError when no suitable descriptor is found within
+        the allowed number of attempts
+        """
+
+        attempts = 0
+
+        # Rejection sampling: draw descriptors until one is acceptable
+        while attempts <= max_tries:
+            attempts += 1
+
+            obj_color = self._rand_elem([None, *colors])
+            obj_type = self._rand_elem(types)
+
+            # A location is only drawn when locations are enabled
+            use_loc = self.locations and self._rand_bool()
+            obj_loc = self._rand_elem(LOC_NAMES) if use_loc else None
+
+            desc = ObjDesc(obj_type, obj_color, obj_loc)
+
+            # Objects (and their positions) that fit the descriptor
+            objs, poss = desc.find_matching_objs(self)
+
+            # Reject descriptors that match nothing
+            if len(objs) == 0:
+                continue
+
+            # Without implicit unlocking, at least one match has to lie
+            # outside the locked room
+            if not self.implicit_unlock and isinstance(self.locked_room, Room):
+                locked_room = self.locked_room
+                pos_not_locked = [
+                    p for p in poss if not locked_room.pos_inside(*p)
+                ]
+
+                if len(pos_not_locked) == 0:
+                    continue
+
+            return desc
+
+        raise RecursionError("failed to find suitable object")
+
+    def rand_instr(self, action_kinds, instr_kinds, depth=0):
+        """
+        Recursively sample a random instruction
+
+        action_kinds: action names a single action may be drawn from
+        instr_kinds: instruction kinds allowed at this level
+        depth: current recursion depth
+        """
+
+        kind = self._rand_elem(instr_kinds)
+
+        if kind == "action":
+            action = self._rand_elem(action_kinds)
+
+            if action == "goto":
+                return GoToInstr(self.rand_obj())
+            if action == "pickup":
+                return PickupInstr(self.rand_obj(types=OBJ_TYPES_NOT_DOOR))
+            if action == "open":
+                return OpenInstr(self.rand_obj(types=["door"]))
+            if action == "putnext":
+                # The moved object is sampled first, then the fixed one
+                moved = self.rand_obj(types=OBJ_TYPES_NOT_DOOR)
+                fixed = self.rand_obj()
+                return PutNextInstr(moved, fixed)
+
+            assert False
+
+        if kind == "and":
+            # Both operands of a conjunction are single actions
+            operands = [
+                self.rand_instr(
+                    action_kinds=action_kinds,
+                    instr_kinds=["action"],
+                    depth=depth + 1,
+                )
+                for _ in range(2)
+            ]
+            return AndInstr(*operands)
+
+        if kind == "seq":
+            # Operands of a sequence are actions or conjunctions
+            first, second = [
+                self.rand_instr(
+                    action_kinds=action_kinds,
+                    instr_kinds=["action", "and"],
+                    depth=depth + 1,
+                )
+                for _ in range(2)
+            ]
+
+            order = self._rand_elem(["before", "after"])
+
+            if order == "before":
+                return BeforeInstr(first, second)
+            if order == "after":
+                return AfterInstr(first, second)
+
+            assert False
+
+        assert False

+ 280 - 0
minigrid/envs/babyai/core/roomgrid_level.py

@@ -0,0 +1,280 @@
+"""
+Copied and adapted from https://github.com/mila-iqia/babyai
+"""
+from minigrid.core.roomgrid import RoomGrid
+from minigrid.envs.babyai.core.verifier import (
+    ActionInstr,
+    AfterInstr,
+    AndInstr,
+    BeforeInstr,
+    PutNextInstr,
+    SeqInstr,
+)
+from minigrid.minigrid_env import MissionSpace
+
+
+class RejectSampling(Exception):
+    """
+    Raised to signal that a generated sample must be discarded
+    (rejection sampling)
+    """
+
+
+class BabyAIMissionSpace(MissionSpace):
+    """
+    Class that mimics the behavior required by minigrid.minigrid_env.MissionSpace,
+    but does not change how missions are generated for BabyAI. It silences
+    the gymnasium.utils.passive_env_checker given that it considers all strings to be
+    plausible samples.
+
+    The mission function handed to the parent always returns "go", and
+    contains() accepts every value.
+    """
+
+    def __init__(self):
+        # Constant placeholder mission function; takes no arguments
+        super().__init__(mission_func=lambda: "go")
+
+    def contains(self, x: str):
+        # Every value is reported as a member of the space
+        return True
+
+
+class RoomGridLevel(RoomGrid):
+    """
+    Base for levels based on RoomGrid.
+    A level generates missions from one or more patterns.
+    Levels should produce a family of missions
+    of approximately similar difficulty.
+    """
+
+    def __init__(self, room_size=8, **kwargs):
+        """
+        Create the level with a BabyAIMissionSpace as its mission space;
+        all other keyword arguments are forwarded to the parent
+        """
+        super().__init__(
+            room_size=room_size, mission_space=BabyAIMissionSpace(), **kwargs
+        )
+
+    def reset(self, **kwargs):
+        """
+        Reset the environment, re-bind the instruction verifier and
+        recompute the step limit; returns whatever the parent reset returns
+        """
+        result = super().reset(**kwargs)
+
+        # The verifier has to be rebuilt for the freshly generated grid
+        self.instrs.reset_verifier(self)
+
+        # Step budget: one full maze traversal (in cells) per navigation
+        # the instructions may require
+        steps_per_room = self.room_size**2
+        steps_per_maze = steps_per_room * self.num_rows * self.num_cols
+        self.max_steps = self.num_navs_needed(self.instrs) * steps_per_maze
+
+        return result
+
+    def step(self, action):
+        """
+        Advance the environment by one action and let the instruction
+        verifier decide whether the episode ends
+        """
+        obs, reward, terminated, truncated, info = super().step(action)
+
+        # Dropping an object may move it, so tracked positions are refreshed
+        if action == self.actions.drop:
+            self.update_objs_poss()
+
+        # Ask the verifier whether the mission succeeded or failed
+        outcome = self.instrs.verify(action)
+
+        if outcome in ("success", "failure"):
+            terminated = True
+            reward = self._reward() if outcome == "success" else 0
+
+        return obs, reward, terminated, truncated, info
+
+    def update_objs_poss(self, instr=None):
+        """
+        Refresh the tracked object positions of an instruction tree
+        (defaults to the level's own instructions)
+        """
+        target = self.instrs if instr is None else instr
+
+        # Compound instructions delegate to both of their operands
+        if isinstance(target, (BeforeInstr, AndInstr, AfterInstr)):
+            self.update_objs_poss(target.instr_a)
+            self.update_objs_poss(target.instr_b)
+            return
+
+        target.update_objs_poss()
+
+    def _gen_grid(self, width, height):
+        """
+        Generate grid, mission and instructions, retrying until a sample
+        passes validation, then derive the mission text
+        """
+        # RecursionError is caught to handle the rare cases where
+        # rejection sampling runs out of attempts; RejectSampling marks a
+        # sample that failed validation. Either way, start over.
+        while True:
+            try:
+                super()._gen_grid(width, height)
+                self.gen_mission()
+                self.validate_instrs(self.instrs)
+            except RecursionError as error:
+                print("Timeout during mission generation:", error)
+            except RejectSampling as error:
+                print("Sampling rejected:", error)
+            else:
+                break
+
+        # Surface (natural language) form of the instructions
+        self.surface = self.instrs.surface(self)
+        self.mission = self.surface
+
+    def validate_instrs(self, instr):
+        """
+        Validate generated instructions, raising RejectSampling for
+        samples that should be discarded
+
+        Sequence instructions are validated recursively through both of
+        their operands
+        """
+        unblocking = getattr(self, "unblocking", False)
+
+        # Colors of all locked doors (only collected when unblocking is on)
+        colors_of_locked_doors = []
+        if unblocking:
+            colors_of_locked_doors = [
+                door.color
+                for i in range(self.num_cols)
+                for j in range(self.num_rows)
+                for door in self.get_room(i, j).doors
+                if door and door.is_locked
+            ]
+
+        if isinstance(instr, PutNextInstr):
+            # Resolve the objects referenced by the instruction
+            instr.reset_verifier(self)
+            move = instr.desc_move
+            fixed = instr.desc_fixed
+
+            # No object may match both sides of the instruction
+            if set(move.obj_set) & set(fixed.obj_set):
+                raise RejectSampling(
+                    "there are objects that match both lhs and rhs of PutNext"
+                )
+
+            # The objects must not already be next to each other
+            if instr.objs_next():
+                raise RejectSampling("objs already next to each other")
+
+            # We must not ask to move an object next to itself
+            if (
+                len(move.obj_set) == 1
+                and len(fixed.obj_set) == 1
+                and move.obj_set[0] is fixed.obj_set[0]
+            ):
+                raise RejectSampling("cannot move an object next to itself")
+
+        if isinstance(instr, ActionInstr):
+            if not unblocking:
+                return
+            # TODO: either relax this a bit or make the bot handle these
+            # corner-case scenarios
+            # The instruction must not involve a key whose color matches
+            # that of a locked door
+            for attr in ("desc", "desc_move", "desc_fixed"):
+                if not hasattr(instr, attr):
+                    continue
+                obj = getattr(instr, attr)
+                if obj.type == "key" and obj.color in colors_of_locked_doors:
+                    raise RejectSampling(
+                        "cannot do anything with/to a key that can be used to open a door"
+                    )
+            return
+
+        if isinstance(instr, SeqInstr):
+            self.validate_instrs(instr.instr_a)
+            self.validate_instrs(instr.instr_b)
+            return
+
+        assert False, "unhandled instruction type"
+
+    def gen_mission(self):
+        """
+        Generate a mission (instructions and matching environment).
+        Derived level classes should implement this method; this base
+        version always raises NotImplementedError.
+        """
+        raise NotImplementedError
+
+    @property
+    def level_name(self):
+        """
+        Return the level_name attribute looked up on the instance's class
+        """
+        # NOTE(review): on a class that does not override `level_name`,
+        # this class-level lookup yields the property object itself rather
+        # than a string -- presumably subclasses/registration are expected
+        # to set it; confirm
+        return self.__class__.level_name
+
+    @property
+    def gym_id(self):
+        """
+        Return the gym_id attribute looked up on the instance's class
+        """
+        # NOTE(review): on a class that does not override `gym_id`, this
+        # class-level lookup yields the property object itself rather than
+        # an id string -- presumably subclasses/registration are expected
+        # to set it; confirm
+        return self.__class__.gym_id
+
+    def num_navs_needed(self, instr) -> int:
+        """
+        Return the maximum number of navigations required to carry out
+        an instruction, summing over the operands of sequence instructions
+        """
+
+        # PutNext is tested first: it counts two navigations
+        if isinstance(instr, PutNextInstr):
+            return 2
+
+        # Any other single action counts one navigation
+        if isinstance(instr, ActionInstr):
+            return 1
+
+        if isinstance(instr, SeqInstr):
+            return self.num_navs_needed(instr.instr_a) + self.num_navs_needed(
+                instr.instr_b
+            )
+
+        raise NotImplementedError(
+            "instr needs to be an instance of PutNextInstr, ActionInstr, or SeqInstr"
+        )
+
+    def open_all_doors(self):
+        """
+        Mark every door of every room in the maze as open
+        """
+
+        for col in range(self.num_cols):
+            for row in range(self.num_rows):
+                # Empty door slots are falsy and are skipped
+                for door in filter(None, self.get_room(col, row).doors):
+                    door.is_open = True
+
+    def check_objs_reachable(self, raise_exc=True):
+        """
+        Verify that every object can be reached from the agent's position
+        without moving any other object (i.e. without unblocking)
+
+        Returns True when all objects are reachable. Otherwise raises
+        RejectSampling, or returns False when raise_exc is False.
+        """
+
+        # Flood fill from the agent's position
+        visited = set()
+        frontier = [self.agent_pos]
+
+        while frontier:
+            x, y = frontier.pop()
+
+            inside = 0 <= x < self.grid.width and 0 <= y < self.grid.height
+            if not inside or (x, y) in visited:
+                continue
+
+            # The cell itself counts as reachable...
+            visited.add((x, y))
+
+            # ...but anything other than a door stops the search here
+            occupant = self.grid.get(x, y)
+            if occupant and occupant.type != "door":
+                continue
+
+            # Continue with the horizontal and vertical neighbors
+            frontier.extend(
+                ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1))
+            )
+
+        # Every non-wall object has to be among the visited cells
+        for x in range(self.grid.width):
+            for y in range(self.grid.height):
+                cell = self.grid.get(x, y)
+
+                if cell and cell.type != "wall" and (x, y) not in visited:
+                    if raise_exc:
+                        raise RejectSampling(
+                            "unreachable object at " + str((x, y))
+                        )
+                    return False
+
+        return True

+ 564 - 0
minigrid/envs/babyai/core/verifier.py

@@ -0,0 +1,564 @@
+"""
+Copied and adapted from https://github.com/mila-iqia/babyai
+"""
+import os
+from abc import ABC, abstractmethod
+
+import numpy as np
+
+from minigrid.core.constants import COLOR_NAMES, DIR_TO_VEC
+from minigrid.minigrid_env import MiniGridEnv
+
+# Object types we are allowed to describe in language
+OBJ_TYPES = ["box", "ball", "key", "door"]
+
+# Describable object types, excluding doors
+OBJ_TYPES_NOT_DOOR = list(filter(lambda t: t != "door", OBJ_TYPES))
+
+# Locations are all relative to the agent's starting position
+LOC_NAMES = ["left", "right", "front", "behind"]
+
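+# Environment flag to indicate that done actions should be
+# used by the verifier. Any non-empty value of BABYAI_DONE_ACTIONS
+# enables it, since the raw string is used as a truth value.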
+use_done_actions = os.environ.get("BABYAI_DONE_ACTIONS", False)
+
+
+def dot_product(v1, v2):
+    """
+    Compute the dot product of the vectors v1 and v2.
+    """
+
+    return sum(i * j for i, j in zip(v1, v2))
+
+
+def pos_next_to(pos_a, pos_b):
+    """
+    Test if two positions are next to each other.
+    The positions have to line up either horizontally or vertically,
+    but positions that are diagonally adjacent are not counted.
+    """
+
+    xa, ya = pos_a
+    xb, yb = pos_b
+    d = abs(xa - xb) + abs(ya - yb)
+    return d == 1
+
+
+class ObjDesc:
+    """
+    Description of a set of objects in an environment
+    """
+
+    def __init__(self, type, color=None, loc=None):
+        assert type in [None, *OBJ_TYPES], type
+        assert color in [None, *COLOR_NAMES], color
+        assert loc in [None, *LOC_NAMES], loc
+
+        self.color = color
+        self.type = type
+        self.loc = loc
+
+        # Set of objects possibly matching the description
+        self.obj_set = []
+
+        # Set of initial object positions
+        self.obj_poss = []
+
+    def __repr__(self):
+        return f"{self.color} {self.type} {self.loc}"
+
+    def surface(self, env):
+        """
+        Generate a natural language representation of the object description
+        """
+
+        self.find_matching_objs(env)
+        assert len(self.obj_set) > 0, "no object matching description"
+
+        if self.type:
+            s = str(self.type)
+        else:
+            s = "object"
+
+        if self.color:
+            s = self.color + " " + s
+
+        if self.loc:
+            if self.loc == "front":
+                s = s + " in front of you"
+            elif self.loc == "behind":
+                s = s + " behind you"
+            else:
+                s = s + " on your " + self.loc
+
+        # Singular vs plural
+        if len(self.obj_set) > 1:
+            s = "a " + s
+        else:
+            s = "the " + s
+
+        return s
+
+    def find_matching_objs(self, env, use_location=True):
+        """
+        Find the set of objects matching the description and their positions.
+        When use_location is False, we only update the positions of already tracked objects, without taking into account
+        the location of the object. e.g. A ball that was on "your right" initially will still be tracked as being "on
+        your right" when you move.
+        """
+
+        if use_location:
+            self.obj_set = []
+            # otherwise we keep the same obj_set
+
+        self.obj_poss = []
+
+        agent_room = env.room_from_pos(*env.agent_pos)
+
+        for i in range(env.grid.width):
+            for j in range(env.grid.height):
+                cell = env.grid.get(i, j)
+                if cell is None:
+                    continue
+
+                if not use_location:
+                    # we should keep tracking the same objects initially tracked only
+                    already_tracked = any([cell is obj for obj in self.obj_set])
+                    if not already_tracked:
+                        continue
+
+                # Check if object's type matches description
+                if self.type is not None and cell.type != self.type:
+                    continue
+
+                # Check if object's color matches description
+                if self.color is not None and cell.color != self.color:
+                    continue
+
+                # Check if object's position matches description
+                if use_location and self.loc in ["left", "right", "front", "behind"]:
+                    # Locations apply only to objects in the same room
+                    # the agent starts in
+                    if not agent_room.pos_inside(i, j):
+                        continue
+
+                    # Direction from the agent to the object
+                    v = (i - env.agent_pos[0], j - env.agent_pos[1])
+
+                    # (d1, d2) is an oriented orthonormal basis
+                    d1 = DIR_TO_VEC[env.agent_dir]
+                    d2 = (-d1[1], d1[0])
+
+                    # Check if object's position matches with location
+                    pos_matches = {
+                        "left": dot_product(v, d2) < 0,
+                        "right": dot_product(v, d2) > 0,
+                        "front": dot_product(v, d1) > 0,
+                        "behind": dot_product(v, d1) < 0,
+                    }
+
+                    if not (pos_matches[self.loc]):
+                        continue
+
+                if use_location:
+                    self.obj_set.append(cell)
+                self.obj_poss.append((i, j))
+
+        return self.obj_set, self.obj_poss
+
+
+class Instr(ABC):
+    """
+    Base class for all instructions in the baby language
+    """
+
+    def __init__(self):
+        self.env: MiniGridEnv
+
+    @abstractmethod
+    def surface(self, env):
+        """
+        Produce a natural language representation of the instruction
+        """
+
+        raise NotImplementedError
+
+    def reset_verifier(self, env):
+        """
+        Must be called at the beginning of the episode
+        """
+
+        self.env = env
+
+    @abstractmethod
+    def verify(self, action):
+        """
+        Verify if the task described by the instruction is incomplete,
+        complete with success or failed. The return value is a string,
+        one of: 'success', 'failure' or 'continue'.
+        """
+
+        raise NotImplementedError
+
+    def update_objs_poss(self):
+        """
+        Update the position of objects present in the instruction if needed
+        """
+        potential_objects = ("desc", "desc_move", "desc_fixed")
+        for attr in potential_objects:
+            if hasattr(self, attr):
+                getattr(self, attr).find_matching_objs(self.env, use_location=False)
+
+
+class ActionInstr(Instr, ABC):
+    """
+    Base class for all action instructions (clauses)
+    """
+
+    def __init__(self):
+        super().__init__()
+
+        # Indicates that the action was completed on the last step
+        self.lastStepMatch = False
+
+    def verify(self, action):
+        """
+        Verifies actions, with and without the done action.
+        """
+
+        if not use_done_actions:
+            return self.verify_action(action)
+
+        if action == self.env.actions.done:
+            if self.lastStepMatch:
+                return "success"
+            return "failure"
+
+        res = self.verify_action(action)
+        self.lastStepMatch = res == "success"
+
+    @abstractmethod
+    def verify_action(self):
+        """
+        Each action instruction class should implement this method
+        to verify the action.
+        """
+
+        raise NotImplementedError
+
+
+class OpenInstr(ActionInstr):
+    def __init__(self, obj_desc, strict=False):
+        super().__init__()
+        assert obj_desc.type == "door"
+        self.desc = obj_desc
+        self.strict = strict
+
+    def surface(self, env):
+        return "open " + self.desc.surface(env)
+
+    def reset_verifier(self, env):
+        super().reset_verifier(env)
+
+        # Identify set of possible matching objects in the environment
+        self.desc.find_matching_objs(env)
+
+    def verify_action(self, action):
+        # Only verify when the toggle action is performed
+        if action != self.env.actions.toggle:
+            return "continue"
+
+        # Get the contents of the cell in front of the agent
+        front_cell = self.env.grid.get(*self.env.front_pos)
+
+        for door in self.desc.obj_set:
+            if front_cell and front_cell is door and door.is_open:
+                return "success"
+
+        # If in strict mode and the wrong door is opened, failure
+        if self.strict:
+            if front_cell and front_cell.type == "door":
+                return "failure"
+
+        return "continue"
+
+
+class GoToInstr(ActionInstr):
+    """
+    Go next to (and look towards) an object matching a given description
+    eg: go to the door
+    """
+
+    def __init__(self, obj_desc):
+        super().__init__()
+        self.desc = obj_desc
+
+    def surface(self, env):
+        return "go to " + self.desc.surface(env)
+
+    def reset_verifier(self, env):
+        super().reset_verifier(env)
+
+        # Identify set of possible matching objects in the environment
+        self.desc.find_matching_objs(env)
+
+    def verify_action(self, action):
+        # For each object position
+        for pos in self.desc.obj_poss:
+            # If the agent is next to (and facing) the object
+            if np.array_equal(pos, self.env.front_pos):
+                return "success"
+
+        return "continue"
+
+
+class PickupInstr(ActionInstr):
+    """
+    Pick up an object matching a given description
+    eg: pick up the grey ball
+
+    With strict=True, a pickup action that leaves the agent carrying an
+    object which does not satisfy the success condition is a failure.
+    """
+
+    def __init__(self, obj_desc, strict=False):
+        super().__init__()
+        # Doors cannot be the target of a pickup instruction
+        assert obj_desc.type != "door"
+        self.desc = obj_desc
+        self.strict = strict
+
+    def surface(self, env):
+        """
+        Produce the natural language form, eg: "pick up the grey ball"
+        """
+        return "pick up " + self.desc.surface(env)
+
+    def reset_verifier(self, env):
+        """
+        Bind the environment, clear the carried-object memory and find
+        the objects matching the description
+        """
+        super().reset_verifier(env)
+
+        # Object previously being carried
+        self.preCarrying = None
+
+        # Identify set of possible matching objects in the environment
+        self.desc.find_matching_objs(env)
+
+    def verify_action(self, action):
+        """
+        Return 'success' when a pickup action made the agent go from
+        carrying nothing to carrying a matching object, 'failure' in
+        strict mode for any other pickup that leaves the agent carrying
+        something, and 'continue' otherwise
+        """
+        # To keep track of what was carried at the last time step
+        preCarrying = self.preCarrying
+        self.preCarrying = self.env.carrying
+
+        # Only verify when the pickup action is performed
+        if action != self.env.actions.pickup:
+            return "continue"
+
+        for obj in self.desc.obj_set:
+            # The agent's hands had to be empty before this step
+            if preCarrying is None and self.env.carrying is obj:
+                return "success"
+
+        # If in strict mode and the wrong object is picked up, failure
+        if self.strict:
+            if self.env.carrying:
+                return "failure"
+
+        # Redundant with the assignment at the top of this method
+        self.preCarrying = self.env.carrying
+
+        return "continue"
+
+
+class PutNextInstr(ActionInstr):
+    """
+    Put an object next to another object
+    eg: put the red ball next to the blue key
+
+    desc_move describes the object to carry, desc_fixed the object it
+    has to be dropped next to.
+    """
+
+    def __init__(self, obj_move, obj_fixed, strict=False):
+        super().__init__()
+        # Doors cannot be moved
+        assert obj_move.type != "door"
+        self.desc_move = obj_move
+        self.desc_fixed = obj_fixed
+        self.strict = strict
+
+    def surface(self, env):
+        """
+        Produce the natural language form,
+        eg: "put the red ball next to the blue key"
+        """
+        return (
+            "put "
+            + self.desc_move.surface(env)
+            + " next to "
+            + self.desc_fixed.surface(env)
+        )
+
+    def reset_verifier(self, env):
+        """
+        Bind the environment, clear the carried-object memory and find
+        the objects matching both descriptions
+        """
+        super().reset_verifier(env)
+
+        # Object previously being carried
+        self.preCarrying = None
+
+        # Identify set of possible matching objects in the environment
+        self.desc_move.find_matching_objs(env)
+        self.desc_fixed.find_matching_objs(env)
+
+    def objs_next(self):
+        """
+        Check if the objects are next to each other
+        This is used for rejection sampling
+        """
+
+        for obj_a in self.desc_move.obj_set:
+            pos_a = obj_a.cur_pos
+
+            for pos_b in self.desc_fixed.obj_poss:
+                if pos_next_to(pos_a, pos_b):
+                    return True
+        return False
+
+    def verify_action(self, action):
+        """
+        Return 'success' when the agent drops a matching object it was
+        carrying on a cell adjacent to one of the fixed objects
+        """
+        # To keep track of what was carried at the last time step
+        preCarrying = self.preCarrying
+        self.preCarrying = self.env.carrying
+
+        # In strict mode, picking up the wrong object fails
+        # NOTE(review): the check below fails on any pickup that leaves
+        # the agent carrying something; it does not test whether the
+        # object matches desc_move - confirm this is intended
+        if self.strict:
+            if action == self.env.actions.pickup and self.env.carrying:
+                return "failure"
+
+        # Only verify when the drop action is performed
+        if action != self.env.actions.drop:
+            return "continue"
+
+        for obj_a in self.desc_move.obj_set:
+            # Only the object that was being carried before this step
+            # can have just been dropped
+            if preCarrying is not obj_a:
+                continue
+
+            pos_a = obj_a.cur_pos
+
+            for pos_b in self.desc_fixed.obj_poss:
+                if pos_next_to(pos_a, pos_b):
+                    return "success"
+
+        return "continue"
+
+
+class SeqInstr(Instr, ABC):
+    """
+    Base class for sequencing instructions (before, after, and)
+    """
+
+    def __init__(self, instr_a, instr_b, strict=False):
+        assert isinstance(instr_a, ActionInstr) or isinstance(instr_a, AndInstr)
+        assert isinstance(instr_b, ActionInstr) or isinstance(instr_b, AndInstr)
+        self.instr_a = instr_a
+        self.instr_b = instr_b
+        self.strict = strict
+
+
+class BeforeInstr(SeqInstr):
+    """
+    Sequence two instructions in order:
+    eg: go to the red door then pick up the blue ball
+
+    instr_a has to succeed first, then instr_b.
+    """
+
+    def surface(self, env):
+        """
+        Produce the natural language form: "<a>, then <b>"
+        """
+        return self.instr_a.surface(env) + ", then " + self.instr_b.surface(env)
+
+    def reset_verifier(self, env):
+        """
+        Reset both sub-instructions and clear their recorded status
+        """
+        super().reset_verifier(env)
+        self.instr_a.reset_verifier(env)
+        self.instr_b.reset_verifier(env)
+        # Hold False until verified, then the last status string
+        # returned by the corresponding sub-instruction
+        self.a_done = False
+        self.b_done = False
+
+    def verify(self, action):
+        """
+        Advance the two-stage check with the given action and return
+        'success', 'failure' or 'continue'
+        """
+        if self.a_done == "success":
+            # First stage complete: only the second instruction matters
+            self.b_done = self.instr_b.verify(action)
+
+            if self.b_done == "failure":
+                return "failure"
+
+            if self.b_done == "success":
+                return "success"
+        else:
+            self.a_done = self.instr_a.verify(action)
+            if self.a_done == "failure":
+                return "failure"
+
+            if self.a_done == "success":
+                # Re-run with the same action so that it is also checked
+                # against the second instruction
+                return self.verify(action)
+
+            # In strict mode, completing b first means failure
+            if self.strict:
+                if self.instr_b.verify(action) == "success":
+                    return "failure"
+
+        return "continue"
+
+
+class AfterInstr(SeqInstr):
+    """
+    Sequence two instructions in reverse order:
+    eg: go to the red door after you pick up the blue ball
+
+    instr_b has to succeed first, then instr_a.
+    """
+
+    def surface(self, env):
+        """
+        Produce the natural language form: "<a> after you <b>"
+        """
+        return self.instr_a.surface(env) + " after you " + self.instr_b.surface(env)
+
+    def reset_verifier(self, env):
+        """
+        Reset both sub-instructions and clear their recorded status
+        """
+        super().reset_verifier(env)
+        self.instr_a.reset_verifier(env)
+        self.instr_b.reset_verifier(env)
+        # Hold False until verified, then the last status string
+        # returned by the corresponding sub-instruction
+        self.a_done = False
+        self.b_done = False
+
+    def verify(self, action):
+        """
+        Advance the two-stage check with the given action and return
+        'success', 'failure' or 'continue'
+        """
+        if self.b_done == "success":
+            # First stage (b) complete: only instruction a matters now
+            self.a_done = self.instr_a.verify(action)
+
+            if self.a_done == "success":
+                return "success"
+
+            if self.a_done == "failure":
+                return "failure"
+        else:
+            self.b_done = self.instr_b.verify(action)
+            if self.b_done == "failure":
+                return "failure"
+
+            if self.b_done == "success":
+                # Re-run with the same action so that it is also checked
+                # against instruction a
+                return self.verify(action)
+
+            # In strict mode, completing a first means failure
+            if self.strict:
+                if self.instr_a.verify(action) == "success":
+                    return "failure"
+
+        return "continue"
+
+
+class AndInstr(SeqInstr):
+    """
+    Conjunction of two actions, both can be completed in any other
+    eg: go to the red door and pick up the blue ball
+    """
+
+    def __init__(self, instr_a, instr_b, strict=False):
+        assert isinstance(instr_a, ActionInstr)
+        assert isinstance(instr_b, ActionInstr)
+        super().__init__(instr_a, instr_b, strict)
+
+    def surface(self, env):
+        return self.instr_a.surface(env) + " and " + self.instr_b.surface(env)
+
+    def reset_verifier(self, env):
+        super().reset_verifier(env)
+        self.instr_a.reset_verifier(env)
+        self.instr_b.reset_verifier(env)
+        self.a_done = False
+        self.b_done = False
+
+    def verify(self, action):
+        if self.a_done != "success":
+            self.a_done = self.instr_a.verify(action)
+
+        if self.b_done != "success":
+            self.b_done = self.instr_b.verify(action)
+
+        if use_done_actions and action is self.env.actions.done:
+            if self.a_done == "failure" and self.b_done == "failure":
+                return "failure"
+
+        if self.a_done == "success" and self.b_done == "success":
+            return "success"
+
+        return "continue"

+ 277 - 0
minigrid/envs/babyai/goto.py

@@ -0,0 +1,277 @@
+"""
+Copied and adapted from https://github.com/mila-iqia/babyai.
+Levels described in the Baby AI ICLR 2019 submission, with the `Go to` instruction.
+"""
+from minigrid.envs.babyai.core.levelgen import LevelGen
+from minigrid.envs.babyai.core.roomgrid_level import RejectSampling, RoomGridLevel
+from minigrid.envs.babyai.core.verifier import GoToInstr, ObjDesc
+
+
+class GoToRedBallGrey(RoomGridLevel):
+    """
+    Go to the red ball, single room, with distractors.
+    The distractors are all grey to reduce perceptual complexity.
+    This level has distractors but doesn't make use of language.
+    """
+
+    def __init__(self, room_size=8, num_dists=7, **kwargs):
+        self.num_dists = num_dists
+        super().__init__(num_rows=1, num_cols=1, room_size=room_size, **kwargs)
+
+    def gen_mission(self):
+        self.place_agent()
+        obj, _ = self.add_object(0, 0, "ball", "red")
+        dists = self.add_distractors(num_distractors=self.num_dists, all_unique=False)
+
+        for dist in dists:
+            dist.color = "grey"
+
+        # Make sure no unblocking is required
+        self.check_objs_reachable()
+
+        self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
+
+
+class GoToRedBall(RoomGridLevel):
+    """
+    Go to the red ball, single room, with distractors.
+    This level has distractors but doesn't make use of language.
+    """
+
+    def __init__(self, room_size=8, num_dists=7, **kwargs):
+        self.num_dists = num_dists
+        super().__init__(num_rows=1, num_cols=1, room_size=room_size, **kwargs)
+
+    def gen_mission(self):
+        self.place_agent()
+        obj, _ = self.add_object(0, 0, "ball", "red")
+        self.add_distractors(num_distractors=self.num_dists, all_unique=False)
+
+        # Make sure no unblocking is required
+        self.check_objs_reachable()
+
+        self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
+
+
+class GoToRedBallNoDists(GoToRedBall):
+    """
+    Go to the red ball. No distractors present.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(room_size=8, num_dists=0, **kwargs)
+
+
+class GoToObj(RoomGridLevel):
+    """
+    Go to an object, inside a single room with no doors, no distractors
+    """
+
+    def __init__(self, room_size=8, **kwargs):
+        super().__init__(num_rows=1, num_cols=1, room_size=room_size, **kwargs)
+
+    def gen_mission(self):
+        self.place_agent()
+        objs = self.add_distractors(num_distractors=1)
+        obj = objs[0]
+        self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
+
+
+class GoToLocal(RoomGridLevel):
+    """
+    Go to an object, inside a single room with no doors, no distractors
+    """
+
+    def __init__(self, room_size=8, num_dists=8, **kwargs):
+        self.num_dists = num_dists
+        super().__init__(num_rows=1, num_cols=1, room_size=room_size, **kwargs)
+
+    def gen_mission(self):
+        self.place_agent()
+        objs = self.add_distractors(num_distractors=self.num_dists, all_unique=False)
+        self.check_objs_reachable()
+        obj = self._rand_elem(objs)
+        self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
+
+
+class GoTo(RoomGridLevel):
+    """
+    Go to an object, the object may be in another room. Many distractors.
+    """
+
+    def __init__(
+        self,
+        room_size=8,
+        num_rows=3,
+        num_cols=3,
+        num_dists=18,
+        doors_open=False,
+        **kwargs
+    ):
+        self.num_dists = num_dists
+        self.doors_open = doors_open
+        super().__init__(
+            num_rows=num_rows, num_cols=num_cols, room_size=room_size, **kwargs
+        )
+
+    def gen_mission(self):
+        self.place_agent()
+        self.connect_all()
+        objs = self.add_distractors(num_distractors=self.num_dists, all_unique=False)
+        self.check_objs_reachable()
+        obj = self._rand_elem(objs)
+        self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
+
+        # If requested, open all the doors
+        if self.doors_open:
+            self.open_all_doors()
+
+
+class GoToImpUnlock(RoomGridLevel):
+    """
+    Go to an object, which may be in a locked room.
+    Competencies: Maze, GoTo, ImpUnlock
+    No unblocking.
+    """
+
+    def gen_mission(self):
+        # Add a locked door to a random room
+        id = self._rand_int(0, self.num_cols)
+        jd = self._rand_int(0, self.num_rows)
+        door, pos = self.add_door(id, jd, locked=True)
+        locked_room = self.get_room(id, jd)
+
+        # Add the key to a different room
+        while True:
+            ik = self._rand_int(0, self.num_cols)
+            jk = self._rand_int(0, self.num_rows)
+            if ik is id and jk is jd:
+                continue
+            self.add_object(ik, jk, "key", door.color)
+            break
+
+        self.connect_all()
+
+        # Add distractors to all but the locked room.
+        # We do this to speed up the reachability test,
+        # which otherwise will reject all levels with
+        # objects in the locked room.
+        for i in range(self.num_cols):
+            for j in range(self.num_rows):
+                if i is not id or j is not jd:
+                    self.add_distractors(i, j, num_distractors=2, all_unique=False)
+
+        # The agent must be placed after all the object to respect constraints
+        while True:
+            self.place_agent()
+            start_room = self.room_from_pos(*self.agent_pos)
+            # Ensure that we are not placing the agent in the locked room
+            if start_room is locked_room:
+                continue
+            break
+
+        self.check_objs_reachable()
+
+        # Add a single object to the locked room
+        # The instruction requires going to an object matching that description
+        (obj,) = self.add_distractors(id, jd, num_distractors=1, all_unique=False)
+        self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
+
+
+class GoToSeq(LevelGen):
+    """
+    Sequencing of go-to-object commands.
+
+    Competencies: Maze, GoTo, Seq
+    No locked room.
+    No locations.
+    No unblocking.
+    """
+
+    def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
+        super().__init__(
+            room_size=room_size,
+            num_rows=num_rows,
+            num_cols=num_cols,
+            num_dists=num_dists,
+            action_kinds=["goto"],
+            locked_room_prob=0,
+            locations=False,
+            unblocking=False,
+            **kwargs
+        )
+
+
+class GoToRedBlueBall(RoomGridLevel):
+    """
+    Go to the red ball or to the blue ball.
+    There is exactly one red or blue ball, and some distractors.
+    The distractors are guaranteed not to be red or blue balls.
+    Language is not required to solve this level.
+    """
+
+    def __init__(self, room_size=8, num_dists=7, **kwargs):
+        self.num_dists = num_dists
+        super().__init__(num_rows=1, num_cols=1, room_size=room_size, **kwargs)
+
+    def gen_mission(self):
+        self.place_agent()
+
+        dists = self.add_distractors(num_distractors=self.num_dists, all_unique=False)
+
+        # Ensure there is only one red or blue ball
+        for dist in dists:
+            if dist.type == "ball" and (dist.color == "blue" or dist.color == "red"):
+                raise RejectSampling("can only have one blue or red ball")
+
+        color = self._rand_elem(["red", "blue"])
+        obj, _ = self.add_object(0, 0, "ball", color)
+
+        # Make sure no unblocking is required
+        self.check_objs_reachable()
+
+        self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
+
+
+class GoToDoor(RoomGridLevel):
+    """
+    Go to a door
+    (of a given color, in the current room)
+    No distractors, no language variation
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(room_size=7, **kwargs)
+
+    def gen_mission(self):
+        objs = []
+        for _ in range(4):
+            door, _ = self.add_door(1, 1)
+            objs.append(door)
+        self.place_agent(1, 1)
+
+        obj = self._rand_elem(objs)
+        self.instrs = GoToInstr(ObjDesc("door", obj.color))
+
+
+class GoToObjDoor(RoomGridLevel):
+    """
+    Go to an object or door
+    (of a given type and color, in the current room)
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(room_size=8, **kwargs)
+
+    def gen_mission(self):
+        self.place_agent(1, 1)
+        objs = self.add_distractors(1, 1, num_distractors=8, all_unique=False)
+
+        for _ in range(4):
+            door, _ = self.add_door(1, 1)
+            objs.append(door)
+
+        self.check_objs_reachable()
+
+        obj = self._rand_elem(objs)
+        self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))

+ 166 - 0
minigrid/envs/babyai/open.py

@@ -0,0 +1,166 @@
+"""
+Copied and adapted from https://github.com/mila-iqia/babyai.
+Levels described in the Baby AI ICLR 2019 submission, with the `Open` instruction.
+"""
+
+from minigrid.core.constants import COLOR_NAMES
+from minigrid.envs.babyai.core.roomgrid_level import RoomGridLevel
+from minigrid.envs.babyai.core.verifier import (
+    LOC_NAMES,
+    AfterInstr,
+    BeforeInstr,
+    ObjDesc,
+    OpenInstr,
+)
+
+
+class Open(RoomGridLevel):
+    """
+    Open a door, which may be in another room
+    """
+
+    def gen_mission(self):
+        self.place_agent()
+        self.connect_all()
+        self.add_distractors(num_distractors=18, all_unique=False)
+        self.check_objs_reachable()
+
+        # Collect a list of all the doors in the environment
+        doors = []
+        for i in range(self.num_cols):
+            for j in range(self.num_rows):
+                room = self.get_room(i, j)
+                for door in room.doors:
+                    if door:
+                        doors.append(door)
+
+        door = self._rand_elem(doors)
+        self.instrs = OpenInstr(ObjDesc(door.type, door.color))
+
+
+class OpenRedDoor(RoomGridLevel):
+    """
+    Go to the red door
+    (always unlocked, in the current room)
+    Note: this level is intentionally meant for debugging and is
+    intentionally kept very simple.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(num_rows=1, num_cols=2, room_size=5, **kwargs)
+
+    def gen_mission(self):
+        obj, _ = self.add_door(0, 0, 0, "red", locked=False)
+        self.place_agent(0, 0)
+        self.instrs = OpenInstr(ObjDesc("door", "red"))
+
+
+class OpenDoor(RoomGridLevel):
+    """
+    Go to the door
+    The door to open is given by its color or by its location.
+    (always unlocked, in the current room)
+    """
+
+    def __init__(self, debug=False, select_by=None, **kwargs):
+        self.select_by = select_by
+        self.debug = debug
+        super().__init__(**kwargs)
+
+    def gen_mission(self):
+        door_colors = self._rand_subset(COLOR_NAMES, 4)
+        objs = []
+
+        for i, color in enumerate(door_colors):
+            obj, _ = self.add_door(1, 1, door_idx=i, color=color, locked=False)
+            objs.append(obj)
+
+        select_by = self.select_by
+        if select_by is None:
+            select_by = self._rand_elem(["color", "loc"])
+        if select_by == "color":
+            object = ObjDesc(objs[0].type, color=objs[0].color)
+        elif select_by == "loc":
+            object = ObjDesc(objs[0].type, loc=self._rand_elem(LOC_NAMES))
+        else:
+            raise NotImplementedError("Not implemented.")
+
+        self.place_agent(1, 1)
+        self.instrs = OpenInstr(object, strict=self.debug)
+
+
+class OpenTwoDoors(RoomGridLevel):
+    """
+    Open door X, then open door Y
+    The two doors are facing opposite directions, so that the agent
+    Can't see whether the door behind him is open.
+    This task requires memory (recurrent policy) to be solved effectively.
+    """
+
+    def __init__(self, first_color=None, second_color=None, strict=False, **kwargs):
+        self.first_color = first_color
+        self.second_color = second_color
+        self.strict = strict
+
+        room_size = 6
+        super().__init__(room_size=room_size, max_steps=20 * room_size**2, **kwargs)
+
+    def gen_mission(self):
+        colors = self._rand_subset(COLOR_NAMES, 2)
+
+        first_color = self.first_color
+        if first_color is None:
+            first_color = colors[0]
+        second_color = self.second_color
+        if second_color is None:
+            second_color = colors[1]
+
+        door1, _ = self.add_door(1, 1, 2, color=first_color, locked=False)
+        door2, _ = self.add_door(1, 1, 0, color=second_color, locked=False)
+
+        self.place_agent(1, 1)
+
+        self.instrs = BeforeInstr(
+            OpenInstr(ObjDesc(door1.type, door1.color), strict=self.strict),
+            OpenInstr(ObjDesc(door2.type, door2.color)),
+        )
+
+
+class OpenDoorsOrder(RoomGridLevel):
+    """
+    Open one or two doors in the order specified.
+    """
+
+    def __init__(self, num_doors, debug=False, **kwargs):
+        assert num_doors >= 2
+        self.num_doors = num_doors
+        self.debug = debug
+
+        room_size = 6
+        super().__init__(room_size=room_size, max_steps=20 * room_size**2, **kwargs)
+
+    def gen_mission(self):
+        colors = self._rand_subset(COLOR_NAMES, self.num_doors)
+        doors = []
+        for i in range(self.num_doors):
+            door, _ = self.add_door(1, 1, color=colors[i], locked=False)
+            doors.append(door)
+        self.place_agent(1, 1)
+
+        door1, door2 = self._rand_subset(doors, 2)
+        desc1 = ObjDesc(door1.type, door1.color)
+        desc2 = ObjDesc(door2.type, door2.color)
+
+        mode = self._rand_int(0, 3)
+        if mode == 0:
+            self.instrs = OpenInstr(desc1, strict=self.debug)
+        elif mode == 1:
+            self.instrs = BeforeInstr(
+                OpenInstr(desc1, strict=self.debug), OpenInstr(desc2, strict=self.debug)
+            )
+        elif mode == 2:
+            self.instrs = AfterInstr(
+                OpenInstr(desc1, strict=self.debug), OpenInstr(desc2, strict=self.debug)
+            )
+        else:
+            assert False

+ 167 - 0
minigrid/envs/babyai/other.py

@@ -0,0 +1,167 @@
+"""
+Copied and adapted from https://github.com/mila-iqia/babyai.
+Levels described in the Baby AI ICLR 2019 submission, with different instructions than those in other files.
+"""
+
+from minigrid.envs.babyai.core.roomgrid_level import RoomGridLevel
+from minigrid.envs.babyai.core.verifier import (
+    BeforeInstr,
+    GoToInstr,
+    ObjDesc,
+    OpenInstr,
+    PickupInstr,
+    PutNextInstr,
+)
+
+
+class ActionObjDoor(RoomGridLevel):
+    """
+    Single instruction about one target in the current room, one of:
+    [pick up an object] or
+    [go to an object or door] or
+    [open a door]
+    The target is drawn among five distractor objects and four doors.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(room_size=7, **kwargs)
+
+    def gen_mission(self):
+        # Candidate targets: the distractors first, then four unlocked doors
+        candidates = self.add_distractors(1, 1, num_distractors=5)
+        for _ in range(4):
+            candidates.append(self.add_door(1, 1, locked=False)[0])
+
+        self.place_agent(1, 1)
+
+        target = self._rand_elem(candidates)
+        desc = ObjDesc(target.type, target.color)
+
+        # A coin flip selects "go to"; otherwise the action depends on
+        # whether the target is a door (open) or an object (pick up)
+        if self._rand_bool():
+            self.instrs = GoToInstr(desc)
+        elif target.type == "door":
+            self.instrs = OpenInstr(desc)
+        else:
+            self.instrs = PickupInstr(desc)
+
+
+class FindObjS5(RoomGridLevel):
+    """
+    Pick up an object (in a random room)
+    Rooms have a size of 5
+    This level requires potentially exhaustive exploration
+    """
+
+    def __init__(self, room_size=5, **kwargs):
+        super().__init__(room_size=room_size, max_steps=20 * room_size**2, **kwargs)
+
+    def gen_mission(self):
+        # Add a random object to a random room.
+        # Rooms are addressed as (column, row), so the first index is
+        # bounded by num_cols and the second by num_rows; with the bounds
+        # swapped a non-square grid could yield an out-of-range room.
+        i = self._rand_int(0, self.num_cols)
+        j = self._rand_int(0, self.num_rows)
+        obj, _ = self.add_object(i, j)
+        self.place_agent(1, 1)
+        self.connect_all()
+
+        # The object is described by its type only (no color)
+        self.instrs = PickupInstr(ObjDesc(obj.type))
+
+
+class KeyCorridor(RoomGridLevel):
+    """
+    An object (a ball by default) is behind a locked door, the key is
+    placed in a random room.
+    """
+
+    def __init__(self, num_rows=3, obj_type="ball", room_size=6, **kwargs):
+        # Kind of the object placed behind the locked door
+        self.obj_type = obj_type
+
+        super().__init__(
+            room_size=room_size,
+            num_rows=num_rows,
+            max_steps=30 * room_size**2,
+            **kwargs
+        )
+
+    def gen_mission(self):
+        # Turn the middle column (column 1) into a hallway by removing
+        # wall 3 of every room in it except the one in row 0
+        for row in range(1, self.num_rows):
+            self.remove_wall(1, row, 3)
+
+        # Locked door on a random room of column 2 (right side), with the
+        # target object placed in that same room
+        target_row = self._rand_int(0, self.num_rows)
+        locked_door, _ = self.add_door(2, target_row, 2, locked=True)
+        target, _ = self.add_object(2, target_row, kind=self.obj_type)
+
+        # Key of the door's color in a random room of column 0 (left side)
+        key_row = self._rand_int(0, self.num_rows)
+        self.add_object(0, key_row, "key", locked_door.color)
+
+        # The agent starts in the middle of the hallway
+        self.place_agent(1, self.num_rows // 2)
+
+        # Make sure all rooms are accessible
+        self.connect_all()
+
+        self.instrs = PickupInstr(ObjDesc(target.type))
+
+
+class OneRoomS8(RoomGridLevel):
+    """
+    Single-room level: pick up the ball.
+    The room has a size of 8 unless `room_size` is overridden.
+    """
+
+    def __init__(self, room_size=8, **kwargs):
+        super().__init__(num_cols=1, num_rows=1, room_size=room_size, **kwargs)
+
+    def gen_mission(self):
+        # The ball goes into the only room; the agent is placed with
+        # place_agent's default arguments
+        ball, _ = self.add_object(0, 0, kind="ball")
+        self.place_agent()
+        target_desc = ObjDesc(ball.type)
+        self.instrs = PickupInstr(target_desc)
+
+
+class MoveTwoAcross(RoomGridLevel):
+    """
+    Task of the form: move the A next to the B and the C next to the D.
+    A and D come from the left room, B and C from the right room.
+    This task is structured to have a very large number of possible
+    instructions.
+    """
+
+    def __init__(self, room_size, objs_per_room, **kwargs):
+        assert objs_per_room <= 9
+        self.objs_per_room = objs_per_room
+
+        super().__init__(
+            room_size=room_size,
+            num_rows=1,
+            num_cols=2,
+            max_steps=16 * room_size**2,
+            **kwargs
+        )
+
+    def gen_mission(self):
+        self.place_agent(0, 0)
+
+        # Populate the left room, then the right room, so that we know
+        # that we have two non-adjacent sets of objects
+        left_objs = self.add_distractors(0, 0, self.objs_per_room)
+        right_objs = self.add_distractors(1, 0, self.objs_per_room)
+
+        # Remove the wall between the two rooms
+        self.remove_wall(0, 0, 0)
+
+        # Draw two objects per side; each move pairs an object with one
+        # taken from the opposite room
+        a, d = self._rand_subset(left_objs, 2)
+        b, c = self._rand_subset(right_objs, 2)
+
+        first_move = PutNextInstr(ObjDesc(a.type, a.color), ObjDesc(b.type, b.color))
+        second_move = PutNextInstr(ObjDesc(c.type, c.color), ObjDesc(d.type, d.color))
+        self.instrs = BeforeInstr(first_move, second_move)

+ 116 - 0
minigrid/envs/babyai/pickup.py

@@ -0,0 +1,116 @@
+"""
+Copied and adapted from https://github.com/mila-iqia/babyai.
+Levels described in the Baby AI ICLR 2019 submission, with the `Pick up` instruction.
+"""
+
+from minigrid.envs.babyai.core.levelgen import LevelGen
+from minigrid.envs.babyai.core.roomgrid_level import RejectSampling, RoomGridLevel
+from minigrid.envs.babyai.core.verifier import ObjDesc, PickupInstr
+
+
+class Pickup(RoomGridLevel):
+    """
+    Pick up an object, the object may be in another room.
+    """
+
+    def gen_mission(self):
+        # Agent first, then doors between rooms, then the objects
+        self.place_agent()
+        self.connect_all()
+        candidates = self.add_distractors(all_unique=False, num_distractors=18)
+        self.check_objs_reachable()
+
+        # Any of the added objects can be the target, described by
+        # both type and color
+        target = self._rand_elem(candidates)
+        target_desc = ObjDesc(target.type, target.color)
+        self.instrs = PickupInstr(target_desc)
+
+
+class UnblockPickup(RoomGridLevel):
+    """
+    Pick up an object, the object may be in another room. The path may
+    be blocked by one or more obstructors.
+    """
+
+    def gen_mission(self):
+        self.place_agent()
+        self.connect_all()
+        candidates = self.add_distractors(all_unique=False, num_distractors=20)
+
+        # Reject layouts where every object is reachable, so that at least
+        # one object requires unblocking.
+        # Note: the selected object will still be reachable most of the time
+        everything_reachable = self.check_objs_reachable(raise_exc=False)
+        if everything_reachable:
+            raise RejectSampling("all objects reachable")
+
+        target = self._rand_elem(candidates)
+        target_desc = ObjDesc(target.type, target.color)
+        self.instrs = PickupInstr(target_desc)
+
+
+class PickupLoc(LevelGen):
+    """
+    Single-room environment in which the object to pick up may be
+    described using its location.
+
+    Competencies: PickUp, Loc. No unblocking.
+    """
+
+    def __init__(self, **kwargs):
+        # Many distractors are added to increase the probability
+        # of ambiguous locations within the same room
+        super().__init__(
+            num_rows=1,
+            num_cols=1,
+            num_dists=8,
+            action_kinds=["pickup"],
+            instr_kinds=["action"],
+            locations=True,
+            unblocking=False,
+            locked_room_prob=0,
+            **kwargs
+        )
+
+
+class PickupDist(RoomGridLevel):
+    """
+    Pick up an object
+    The object to pick up is given by its type only, or
+    by its color, or by its type and color.
+    (in the current room, with distractors)
+    """
+
+    def __init__(self, debug=False, **kwargs):
+        # Forwarded as the `strict` flag of the PickupInstr
+        self.debug = debug
+        super().__init__(num_rows=1, num_cols=1, room_size=7, **kwargs)
+
+    def gen_mission(self):
+        # Add 5 random objects in the room
+        objs = self.add_distractors(num_distractors=5)
+        self.place_agent(0, 0)
+        obj = self._rand_elem(objs)
+
+        # Decide which attributes describe the target; the attribute that
+        # is left out is passed as None. Local names avoid shadowing the
+        # `type` builtin.
+        select_by = self._rand_elem(["type", "color", "both"])
+        obj_type = None if select_by == "color" else obj.type
+        obj_color = None if select_by == "type" else obj.color
+
+        self.instrs = PickupInstr(ObjDesc(obj_type, obj_color), strict=self.debug)
+
+
+class PickupAbove(RoomGridLevel):
+    """
+    Pick up an object (in the room above)
+    This task requires to use the compass to be solved effectively.
+    """
+
+    def __init__(self, **kwargs):
+        size = 6
+        super().__init__(max_steps=8 * size**2, room_size=size, **kwargs)
+
+    def gen_mission(self):
+        # Random target object in the top-middle room (1, 0)
+        target, _ = self.add_object(1, 0)
+        # Unlocked door on side 3 of room (1, 1), so that the two rooms
+        # are directly connected
+        self.add_door(1, 1, 3, locked=False)
+        self.place_agent(1, 1)
+        self.connect_all()
+
+        target_desc = ObjDesc(target.type, target.color)
+        self.instrs = PickupInstr(target_desc)

+ 85 - 0
minigrid/envs/babyai/putnext.py

@@ -0,0 +1,85 @@
+"""
+Copied and adapted from https://github.com/mila-iqia/babyai.
+Levels described in the Baby AI ICLR 2019 submission, with the `Put Next` instruction.
+"""
+
+from minigrid.envs.babyai.core.roomgrid_level import RoomGridLevel
+from minigrid.envs.babyai.core.verifier import ObjDesc, PutNextInstr
+
+
+class PutNextLocal(RoomGridLevel):
+    """
+    Put an object next to another object, inside a single room
+    with no doors, no distractors
+    """
+
+    def __init__(self, room_size=8, num_objs=8, **kwargs):
+        # Number of objects added to the room in gen_mission
+        self.num_objs = num_objs
+        super().__init__(room_size=room_size, num_rows=1, num_cols=1, **kwargs)
+
+    def gen_mission(self):
+        self.place_agent()
+        candidates = self.add_distractors(
+            all_unique=True, num_distractors=self.num_objs
+        )
+        self.check_objs_reachable()
+
+        # Two different objects: the one to move and the one to move it to
+        moved, anchor = self._rand_subset(candidates, 2)
+        moved_desc = ObjDesc(moved.type, moved.color)
+        anchor_desc = ObjDesc(anchor.type, anchor.color)
+
+        self.instrs = PutNextInstr(moved_desc, anchor_desc)
+
+
+class PutNext(RoomGridLevel):
+    """
+    Task of the form: move the A next to the B, where one of the two
+    objects is taken from the left room and the other from the right room.
+
+    If `start_carrying` is set, the agent starts off carrying the object
+    to be moved.
+    """
+
+    def __init__(self, room_size, objs_per_room, start_carrying=False, **kwargs):
+        assert room_size >= 4
+        assert objs_per_room <= 9
+        self.objs_per_room = objs_per_room
+        self.start_carrying = start_carrying
+
+        super().__init__(
+            num_rows=1,
+            num_cols=2,
+            room_size=room_size,
+            max_steps=8 * room_size**2,
+            **kwargs
+        )
+
+    def gen_mission(self):
+        self.place_agent(0, 0)
+
+        # Add objects to both the left and right rooms
+        # so that we know that we have two non-adjacent set of objects
+        objs_l = self.add_distractors(0, 0, self.objs_per_room)
+        objs_r = self.add_distractors(1, 0, self.objs_per_room)
+
+        # Remove the wall between the two rooms
+        self.remove_wall(0, 0, 0)
+
+        # Select objects from both subsets
+        a = self._rand_elem(objs_l)
+        b = self._rand_elem(objs_r)
+
+        # Randomly flip the object to be moved
+        if self._rand_bool():
+            a, b = b, a
+
+        # Remembered so that reset() can hand it to the agent
+        self.obj_a = a
+
+        self.instrs = PutNextInstr(ObjDesc(a.type, a.color), ObjDesc(b.type, b.color))
+
+    def reset(self, **kwargs):
+        """Reset the level; optionally start with the object to move in hand."""
+        result = super().reset(**kwargs)
+
+        # If the agent starts off carrying the object, take it off the grid
+        # NOTE(review): `result` was produced before this change to the grid;
+        # confirm callers do not need it to reflect the carried object.
+        if self.start_carrying:
+            self.grid.set(*self.obj_a.init_pos, None)
+            self.carrying = self.obj_a
+
+        return result

+ 95 - 0
minigrid/envs/babyai/synth.py

@@ -0,0 +1,95 @@
+"""
+Copied and adapted from https://github.com/mila-iqia/babyai.
+Levels described in the Baby AI ICLR 2019 submission.
+The instructions are a synthesis of those from `PutNext`, `Open`, `GoTo`, and `Pickup`.
+"""
+
+from minigrid.envs.babyai.core.levelgen import LevelGen
+
+
+class Synth(LevelGen):
+    """
+    Union of all instructions from PutNext, Open, Goto and PickUp. The agent
+    may need to move objects around. The agent may have to unlock the door,
+    but only if it is explicitly referred by the instruction.
+
+    Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open
+    """
+
+    def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
+        # Grid layout and distractor count come from the caller; the
+        # instruction-related settings below are fixed for this level.
+        # We add many distractors to increase the probability
+        # of ambiguous locations within the same room
+        super().__init__(
+            instr_kinds=["action"],
+            locations=False,
+            unblocking=True,
+            implicit_unlock=False,
+            room_size=room_size,
+            num_rows=num_rows,
+            num_cols=num_cols,
+            num_dists=num_dists,
+            **kwargs
+        )
+
+
+class SynthS5R2(Synth):
+    """
+    Synth on a 2x2 grid of rooms of size 5, with 7 distractors.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(num_rows=2, num_cols=2, room_size=5, num_dists=7, **kwargs)
+
+
+class SynthLoc(LevelGen):
+    """
+    Like Synth, but a significant share of object descriptions involves
+    location language like in PickUpLoc. No implicit unlocking.
+
+    Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc
+    """
+
+    def __init__(self, **kwargs):
+        # Only the instruction-related settings are fixed here; everything
+        # else is left to LevelGen's defaults or the caller's kwargs.
+        # We add many distractors to increase the probability
+        # of ambiguous locations within the same room
+        super().__init__(
+            locations=True,
+            instr_kinds=["action"],
+            implicit_unlock=False,
+            unblocking=True,
+            **kwargs
+        )
+
+
+class SynthSeq(LevelGen):
+    """
+    Like SynthLoc, but now with multiple commands, combined just like in GoToSeq.
+    No implicit unlocking.
+
+    Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc, Seq
+    """
+
+    def __init__(self, **kwargs):
+        # Unlike SynthLoc, `instr_kinds` is not restricted here.
+        # We add many distractors to increase the probability
+        # of ambiguous locations within the same room
+        super().__init__(
+            implicit_unlock=False,
+            unblocking=True,
+            locations=True,
+            **kwargs
+        )
+
+
+class MiniBossLevel(LevelGen):
+    """
+    LevelGen on a 2x2 grid of rooms of size 5, with 7 distractors and
+    a `locked_room_prob` of 0.25.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(
+            room_size=5,
+            num_rows=2,
+            num_cols=2,
+            num_dists=7,
+            locked_room_prob=0.25,
+            **kwargs
+        )
+
+
+class BossLevel(LevelGen):
+    """
+    LevelGen with none of its settings overridden by this class.
+    """
+
+    def __init__(self, **kwargs):
+        # Keyword arguments are forwarded untouched
+        super().__init__(**kwargs)
+
+
+class BossLevelNoUnlock(LevelGen):
+    """
+    LevelGen with `locked_room_prob` set to 0 and implicit unlocking disabled.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(implicit_unlock=False, locked_room_prob=0, **kwargs)

+ 205 - 0
minigrid/envs/babyai/unlock.py

@@ -0,0 +1,205 @@
+"""
+Copied and adapted from https://github.com/mila-iqia/babyai.
+Levels described in the Baby AI ICLR 2019 submission, with the `Unlock` instruction.
+"""
+
+from minigrid.core.constants import COLOR_NAMES
+from minigrid.envs.babyai.core.roomgrid_level import RoomGridLevel
+from minigrid.envs.babyai.core.verifier import ObjDesc, OpenInstr, PickupInstr
+from minigrid.minigrid_env import Ball, Box, Key
+
+
+class Unlock(RoomGridLevel):
+    """
+    Unlock a door.
+
+    Competencies: Maze, Open, Unlock. No unblocking.
+    """
+
+    def gen_mission(self):
+        # Add a locked door to a random room
+        door_col = self._rand_int(0, self.num_cols)
+        door_row = self._rand_int(0, self.num_rows)
+        door, _ = self.add_door(door_col, door_row, locked=True)
+        locked_room = self.get_room(door_col, door_row)
+
+        # Add the key to a different room: redraw until the key's room
+        # differs from the locked one. Indices are compared by value; `is`
+        # tests object identity, which only coincides with integer equality
+        # by accident of the interpreter's small-integer caching.
+        while True:
+            key_col = self._rand_int(0, self.num_cols)
+            key_row = self._rand_int(0, self.num_rows)
+            if (key_col, key_row) != (door_col, door_row):
+                break
+        self.add_object(key_col, key_row, "key", door.color)
+
+        # With 50% probability, ensure that the locked door is the only
+        # door of that color
+        if self._rand_bool():
+            colors = [c for c in COLOR_NAMES if c != door.color]
+            self.connect_all(door_colors=colors)
+        else:
+            self.connect_all()
+
+        # Add distractors to all but the locked room.
+        # We do this to speed up the reachability test,
+        # which otherwise will reject all levels with
+        # objects in the locked room.
+        for i in range(self.num_cols):
+            for j in range(self.num_rows):
+                if (i, j) != (door_col, door_row):
+                    self.add_distractors(i, j, num_distractors=3, all_unique=False)
+
+        # The agent must be placed after all the object to respect constraints
+        while True:
+            self.place_agent()
+            start_room = self.room_from_pos(*self.agent_pos)
+            # Ensure that we are not placing the agent in the locked room
+            # (rooms are objects, so an identity check is intended here)
+            if start_room is not locked_room:
+                break
+
+        self.check_objs_reachable()
+
+        self.instrs = OpenInstr(ObjDesc(door.type, door.color))
+
+
+class UnlockLocal(RoomGridLevel):
+    """
+    Fetch a key and unlock a door
+    (in the current room)
+    """
+
+    def __init__(self, distractors=False, **kwargs):
+        # When set, three distractor objects are added to the room
+        self.distractors = distractors
+        super().__init__(**kwargs)
+
+    def gen_mission(self):
+        # Locked door and its matching key both belong to room (1, 1)
+        locked_door, _ = self.add_door(1, 1, locked=True)
+        self.add_object(1, 1, "key", locked_door.color)
+        if self.distractors:
+            self.add_distractors(1, 1, num_distractors=3)
+        self.place_agent(1, 1)
+
+        # The door is described by its type only (no color)
+        door_desc = ObjDesc(locked_door.type)
+        self.instrs = OpenInstr(door_desc)
+
+
+class KeyInBox(RoomGridLevel):
+    """
+    Unlock a door. Key is in a box (in the current room).
+    """
+
+    def __init__(self, **kwargs):
+        # Keyword arguments are forwarded untouched
+        super().__init__(**kwargs)
+
+    def gen_mission(self):
+        locked_door, _ = self.add_door(1, 1, locked=True)
+
+        # A box of random color holds the key matching the door;
+        # only the box is placed in the room
+        hidden_key = Key(locked_door.color)
+        box_color = self._rand_color()
+        self.place_in_room(1, 1, Box(box_color, hidden_key))
+
+        self.place_agent(1, 1)
+
+        # The door is described by its type only (no color)
+        self.instrs = OpenInstr(ObjDesc(locked_door.type))
+
+
+class UnlockPickup(RoomGridLevel):
+    """
+    Unlock a door, then pick up a box in another room
+    """
+
+    def __init__(self, distractors=False, **kwargs):
+        # When set, four distractor objects are added in gen_mission
+        self.distractors = distractors
+
+        size = 6
+        super().__init__(
+            room_size=size,
+            num_rows=1,
+            num_cols=2,
+            max_steps=8 * size**2,
+            **kwargs
+        )
+
+    def gen_mission(self):
+        # The box to pick up goes in the room on the right
+        box, _ = self.add_object(1, 0, kind="box")
+        # Make sure the two rooms are directly connected by a locked door
+        locked_door, _ = self.add_door(0, 0, 0, locked=True)
+        # The key that unlocks the door goes in the room on the left
+        self.add_object(0, 0, "key", locked_door.color)
+        if self.distractors:
+            self.add_distractors(num_distractors=4)
+
+        self.place_agent(0, 0)
+
+        box_desc = ObjDesc(box.type, box.color)
+        self.instrs = PickupInstr(box_desc)
+
+
+class BlockedUnlockPickup(RoomGridLevel):
+    """
+    Unlock a door blocked by a ball, then pick up a box
+    in another room
+    """
+
+    def __init__(self, **kwargs):
+        size = 6
+        super().__init__(
+            room_size=size,
+            num_rows=1,
+            num_cols=2,
+            max_steps=16 * size**2,
+            **kwargs
+        )
+
+    def gen_mission(self):
+        # The box to pick up goes in the room on the right
+        box, _ = self.add_object(1, 0, kind="box")
+        # Make sure the two rooms are directly connected by a locked door
+        locked_door, door_pos = self.add_door(0, 0, 0, locked=True)
+        # Block the door with a ball of random color, put on the cell
+        # one column before the door
+        blocker = Ball(self._rand_color())
+        self.grid.set(door_pos[0] - 1, door_pos[1], blocker)
+        # Add a key to unlock the door
+        self.add_object(0, 0, "key", locked_door.color)
+
+        self.place_agent(0, 0)
+
+        # The box is described by its type only (no color)
+        self.instrs = PickupInstr(ObjDesc(box.type))
+
+
+class UnlockToUnlock(RoomGridLevel):
+    """
+    Unlock a door A that requires to unlock a door B before.
+    The instruction itself is to pick up the ball placed in room (0, 0).
+    """
+
+    def __init__(self, **kwargs):
+        size = 6
+        super().__init__(
+            room_size=size,
+            num_rows=1,
+            num_cols=3,
+            max_steps=30 * size**2,
+            **kwargs
+        )
+
+    def gen_mission(self):
+        color_a, color_b = self._rand_subset(COLOR_NAMES, 2)
+
+        # Door A connects the left and middle rooms; its key is in the
+        # room on the right
+        self.add_door(0, 0, door_idx=0, color=color_a, locked=True)
+        self.add_object(2, 0, kind="key", color=color_a)
+
+        # Door B connects the middle and right rooms; its key is in the
+        # middle room
+        self.add_door(1, 0, door_idx=0, color=color_b, locked=True)
+        self.add_object(1, 0, kind="key", color=color_b)
+
+        # Target ball in the left room, agent in the middle room
+        ball, _ = self.add_object(0, 0, kind="ball")
+
+        self.place_agent(1, 0)
+
+        self.instrs = PickupInstr(ObjDesc(ball.type))

+ 5 - 1
minigrid/wrappers.py

@@ -247,6 +247,8 @@ class DictObservationSpaceWrapper(ObservationWrapper):
     """
     """
     Transforms the observation space (that has a textual component) to a fully numerical observation space,
     Transforms the observation space (that has a textual component) to a fully numerical observation space,
     where the textual instructions are replaced by arrays representing the indices of each word in a fixed vocabulary.
     where the textual instructions are replaced by arrays representing the indices of each word in a fixed vocabulary.
+
+    This wrapper is not applicable to BabyAI environments, given that these have their own language component.
     """
     """
 
 
     def __init__(self, env, max_words_in_mission=50, word_dict=None):
     def __init__(self, env, max_words_in_mission=50, word_dict=None):
@@ -364,7 +366,9 @@ class DictObservationSpaceWrapper(ObservationWrapper):
 class FlatObsWrapper(ObservationWrapper):
 class FlatObsWrapper(ObservationWrapper):
     """
     """
     Encode mission strings using a one-hot scheme,
     Encode mission strings using a one-hot scheme,
-    and combine these with observed images into one flat array
+    and combine these with observed images into one flat array.
+
+    This wrapper is not applicable to BabyAI environments, given that these have their own language component.
     """
     """
 
 
     def __init__(self, env, maxStrLen=96):
     def __init__(self, env, maxStrLen=96):

+ 12 - 3
tests/test_wrappers.py

@@ -19,7 +19,7 @@ from minigrid.wrappers import (
     StateBonus,
     StateBonus,
     ViewSizeWrapper,
     ViewSizeWrapper,
 )
 )
-from tests.utils import all_testing_env_specs, assert_equals
+from tests.utils import all_testing_env_specs, assert_equals, minigrid_testing_env_specs
 
 
 SEEDS = [100, 243, 500]
 SEEDS = [100, 243, 500]
 NUM_STEPS = 100
 NUM_STEPS = 100
@@ -126,8 +126,10 @@ def test_action_bonus_wrapper(env_id):
 
 
 
 
 @pytest.mark.parametrize(
 @pytest.mark.parametrize(
-    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
-)
+    "env_spec",
+    minigrid_testing_env_specs,
+    ids=[spec.id for spec in minigrid_testing_env_specs],
+)  # DictObservationSpaceWrapper is not compatible with BabyAI levels. See minigrid/wrappers.py for more details.
 def test_dict_observation_space_wrapper(env_spec):
 def test_dict_observation_space_wrapper(env_spec):
     env = env_spec.make()
     env = env_spec.make()
     env = DictObservationSpaceWrapper(env)
     env = DictObservationSpaceWrapper(env)
@@ -157,6 +159,13 @@ def test_dict_observation_space_wrapper(env_spec):
     "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
     "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
 )
 )
 def test_main_wrappers(wrapper, env_spec):
 def test_main_wrappers(wrapper, env_spec):
+    if (
+        wrapper in (DictObservationSpaceWrapper, FlatObsWrapper)
+        and env_spec not in minigrid_testing_env_specs
+    ):
+        # DictObservationSpaceWrapper and FlatObsWrapper are not compatible with BabyAI levels
+        # See minigrid/wrappers.py for more details
+        pytest.skip()
     env = env_spec.make()
     env = env_spec.make()
     env = wrapper(env)
     env = wrapper(env)
     for _ in range(10):
     for _ in range(10):

+ 6 - 0
tests/utils.py

@@ -8,6 +8,12 @@ all_testing_env_specs = [
     if env_spec.entry_point.startswith("minigrid.envs")
     if env_spec.entry_point.startswith("minigrid.envs")
 ]
 ]
 
 
+# Subset of all_testing_env_specs whose entry point is not under
+# "minigrid.envs.babyai", i.e. with the BabyAI levels filtered out
+minigrid_testing_env_specs = [
+    env_spec
+    for env_spec in all_testing_env_specs
+    if not env_spec.entry_point.startswith("minigrid.envs.babyai")
+]
+
 
 
 def assert_equals(a, b, prefix=None):
 def assert_equals(a, b, prefix=None):
     """Assert equality of data structures `a` and `b`.
     """Assert equality of data structures `a` and `b`.