# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.

from unittest.mock import patch

import pytest

import torch
import os
import shutil

from llama_recipes.utils.train_utils import train

TEMP_OUTPUT_DIR = os.getcwd() + "/tmp"


@pytest.fixture(scope="session")
def temp_output_dir():
    # Create the directory during the session-level setup
    temp_output_dir = "tmp"
    os.mkdir(os.path.join(os.getcwd(), temp_output_dir))
    yield temp_output_dir
    # Delete the directory during the session-level teardown
    shutil.rmtree(temp_output_dir)


@patch("llama_recipes.utils.train_utils.MemoryTrace")
@patch("llama_recipes.utils.train_utils.nullcontext")
@patch("llama_recipes.utils.train_utils.torch.cuda.amp.GradScaler")
@patch("llama_recipes.utils.train_utils.torch.cuda.amp.autocast")
def test_gradient_accumulation(autocast, scaler, nullcontext, mem_trace, mocker):
    model = mocker.MagicMock(name="model")
    # The training loop divides the loss by gradient_accumulation_steps and detaches
    # it; return a real tensor so the loss aggregation works on the mock.
    model().loss.__truediv__().detach.return_value = torch.tensor(1)
    mock_tensor = mocker.MagicMock(name="tensor")
    batch = {"input": mock_tensor}
    train_dataloader = [batch, batch, batch, batch, batch]
    eval_dataloader = None
    tokenizer = mocker.MagicMock()
    optimizer = mocker.MagicMock()
    lr_scheduler = mocker.MagicMock()
    gradient_accumulation_steps = 1
    train_config = mocker.MagicMock()
    train_config.enable_fsdp = False
    train_config.use_fp16 = False
    train_config.run_validation = False
    train_config.gradient_clipping = False
    train_config.max_train_step = 0
    train_config.max_eval_step = 0
    train_config.save_metrics = False

    train(
        model,
        train_dataloader,
        eval_dataloader,
        tokenizer,
        optimizer,
        lr_scheduler,
        gradient_accumulation_steps,
        train_config,
    )

    # With no accumulation and fp16 disabled, each of the 5 batches triggers an
    # optimizer step and runs under nullcontext; autocast is never entered.
    assert optimizer.zero_grad.call_count == 5
    optimizer.zero_grad.reset_mock()

    assert nullcontext.call_count == 5
    nullcontext.reset_mock()

    assert autocast.call_count == 0

    gradient_accumulation_steps = 2
    train_config.use_fp16 = True
    train(
        model,
        train_dataloader,
        eval_dataloader,
        tokenizer,
        optimizer,
        lr_scheduler,
        gradient_accumulation_steps,
        train_config,
    )

    # Accumulating over 2 batches steps the optimizer 3 times for 5 batches
    # (after batches 2, 4, and the final batch); with fp16 enabled every batch
    # runs under autocast instead of nullcontext.
    assert optimizer.zero_grad.call_count == 3
    assert nullcontext.call_count == 0
    assert autocast.call_count == 5


def test_save_to_json(temp_output_dir, mocker):
    model = mocker.MagicMock(name="model")
    model().loss.__truediv__().detach.return_value = torch.tensor(1)
    mock_tensor = mocker.MagicMock(name="tensor")
    batch = {"input": mock_tensor}
    train_dataloader = [batch, batch, batch, batch, batch]
    eval_dataloader = None
    tokenizer = mocker.MagicMock()
    optimizer = mocker.MagicMock()
    lr_scheduler = mocker.MagicMock()
    gradient_accumulation_steps = 1
    train_config = mocker.MagicMock()
    train_config.enable_fsdp = False
    train_config.use_fp16 = False
    train_config.run_validation = False
    train_config.gradient_clipping = False
    train_config.save_metrics = True
    train_config.max_train_step = 0
    train_config.max_eval_step = 0
    train_config.output_dir = temp_output_dir

    results = train(
        model,
        train_dataloader,
        eval_dataloader,
        tokenizer,
        optimizer,
        lr_scheduler,
        gradient_accumulation_steps,
        train_config,
        local_rank=0,
    )

    # With save_metrics enabled, train() should report the metrics JSON filename
    # and the file should actually exist in the temporary output directory.
    assert results["metrics_filename"] not in ["", None]
    assert os.path.isfile(results["metrics_filename"])