@@ -1,40 +1,56 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
 
-import pytest
-from pytest import approx
+import os
 from unittest.mock import patch
 
+import pytest
+
 import torch
+from llama_recipes.data.sampler import LengthBasedBatchSampler
+
+from llama_recipes.finetuning import main
+from pytest import approx
 from torch.optim import AdamW
 from torch.utils.data.dataloader import DataLoader
 from torch.utils.data.sampler import BatchSampler
 
-from llama_recipes.finetuning import main
-from llama_recipes.data.sampler import LengthBasedBatchSampler
-
 
 def get_fake_dataset():
-    return [{
-        "input_ids":[1],
-        "attention_mask":[1],
-        "labels":[1],
-        }]
-
-@patch('llama_recipes.finetuning.torch.cuda.is_available')
-@patch('llama_recipes.finetuning.train')
-@patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
-@patch('llama_recipes.finetuning.AutoTokenizer.from_pretrained')
-@patch('llama_recipes.finetuning.get_preprocessed_dataset')
-@patch('llama_recipes.finetuning.optim.AdamW')
-@patch('llama_recipes.finetuning.StepLR')
+    return [
+        {
+            "input_ids": [1],
+            "attention_mask": [1],
+            "labels": [1],
+        }
+    ]
+
+
+@patch("llama_recipes.finetuning.torch.cuda.is_available")
+@patch("llama_recipes.finetuning.train")
+@patch("llama_recipes.finetuning.LlamaForCausalLM.from_pretrained")
+@patch("llama_recipes.finetuning.AutoTokenizer.from_pretrained")
+@patch("llama_recipes.finetuning.get_preprocessed_dataset")
+@patch("llama_recipes.finetuning.optim.AdamW")
+@patch("llama_recipes.finetuning.StepLR")
 @pytest.mark.parametrize("cuda_is_available", [True, False])
-def test_finetuning_no_validation(step_lr, optimizer, get_dataset, tokenizer, get_model, train, cuda, cuda_is_available):
+def test_finetuning_no_validation(
+    step_lr,
+    optimizer,
+    get_dataset,
+    tokenizer,
+    get_model,
+    train,
+    cuda,
+    cuda_is_available,
+):
     kwargs = {"run_validation": False}
 
     get_dataset.return_value = get_fake_dataset()
     cuda.return_value = cuda_is_available
 
+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]
+
     main(**kwargs)
 
     assert train.call_count == 1
@@ -53,20 +69,31 @@ def test_finetuning_no_validation(step_lr, optimizer, get_dataset, tokenizer, ge
         assert get_model.return_value.to.call_count == 0
 
 
-@patch('llama_recipes.finetuning.torch.cuda.is_available')
-@patch('llama_recipes.finetuning.train')
-@patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
-@patch('llama_recipes.finetuning.AutoTokenizer.from_pretrained')
-@patch('llama_recipes.finetuning.get_preprocessed_dataset')
-@patch('llama_recipes.finetuning.optim.AdamW')
-@patch('llama_recipes.finetuning.StepLR')
+@patch("llama_recipes.finetuning.torch.cuda.is_available")
+@patch("llama_recipes.finetuning.train")
+@patch("llama_recipes.finetuning.LlamaForCausalLM.from_pretrained")
+@patch("llama_recipes.finetuning.AutoTokenizer.from_pretrained")
+@patch("llama_recipes.finetuning.get_preprocessed_dataset")
+@patch("llama_recipes.finetuning.optim.AdamW")
+@patch("llama_recipes.finetuning.StepLR")
 @pytest.mark.parametrize("cuda_is_available", [True, False])
-def test_finetuning_with_validation(step_lr, optimizer, get_dataset, tokenizer, get_model, train, cuda, cuda_is_available):
+def test_finetuning_with_validation(
+    step_lr,
+    optimizer,
+    get_dataset,
+    tokenizer,
+    get_model,
+    train,
+    cuda,
+    cuda_is_available,
+):
     kwargs = {"run_validation": True}
 
     get_dataset.return_value = get_fake_dataset()
     cuda.return_value = cuda_is_available
 
+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]
+
     main(**kwargs)
 
     assert train.call_count == 1
@@ -83,22 +110,36 @@ def test_finetuning_with_validation(step_lr, optimizer, get_dataset, tokenizer,
     else:
         assert get_model.return_value.to.call_count == 0
 
-@patch('llama_recipes.finetuning.torch.cuda.is_available')
-@patch('llama_recipes.finetuning.train')
-@patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
-@patch('llama_recipes.finetuning.AutoTokenizer.from_pretrained')
-@patch('llama_recipes.finetuning.get_preprocessed_dataset')
-@patch('llama_recipes.finetuning.generate_peft_config')
-@patch('llama_recipes.finetuning.get_peft_model')
-@patch('llama_recipes.finetuning.optim.AdamW')
-@patch('llama_recipes.finetuning.StepLR')
+
+@patch("llama_recipes.finetuning.torch.cuda.is_available")
+@patch("llama_recipes.finetuning.train")
+@patch("llama_recipes.finetuning.LlamaForCausalLM.from_pretrained")
+@patch("llama_recipes.finetuning.AutoTokenizer.from_pretrained")
+@patch("llama_recipes.finetuning.get_preprocessed_dataset")
+@patch("llama_recipes.finetuning.generate_peft_config")
+@patch("llama_recipes.finetuning.get_peft_model")
+@patch("llama_recipes.finetuning.optim.AdamW")
+@patch("llama_recipes.finetuning.StepLR")
 @pytest.mark.parametrize("cuda_is_available", [True, False])
-def test_finetuning_peft(step_lr, optimizer, get_peft_model, gen_peft_config, get_dataset, tokenizer, get_model, train, cuda, cuda_is_available):
+def test_finetuning_peft_lora(
+    step_lr,
+    optimizer,
+    get_peft_model,
+    gen_peft_config,
+    get_dataset,
+    tokenizer,
+    get_model,
+    train,
+    cuda,
+    cuda_is_available,
+):
     kwargs = {"use_peft": True}
 
     get_dataset.return_value = get_fake_dataset()
     cuda.return_value = cuda_is_available
 
+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]
+
     main(**kwargs)
 
     if cuda_is_available:
@@ -117,7 +158,7 @@ def test_finetuning_peft(step_lr, optimizer, get_peft_model, gen_peft_config, ge
 @patch("llama_recipes.finetuning.AutoTokenizer.from_pretrained")
 @patch("llama_recipes.finetuning.get_preprocessed_dataset")
 def test_finetuning_peft_llama_adapter(
-    get_dataset, tokenizer, get_model, train, setup, get_peft_model, mocker
+    get_dataset, tokenizer, get_model, train, setup, get_peft_model
 ):
     kwargs = {
         "use_peft": True,
@@ -127,11 +168,7 @@ def test_finetuning_peft_llama_adapter(
 
     get_dataset.return_value = get_fake_dataset()
 
-    model = mocker.MagicMock(name="Model")
-    model.parameters.return_value = [torch.ones(1, 1)]
-    model.get_input_embeddings.return_value.weight.shape = [0]
-
-    get_model.return_value = model
+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]
 
     os.environ["RANK"] = "0"
     os.environ["LOCAL_RANK"] = "0"
@@ -164,16 +201,14 @@ def test_finetuning_peft_llama_adapter(
 @patch("llama_recipes.finetuning.get_peft_model")
 @patch("llama_recipes.finetuning.StepLR")
 def test_finetuning_weight_decay(
-    step_lr, get_peft_model, get_dataset, tokenizer, get_model, train, mocker
+    step_lr, get_peft_model, get_dataset, tokenizer, get_model, train
 ):
     kwargs = {"weight_decay": 0.01}
 
     get_dataset.return_value = get_fake_dataset()
 
-    model = mocker.MagicMock(name="Model")
-    model.parameters.return_value = [torch.ones(1,1)]
-
-    get_model.return_value = model
+    get_model.return_value.parameters.return_value = [torch.ones(1, 1)]
+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]
 
     main(**kwargs)
 
@@ -188,17 +223,21 @@ def test_finetuning_weight_decay(
     assert optimizer.state_dict()["param_groups"][0]["weight_decay"] == approx(0.01)
 
 
-@patch('llama_recipes.finetuning.train')
-@patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
-@patch('llama_recipes.finetuning.AutoTokenizer.from_pretrained')
-@patch('llama_recipes.finetuning.get_preprocessed_dataset')
-@patch('llama_recipes.finetuning.optim.AdamW')
-@patch('llama_recipes.finetuning.StepLR')
-def test_batching_strategy(step_lr, optimizer, get_dataset, tokenizer, get_model, train):
+@patch("llama_recipes.finetuning.train")
+@patch("llama_recipes.finetuning.LlamaForCausalLM.from_pretrained")
+@patch("llama_recipes.finetuning.AutoTokenizer.from_pretrained")
+@patch("llama_recipes.finetuning.get_preprocessed_dataset")
+@patch("llama_recipes.finetuning.optim.AdamW")
+@patch("llama_recipes.finetuning.StepLR")
+def test_batching_strategy(
+    step_lr, optimizer, get_dataset, tokenizer, get_model, train
+):
     kwargs = {"batching_strategy": "packing"}
 
     get_dataset.return_value = get_fake_dataset()
 
+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]
+
     main(**kwargs)
 
     assert train.call_count == 1