
Disable prefix tuning as it's currently not supported; limit llama_adapter usage to non-FSDP only

Matthias Reso 11 months ago
parent
commit
091d58df17

+ 1 - 1
recipes/finetuning/README.md

@@ -70,7 +70,7 @@ It lets us specify the training settings for everything from `model_name` to `da
 
 * [Datasets config file](../../src/llama_recipes/configs/datasets.py) provides the available options for datasets.
 
-* [peft config file](../../src/llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified.
+* [peft config file](../../src/llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified. We currently support LoRA and LLaMA-Adapter. Please note that LoRA is the only technique which is supported in combination with FSDP.
 
 * [FSDP config file](../../src/llama_recipes/configs/fsdp.py) provides FSDP settings such as:
 

+ 2 - 1
src/llama_recipes/configs/peft.py

@@ -20,7 +20,8 @@ class llama_adapter_config:
      adapter_layers: int= 30
      task_type: str= "CAUSAL_LM"
 
+#CAUTION prefix tuning is currently not supported
 @dataclass
 class prefix_config:
      num_virtual_tokens: int=30
-     task_type: str= "CAUSAL_LM"    
+     task_type: str= "CAUSAL_LM"
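
For orientation, these recipe dataclasses are thin containers that `generate_peft_config` (changed below) converts into the corresponding Hugging Face PEFT config objects. A minimal sketch of that mapping for the still-supported LLaMA-Adapter path, assuming an `adapter_len` field defined just above the visible hunk:

```python
# Minimal sketch, not the recipe code itself: shows how llama_adapter_config is
# assumed to be turned into a peft.AdaptionPromptConfig via dataclasses.asdict.
from dataclasses import asdict, dataclass

from peft import AdaptionPromptConfig


@dataclass
class llama_adapter_config:
    adapter_len: int = 10          # assumed default; defined above the visible hunk
    adapter_layers: int = 30
    task_type: str = "CAUSAL_LM"


# Convert the dataclass fields into keyword arguments for the PEFT config class,
# mirroring the dataclass-to-kwargs step in generate_peft_config.
peft_config = AdaptionPromptConfig(**asdict(llama_adapter_config()))
print(peft_config)
```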

+ 1 - 1
src/llama_recipes/configs/training.py

@@ -29,7 +29,7 @@ class train_config:
     mixed_precision: bool=True
     val_batch_size: int=1
     dataset = "samsum_dataset"
-    peft_method: str = "lora" # None,llama_adapter, prefix
+    peft_method: str = "lora" # None, llama_adapter (Caution: llama_adapter is currently not supported with FSDP)
     use_peft: bool=False
     output_dir: str = "PATH/to/save/PEFT/model"
     freeze_layers: bool = False
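
As a usage note, the updated comment boils down to two workable setups. A hedged sketch, assuming `llama_recipes.finetuning.main` is the fire-based entry point that forwards keyword arguments onto `train_config` (that entry point is not part of this diff):

```python
# Hedged usage sketch; the entry point location and kwargs forwarding are assumptions.
from llama_recipes.finetuning import main

# LLaMA-Adapter: run without FSDP after this change (a single process is fine).
main(
    use_peft=True,
    peft_method="llama_adapter",
    model_name="PATH/to/model",               # placeholder, mirroring the config defaults
    output_dir="PATH/to/save/PEFT/model",
)

# LoRA remains the only PEFT method usable together with FSDP; that variant would be
# launched under torchrun with enable_fsdp=True and peft_method="lora".
```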

+ 11 - 1
src/llama_recipes/utils/config_utils.py

@@ -45,7 +45,17 @@ def generate_peft_config(train_config, kwargs):
     peft_configs = (LoraConfig, AdaptionPromptConfig, PrefixTuningConfig)
     names = tuple(c.__name__.rstrip("_config") for c in configs)
 
-    assert train_config.peft_method in names, f"Peft config not found: {train_config.peft_method}"
+    assert (
+        train_config.peft_method in names
+    ), f"Peft config not found: {train_config.peft_method}"
+
+    assert (
+        train_config.peft_method != "prefix"
+    ), "PrefixTuning is currently not supported (see https://github.com/meta-llama/llama-recipes/issues/359#issuecomment-2089350811)"
+    if train_config.enable_fsdp:
+        assert (
+            train_config.peft_method != "llama_adapter"
+        ), "Llama_adapter is currently not supported in combination with FSDP (see https://github.com/meta-llama/llama-recipes/issues/359#issuecomment-2089274425)"
 
     config = configs[names.index(train_config.peft_method)]()
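
To make the new behavior concrete, here is a minimal, self-contained sketch of the two added guards, using a stand-in for `train_config` rather than the real class:

```python
# Stand-in reproduction of the new assertions in generate_peft_config.
from dataclasses import dataclass


@dataclass
class train_config:
    peft_method: str = "lora"
    enable_fsdp: bool = False


def check_peft_method(cfg: train_config) -> None:
    # Prefix tuning is rejected unconditionally.
    assert cfg.peft_method != "prefix", "PrefixTuning is currently not supported"
    # LLaMA-Adapter is rejected only when FSDP is enabled.
    if cfg.enable_fsdp:
        assert cfg.peft_method != "llama_adapter", (
            "Llama_adapter is currently not supported in combination with FSDP"
        )


check_peft_method(train_config(peft_method="lora", enable_fsdp=True))               # passes
check_peft_method(train_config(peft_method="llama_adapter", enable_fsdp=False))     # passes
# check_peft_method(train_config(peft_method="prefix"))                             # AssertionError
# check_peft_method(train_config(peft_method="llama_adapter", enable_fsdp=True))    # AssertionError
```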
 

+ 0 - 8
src/llama_recipes/utils/fsdp_utils.py

@@ -8,8 +8,6 @@ def fsdp_auto_wrap_policy(model, transformer_layer_name):
 
     from torch.distributed.fsdp.wrap import _or_policy, lambda_auto_wrap_policy, transformer_auto_wrap_policy
 
-    from peft.tuners import PrefixEncoder, PromptEmbedding, PromptEncoder
-
     def lambda_policy_fn(module):
         if (
             len(list(module.named_children())) == 0
@@ -23,13 +21,7 @@ def fsdp_auto_wrap_policy(model, transformer_layer_name):
     transformer_wrap_policy = functools.partial(
         transformer_auto_wrap_policy,
         transformer_layer_cls=(
-            PrefixEncoder,
-            PromptEncoder,
-            PromptEmbedding,
             transformer_layer_name,
-            # FullyShardedDataParallelPlugin.get_module_class_from_name(
-            #     model, transformer_layer_name
-            # ),
         ),
     )
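
After these deletions the auto-wrap policy only special-cases trainable leaf modules (the lambda policy) and the model's own transformer block class. A condensed sketch of how the remaining pieces are assumed to compose; the final `_or_policy` combination sits below the visible hunk and is an assumption here:

```python
# Condensed sketch of fsdp_auto_wrap_policy after this change (not a verbatim copy).
import functools

from torch.distributed.fsdp.wrap import (
    _or_policy,
    lambda_auto_wrap_policy,
    transformer_auto_wrap_policy,
)


def fsdp_auto_wrap_policy(model, transformer_layer_name):
    def lambda_policy_fn(module):
        # Wrap leaf modules whose weights require gradients (e.g. LoRA parameters).
        return (
            len(list(module.named_children())) == 0
            and getattr(module, "weight", None) is not None
            and module.weight.requires_grad
        )

    lambda_policy = functools.partial(lambda_auto_wrap_policy, lambda_fn=lambda_policy_fn)
    transformer_wrap_policy = functools.partial(
        transformer_auto_wrap_policy,
        # Only the model's transformer block class remains; the prompt-tuning
        # encoder classes were removed above.
        transformer_layer_cls=(transformer_layer_name,),
    )
    # Assumed combination of both policies, matching the function's return value.
    return functools.partial(_or_policy, policies=[lambda_policy, transformer_wrap_policy])
```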