@@ -8,8 +8,6 @@ def fsdp_auto_wrap_policy(model, transformer_layer_name):
 
     from torch.distributed.fsdp.wrap import _or_policy, lambda_auto_wrap_policy, transformer_auto_wrap_policy
 
-    from peft.tuners import PrefixEncoder, PromptEmbedding, PromptEncoder
-
     def lambda_policy_fn(module):
         if (
             len(list(module.named_children())) == 0
@@ -23,13 +21,7 @@ def fsdp_auto_wrap_policy(model, transformer_layer_name):
     transformer_wrap_policy = functools.partial(
         transformer_auto_wrap_policy,
         transformer_layer_cls=(
-            PrefixEncoder,
-            PromptEncoder,
-            PromptEmbedding,
             transformer_layer_name,
-            # FullyShardedDataParallelPlugin.get_module_class_from_name(
-            #     model, transformer_layer_name
-            # ),
         ),
     )
 
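For context, here is a minimal sketch of how an auto-wrap policy along these lines can be built and handed to FSDP after the PEFT prompt-tuning classes are dropped from `transformer_layer_cls`. The helper name `build_auto_wrap_policy`, the `LlamaDecoderLayer` example, and the exact body of `lambda_policy_fn` beyond the lines visible in this diff are assumptions for illustration, not part of the patch.

```python
# Sketch only: assumes a PEFT-wrapped model and a decoder-layer class such as
# LlamaDecoderLayer; these names are illustrative, not taken from this patch.
import functools

from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.distributed.fsdp.wrap import (
    _or_policy,
    lambda_auto_wrap_policy,
    transformer_auto_wrap_policy,
)


def lambda_policy_fn(module):
    # Wrap leaf modules whose weights still require grad (e.g. adapter layers),
    # so trainable PEFT parameters land in their own FSDP units.
    # (Condition assumed from the surrounding function; only the first check
    # appears in the diff above.)
    return (
        len(list(module.named_children())) == 0
        and getattr(module, "weight", None) is not None
        and module.weight.requires_grad
    )


def build_auto_wrap_policy(transformer_layer_cls):
    lambda_policy = functools.partial(lambda_auto_wrap_policy, lambda_fn=lambda_policy_fn)
    transformer_policy = functools.partial(
        transformer_auto_wrap_policy,
        transformer_layer_cls=(transformer_layer_cls,),
    )
    # A module is wrapped if either policy matches it.
    return functools.partial(_or_policy, policies=[lambda_policy, transformer_policy])


# Usage inside an initialized distributed process group (illustrative):
# policy = build_auto_wrap_policy(LlamaDecoderLayer)
# model = FSDP(model, auto_wrap_policy=policy)
```

Combining the lambda policy with the transformer policy via `_or_policy` keeps the behavior of the patched function: transformer blocks are sharded by class, while the small trainable PEFT modules are caught by the lambda check instead of being listed explicitly.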