@@ -27,8 +27,8 @@ It lets us specify the training settings for everything from `model_name` to `da
 ```python
 model_name: str="PATH/to/Model"
 tokenizer_name: str=None
-enable_fsdp: bool=False
-low_cpu_fsdp: bool=False
+enable_fsdp: bool=False # shards model parameters, optimizer states and gradients across DDP ranks
+low_cpu_fsdp: bool=False # saves cpu memory by loading pretrained model on rank0 only
 run_validation: bool=True
 batch_size_training: int=4
 batching_strategy: str="packing" #alternative: padding
@@ -42,14 +42,14 @@ It lets us specify the training settings for everything from `model_name` to `da
 num_workers_dataloader: int=1
 lr: float=1e-4
 weight_decay: float=0.0
-gamma: float= 0.85
+gamma: float= 0.85 # multiplicatively decay the learning rate by gamma after each epoch
 seed: int=42
 use_fp16: bool=False
 mixed_precision: bool=True
 val_batch_size: int=1
 dataset = "samsum_dataset"
 peft_method: str = "lora" # None,llama_adapter, prefix
-use_peft: bool=False
+use_peft: bool=False # use parameter efficient fine tuning
 output_dir: str = "PATH/to/save/PEFT/model"
 freeze_layers: bool = False
 num_freeze_layers: int = 1
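To illustrate how the newly documented `gamma` field is typically consumed, here is a minimal, self-contained sketch of per-epoch multiplicative learning-rate decay using PyTorch's `StepLR`. It assumes `gamma` feeds a `StepLR(step_size=1, ...)` scheduler, as the added comment describes; the `TrainConfig` class and the toy model below are illustrative stand-ins, not the project's actual training code.

```python
from torch import nn, optim

# Illustrative stand-ins for the config fields above and a model to train.
class TrainConfig:
    lr: float = 1e-4
    weight_decay: float = 0.0
    gamma: float = 0.85  # multiplicative LR decay applied once per epoch

train_config = TrainConfig()
model = nn.Linear(16, 16)

optimizer = optim.AdamW(
    model.parameters(),
    lr=train_config.lr,
    weight_decay=train_config.weight_decay,
)

# StepLR with step_size=1 multiplies the learning rate by gamma after every
# epoch, matching the comment added to `gamma` in the diff above.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=train_config.gamma)

for epoch in range(3):
    # ... a real training loop would compute a loss and call loss.backward() here ...
    optimizer.step()   # no gradients in this sketch; kept to preserve the step order
    scheduler.step()   # decay the learning rate once per epoch
    print(f"epoch {epoch}: lr = {scheduler.get_last_lr()[0]:.6e}")
```

With `gamma=0.85` and `lr=1e-4`, the learning rate after three epochs is `1e-4 * 0.85**3 ≈ 6.1e-5`.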