@@ -1,5 +1,5 @@
 # Top-level output directory
-output_dir: ./outputs/Llama-3.2-11B-Instruct-w2-lora-80
+output_dir: ./outputs/Llama-3.2-11B-Instruct-w2-lora
 
 # Model + LoRA settings
 model:
@@ -46,8 +46,7 @@ dataset:
   _component_: torchtune.datasets.multimodal.vqa_dataset
   source: arrow
   data_files:
-    # train: "w2_with_input/train/data-00000-of-00001.arrow"
-    train: "fake_w2_us_tax_form_dataset_train80_test20/train/data-00000-of-00001.arrow"
+    train: "fake_w2_us_tax_form_dataset_train30_test70/train/data-00000-of-00001.arrow"
   split: train
   column_map:
     input: input
@@ -62,16 +61,17 @@ collate_fn: torchtune.data.padded_collate_tiled_images_and_mask
 # Training loop & hyperparams
 # example’s train-control
-epochs: 10
+epochs: 5
 max_steps_per_epoch: null
-batch_size: 4
-gradient_accumulation_steps: 8  # Use to increase effective batch size
+batch_size: 1
+gradient_accumulation_steps: 1  # Use to increase effective batch size
 
 # explicit optimizer / scheduler / loss
 optimizer:
   _component_: torch.optim.AdamW
   fused: true
   weight_decay: 0.01
   lr: 1e-4
+optimizer_in_bwd: true
 
 lr_scheduler:
   _component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
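
A minimal sketch of how the memory-related knobs in the new config fit together, assuming torchtune's recipe-level rule that the optimizer-in-backward step cannot be combined with gradient accumulation (values mirror the diff above):

  batch_size: 1                    # per-device micro-batch
  gradient_accumulation_steps: 1   # kept at 1; optimizer_in_bwd consumes gradients as backward runs
  optimizer_in_bwd: true           # apply optimizer updates during backward to lower peak memory
  # effective batch size = batch_size * gradient_accumulation_steps = 1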