1 year ago · f0850a3ee5
--- a/recipes/quickstart/finetuning/README.md
+++ b/recipes/quickstart/finetuning/README.md
@@ -8,7 +8,7 @@ This folder contains instructions to fine-tune Meta Llama 3 on a
 
				 
			
 
				 using the canonical [finetuning script](../../../src/llama_recipes/finetuning.py) in the llama-recipes package.
			
 
				 
			
 
				-If you are new to fine-tuning techniques, check out an overview: [](./LLM_finetuning_overview.md)
			
 
				+If you are new to fine-tuning techniques, check out [an overview](./LLM_finetuning_overview.md).
			
 
				 
			
 
				 > [!TIP]
			
 
				 > If you want to try finetuning Meta Llama 3 in a Jupyter notebook you can find a quickstart notebook [here](./quickstart_peft_finetuning.ipynb)
			
--- a/recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb
+++ b/recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb
@@ -8,7 +8,7 @@
 
				     "Copyright (c) Meta Platforms, Inc. and affiliates.\n",
			
 
				     "This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n",
			
 
				     "\n",
			
 
				-    "<a href=\"https://colab.research.google.com/github/meta-llama/llama-recipes/blob/main/recipes/finetuning/quickstart_peft_finetuning.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
			
 
				+    "<a href=\"https://colab.research.google.com/github/meta-llama/llama-recipes/blob/main/recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
			
 
				    ]
			
 
				   },
			
 
				   {
			
--- a/src/llama_recipes/finetuning.py
+++ b/src/llama_recipes/finetuning.py
@@ -167,7 +167,7 @@ def main(**kwargs):
 
				         # Load the pre-trained peft model checkpoint and setup its configuration
			
 
				         if train_config.from_peft_checkpoint:
			
 
				             model = PeftModel.from_pretrained(model, train_config.from_peft_checkpoint, is_trainable=True)
			
 
				-            peft_config = model.peft_config()
			
 
				+            peft_config = model.peft_config
			
 
				         # Generate the peft config and start fine-tuning from original model
			
 
				         else:
			
 
				             peft_config = generate_peft_config(train_config, kwargs)
			
--- a/src/llama_recipes/utils/train_utils.py
+++ b/src/llama_recipes/utils/train_utils.py
@@ -151,11 +151,11 @@ def train(model, train_dataloader,eval_dataloader, tokenizer, optimizer, lr_sche
 
				                                 batch[key] = batch[key].to('cuda:0')
			
 
				                     with autocast():
			
 
				                         loss = model(**batch).loss
			
 
				+                    total_loss += loss.detach().float()
			
 
				                     loss = loss / gradient_accumulation_steps
			
 
				                     if train_config.save_metrics:
			
 
				                         train_step_loss.append(loss.detach().float().item())
			
 
				                         train_step_perplexity.append(float(torch.exp(loss.detach().float())))
			
 
				-                    total_loss += loss.detach().float()
			
 
				                     if train_config.use_fp16:
			
 
				                         # if fp16 is enabled, use gradient scaler to handle gradient update
			
 
				                         scaler.scale(loss).backward()