Browse Source

Merge remote-tracking branch 'origin' into 3p-integrations-crusoe

Ethan 6 months ago
parent
commit
f0850a3ee5

+ 1 - 1
recipes/quickstart/finetuning/README.md

@@ -8,7 +8,7 @@ This folder contains instructions to fine-tune Meta Llama 3 on a
 
 using the canonical [finetuning script](../../../src/llama_recipes/finetuning.py) in the llama-recipes package.
 
-If you are new to fine-tuning techniques, check out an overview: [](./LLM_finetuning_overview.md)
+If you are new to fine-tuning techniques, check out [an overview](./LLM_finetuning_overview.md).
 
 > [!TIP]
 > If you want to try finetuning Meta Llama 3 in a Jupyter notebook you can find a quickstart notebook [here](./quickstart_peft_finetuning.ipynb)

+ 1 - 1
recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb

@@ -8,7 +8,7 @@
     "Copyright (c) Meta Platforms, Inc. and affiliates.\n",
     "This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n",
     "\n",
-    "<a href=\"https://colab.research.google.com/github/meta-llama/llama-recipes/blob/main/recipes/finetuning/quickstart_peft_finetuning.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+    "<a href=\"https://colab.research.google.com/github/meta-llama/llama-recipes/blob/main/recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
    ]
   },
   {

+ 1 - 1
src/llama_recipes/finetuning.py

@@ -167,7 +167,7 @@ def main(**kwargs):
         # Load the pre-trained peft model checkpoint and setup its configuration
         if train_config.from_peft_checkpoint:
             model = PeftModel.from_pretrained(model, train_config.from_peft_checkpoint, is_trainable=True)
-            peft_config = model.peft_config()
+            peft_config = model.peft_config
         # Generate the peft config and start fine-tuning from original model
         else:
             peft_config = generate_peft_config(train_config, kwargs)

+ 1 - 1
src/llama_recipes/utils/train_utils.py

@@ -151,11 +151,11 @@ def train(model, train_dataloader,eval_dataloader, tokenizer, optimizer, lr_sche
                                 batch[key] = batch[key].to('cuda:0')
                     with autocast():
                         loss = model(**batch).loss
+                    total_loss += loss.detach().float()
                     loss = loss / gradient_accumulation_steps
                     if train_config.save_metrics:
                         train_step_loss.append(loss.detach().float().item())
                         train_step_perplexity.append(float(torch.exp(loss.detach().float())))
-                    total_loss += loss.detach().float()
                     if train_config.use_fp16:
                         # if fp16 is enabled, use gradient scaler to handle gradient update
                         scaler.scale(loss).backward()