Преглед изворног кода

Fix fine-tuning training loss accumulation (#725)

celestinoalan пре 6 месеци
родитељ
комит
d6ae2031c3
1 измењених фајлова са 1 додато и 1 уклоњено
  1. 1 1
      src/llama_recipes/utils/train_utils.py

+ 1 - 1
src/llama_recipes/utils/train_utils.py

@@ -151,11 +151,11 @@ def train(model, train_dataloader,eval_dataloader, tokenizer, optimizer, lr_sche
                                 batch[key] = batch[key].to('cuda:0')
                     with autocast():
                         loss = model(**batch).loss
+                    total_loss += loss.detach().float()
                     loss = loss / gradient_accumulation_steps
                     if train_config.save_metrics:
                         train_step_loss.append(loss.detach().float().item())
                         train_step_perplexity.append(float(torch.exp(loss.detach().float())))
-                    total_loss += loss.detach().float()
                     if train_config.use_fp16:
                         # if fp16 is enabled, use gradient scaler to handle gradient update
                         scaler.scale(loss).backward()