
fix alpaca dataset by using 5% of the data as eval and make sure len(eval_loader) > 0

Kai Wu 11 months ago
parent
commit
41a46d811d
2 changed files with 8 additions and 2 deletions
  1. src/llama_recipes/datasets/alpaca_dataset.py (+4 -2)
  2. src/llama_recipes/finetuning.py (+4 -0)

src/llama_recipes/datasets/alpaca_dataset.py (+4 -2)

@@ -26,10 +26,12 @@ PROMPT_DICT = {
 class InstructionDataset(Dataset):
     def __init__(self, dataset_config, tokenizer, partition="train"):
         self.ann = json.load(open(dataset_config.data_path))
+        # Use 5% of the dataset for evaluation
+        eval_length = int(len(self.ann)/20)
         if partition == "train":
-            self.ann = self.ann[200:]
+            self.ann = self.ann[eval_length:]
         else:
-            self.ann = self.ann[:200]
+            self.ann = self.ann[:eval_length]
 
         self.tokenizer = tokenizer
 

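For reference, a minimal standalone sketch of the new split arithmetic (the toy list and its size are hypothetical, not part of the commit): int(len(self.ann)/20) holds out the first 5% of the annotations for evaluation, replacing the previous hard-coded 200 examples, which on some configs could be fewer than a single global eval batch.

    # Standalone sketch of the split above (toy data; sizes are hypothetical).
    ann = list(range(1000))           # stand-in for json.load(open(data_path))

    eval_length = int(len(ann) / 20)  # 5% of the dataset -> 50 examples
    train_ann = ann[eval_length:]     # remaining 95% -> 950 examples
    eval_ann = ann[:eval_length]      # first 5% held out for evaluation

    assert len(eval_ann) == 50 and len(train_ann) == 950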
src/llama_recipes/finetuning.py (+4 -0)

@@ -250,6 +250,10 @@ def main(**kwargs):
             pin_memory=True,
             **val_dl_kwargs,
         )
+        if len(eval_dataloader) == 0:
+            raise ValueError("The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set.")
+        else:
+            print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")
 
     # Initialize the optimizer and learning rate scheduler
     if fsdp_config.pure_bf16 and fsdp_config.optimizer == "anyprecision":
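
The new guard matters because a PyTorch DataLoader reports length zero when its dataset holds fewer examples than one batch and drop_last is enabled, so an undersized eval split would silently skip evaluation. A minimal standalone sketch (hypothetical sizes and settings, not code from this repo):

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    tiny_eval_set = TensorDataset(torch.zeros(3, 8))  # only 3 examples

    loader = DataLoader(tiny_eval_set, batch_size=16, drop_last=True)
    print(len(loader))   # 0 -> the ValueError above would fire

    loader = DataLoader(tiny_eval_set, batch_size=16, drop_last=False)
    print(len(loader))   # 1 -> one partial batch survives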