
adding data_path to custom dataset cfg and run_validation check to finetuning

Hamid Shojanazeri, 1 year ago
commit 7a5ca61136

2 changed files with 10 additions and 8 deletions:
  1. src/llama_recipes/configs/datasets.py (+2 −1)
  2. src/llama_recipes/finetuning.py (+8 −7)

src/llama_recipes/configs/datasets.py (+2 −1)

@@ -31,4 +31,5 @@ class custom_dataset:
     dataset: str = "custom_dataset"
     file: str = "examples/custom_dataset.py"
     train_split: str = "train"
-    test_split: str = "validation"
+    test_split: str = "validation"
+    data_path: str = "custom_dataset.json"
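With data_path in the config, the loader defined in `file` can read the dataset location from its config object instead of hard-coding it. A minimal sketch of such a loader follows; the get_custom_dataset(dataset_config, tokenizer, split) entry point is the one llama_recipes expects from a custom dataset module, but the JSON layout and the JsonTextDataset helper are made up for illustration:

# examples/custom_dataset.py -- sketch of a loader that honors data_path
import json
from torch.utils.data import Dataset

class JsonTextDataset(Dataset):  # hypothetical helper, not part of the repo
    def __init__(self, samples, tokenizer):
        self.samples = samples
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        # assumed record layout: {"text": "..."}
        return self.tokenizer(self.samples[idx]["text"])

def get_custom_dataset(dataset_config, tokenizer, split):
    # data_path now comes from the custom_dataset config (default: custom_dataset.json)
    with open(dataset_config.data_path) as f:
        data = json.load(f)  # assumed layout: {"train": [...], "validation": [...]}
    key = dataset_config.train_split if split == "train" else dataset_config.test_split
    return JsonTextDataset(data[key], tokenizer)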

src/llama_recipes/finetuning.py (+8 −7)

@@ -184,13 +184,14 @@ def main(**kwargs):
     if not train_config.enable_fsdp or rank == 0:
         print(f"--> Training Set Length = {len(dataset_train)}")
 
-    dataset_val = get_preprocessed_dataset(
-        tokenizer,
-        dataset_config,
-        split="test",
-    )
-    if not train_config.enable_fsdp or rank == 0:
-            print(f"--> Validation Set Length = {len(dataset_val)}")
+    if train_config.run_validation:
+        dataset_val = get_preprocessed_dataset(
+            tokenizer,
+            dataset_config,
+            split="test",
+        )
+        if not train_config.enable_fsdp or rank == 0:
+            print(f"--> Validation Set Length = {len(dataset_val)}")
 
     if train_config.batching_strategy == "packing":
         dataset_train = ConcatDataset(dataset_train, chunk_size=train_config.context_length)
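
With this guard, a run that disables run_validation never loads the test split, so a data_path containing only training data no longer fails at startup. A hypothetical invocation exercising both changes (flag spellings follow the repo's dotted config overrides; the paths are placeholders):

python -m llama_recipes.finetuning \
    --dataset custom_dataset \
    --custom_dataset.file examples/custom_dataset.py \
    --custom_dataset.data_path data/my_dataset.json \
    --run_validation False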