1 سال پیش · 48ba6805af
--- a/src/llama_recipes/configs/datasets.py
+++ b/src/llama_recipes/configs/datasets.py
@@ -9,6 +9,7 @@ class samsum_dataset:
 
				     dataset: str =  "samsum_dataset"
			
 
				     train_split: str = "train"
			
 
				     test_split: str = "validation"
			
 
				+    trust_remote_code: bool = False
			
 
				 
			
 
				 
			
 
				 @dataclass
			
@@ -37,4 +38,4 @@ class custom_dataset:
 
				 class llamaguard_toxicchat_dataset:
			
 
				     dataset: str = "llamaguard_toxicchat_dataset"
			
 
				     train_split: str = "train"
			
 
				-    test_split: str = "test"
			
 
				+    test_split: str = "test"
			
--- a/src/llama_recipes/datasets/samsum_dataset.py
+++ b/src/llama_recipes/datasets/samsum_dataset.py
@@ -8,7 +8,9 @@ import datasets
 
				 
			
 
				 
			
 
				 def get_preprocessed_samsum(dataset_config, tokenizer, split):
			
 
				-    dataset = datasets.load_dataset("samsum", split=split)
			
 
				+    if not hasattr(dataset_config, "trust_remote_code") or not dataset_config.trust_remote_code:
			
 
				+        raise ValueError("The repository for samsum contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/samsum. To activate `trust_remote_code` option use this config: --samsum_dataset.trust_remote_code=True")
			
 
				+    dataset = datasets.load_dataset("samsum", split=split, trust_remote_code=dataset_config.trust_remote_code)
			
 
				 
			
 
				     prompt = (
			
 
				         f"Summarize this dialog:\n{{dialog}}\n---\nSummary:\n"