
prep sagemaker settings

Hamid Shojanazeri 1 year ago
parent
commit
d176550ba0
2 files changed with 32 additions and 4 deletions
  1. configs/training.py (+4, -4)
  2. start_job.py (+28, -0)

+ 4 - 4
configs/training.py

@@ -6,8 +6,8 @@ from typing import ClassVar
 
 @dataclass
 class train_config:
-    model_name: str="PATH/to/LLAMA/7B"
-    enable_fsdp: bool=False
+    model_name: str="meta-llama/Llama-2-7b-chat-hf"
+    enable_fsdp: bool=True
     low_cpu_fsdp: bool=False
     run_validation: bool=True
     batch_size_training: int=4
@@ -23,8 +23,8 @@ class train_config:
     val_batch_size: int=1
     dataset = "samsum_dataset"
     peft_method: str = "lora" # None , llama_adapter, prefix
-    use_peft: bool=False
-    output_dir: str = "PATH/to/save/PEFT/model"
+    use_peft: bool=True
+    output_dir: str = "PEFT-7b-model"
     freeze_layers: bool = False
     num_freeze_layers: int = 1
     quantization: bool = False
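
Note: hard-coding these defaults (model_name, enable_fsdp, use_peft, output_dir) is a convenience for the SageMaker entry point, which is launched without the usual command-line overrides. As a rough sketch only (the override_config helper below is illustrative, not necessarily the mechanism this repo uses), dataclass configs like train_config are typically overridden at launch time along these lines:

from dataclasses import fields
from configs.training import train_config

def override_config(config, **kwargs):
    # Copy any keyword argument onto a matching dataclass field,
    # e.g. override_config(cfg, enable_fsdp=False).
    valid = {f.name for f in fields(config)}
    for key, value in kwargs.items():
        if key in valid:
            setattr(config, key, value)

cfg = train_config()
override_config(cfg, batch_size_training=2)
print(cfg.model_name, cfg.enable_fsdp, cfg.use_peft)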

+ 28 - 0
start_job.py

@@ -0,0 +1,28 @@
+import datetime
+from sagemaker.pytorch import PyTorch
+import sagemaker
+import os
+import boto3
+sagemaker_session = sagemaker.Session()
+try:  # prefer the execution role when running inside SageMaker
+    role = sagemaker.get_execution_role()
+except ValueError:
+    iam = boto3.client('iam')
+    role = iam.get_role(RoleName='...')['Role']['Arn']  # '...' = name of your SageMaker execution role
+print(role)
+
+volume_size = 500
+pytorch_estimator = PyTorch(
+    entry_point="llama_finetuning.py", # the training script
+    role=role,
+    instance_type="ml.g5.12xlarge",
+    instance_count=2, # number of ml.g5.12xlarge instances
+    source_dir=os.getcwd(),
+    framework_version="1.11.0",
+    py_version="py38",
+    volume_size=volume_size,
+    # dependencies=[''],
+    sagemaker_session=sagemaker_session,  # region (e.g. us-west-2) comes from the session's boto configuration
+)
+pytorch_estimator.fit(
+    job_name='FSDP' + '-' + datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ"))
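
As written, the job runs with the defaults baked into configs/training.py. If you later want to vary settings per job without editing the config, SageMaker framework estimators also accept a hyperparameters dict, which is forwarded to the entry point as "--key value" script arguments; whether llama_finetuning.py actually parses those flags depends on its argument handling, so treat this continuation of start_job.py as an illustrative sketch rather than a tested setup:

# Hypothetical per-job overrides, forwarded to llama_finetuning.py as CLI arguments.
hyperparameters = {
    "model_name": "meta-llama/Llama-2-7b-chat-hf",
    "batch_size_training": 4,
    "use_peft": True,
}
pytorch_estimator = PyTorch(
    entry_point="llama_finetuning.py",
    source_dir=os.getcwd(),
    role=role,
    instance_type="ml.g5.12xlarge",
    instance_count=2,
    framework_version="1.11.0",
    py_version="py38",
    volume_size=volume_size,
    hyperparameters=hyperparameters,
    sagemaker_session=sagemaker_session,
)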