@@ -11,6 +11,24 @@ import nltk
 import yaml
 from datasets import Dataset, load_dataset
 
+LLAMA_3_1_INSTRUCT_EVALS=[
+    "meta-llama/Llama-3.1-8B-Instruct-evals",
+    "meta-llama/Llama-3.1-70B-Instruct-evals",
+    "meta-llama/Llama-3.1-405B-Instruct-evals",
+]
+LLAMA_3_1_PRETRAIN_EVALS=[
+    "meta-llama/Llama-3.1-8B-evals",
+    "meta-llama/Llama-3.1-70B-evals",
+    "meta-llama/Llama-3.1-405B-evals",
+]
+LLAMA_3_2_INSTRUCT_EVALS=[
+    "meta-llama/Llama-3.2-1B-Instruct-evals",
+    "meta-llama/Llama-3.2-3B-Instruct-evals",
+]
+LLAMA_3_2_PRETRAIN_EVALS=[
+    "meta-llama/Llama-3.2-1B-evals",
+    "meta-llama/Llama-3.2-3B-evals",
+]
 
 # get the ifeval  from the evals dataset and join it with the original ifeval datasets
 def get_ifeval_data(model_name, output_dir):
@@ -56,8 +74,8 @@ def get_ifeval_data(model_name, output_dir):
 
 
 # get the math_hard data from the evals dataset and join it with the original math_hard dataset
-def get_math_data(model_name, output_dir):
-    print(f"preparing the math data using {model_name}'s evals dataset")
+def get_math_hard_data(model_name, output_dir):
+    print(f"preparing the math hard data using {model_name}'s evals dataset")
     if model_name not in [
         "Llama-3.1-8B-Instruct",
         "Llama-3.1-70B-Instruct",
@@ -74,6 +92,30 @@ def get_math_data(model_name, output_dir):
         split="latest",
     )
     math_data = load_dataset(original_dataset_name, split="test")
+    joined = join_meta_and_original_math_data(meta_data, math_data)
+    joined.to_parquet(output_dir + "/joined_math_hard.parquet")
+
+def get_math_data(model_name, output_dir):
+    print(f"preparing the math data using {model_name}'s evals dataset")
+    if model_name not in [
+        "Llama-3.2-1B-Instruct",
+        "Llama-3.2-3B-Instruct",
+    ]:
+        raise ValueError(
+            "Only Llama-3.2-1B-Instruct and Llama-3.2-3B-Instruct models are supported for MATH"
+        )
+    original_dataset_name = "lighteval/MATH"
+    meta_dataset_name = f"meta-llama/{model_name}-evals"
+    meta_data = load_dataset(
+        meta_dataset_name,
+        name=f"{model_name}-evals__math__details",
+        split="latest",
+    )
+    math_data = load_dataset(original_dataset_name, split="test")
+    joined = join_meta_and_original_math_data(meta_data, math_data)
+    joined.to_parquet(output_dir + "/joined_math.parquet")
+
+def join_meta_and_original_math_data(meta_data, math_data):
     meta_df = meta_data.to_pandas()
     math_df = math_data.to_pandas()
     math_df = math_df.rename(columns={"problem": "input_question"})
@@ -94,9 +136,7 @@ def get_math_data(model_name, output_dir):
     joined = joined.rename_column(
         "output_prediction_text", "previous_output_prediction_text"
     )
-
-    joined.to_parquet(output_dir + "/joined_math.parquet")
-
+    return joined
 
 # get the question from the ifeval dataset
 def get_question(example):
@@ -134,18 +174,33 @@ def change_yaml(args, base_name):
                         "WORK_DIR", str(yaml_dir)
                     )
                 )
+    # 3.2 evals dataset has a different set of tasks from 3.1
+    # Update tasks in meta_pretrain.yaml
+    with open(args.template_dir + "/meta_pretrain.yaml", "r") as yaml_file:
+        meta_pretrain = yaml.safe_load(yaml_file)
+    if args.evals_dataset in LLAMA_3_1_PRETRAIN_EVALS:
+        meta_pretrain["task"] = ["meta_bbh", "meta_mmlu_pro_pretrain"]
+    elif args.evals_dataset in LLAMA_3_2_PRETRAIN_EVALS:
+        meta_pretrain["task"] = ["meta_mmlu"]
+    with open(args.work_dir + "/meta_pretrain.yaml", "w") as yaml_file:
+        yaml.dump(meta_pretrain, yaml_file)
+
+    # Update tasks in meta_instruct.yaml
+    with open(args.template_dir + "/meta_instruct.yaml", "r") as yaml_file:
+        meta_instruct = yaml.safe_load(yaml_file)
+    if args.evals_dataset in LLAMA_3_1_INSTRUCT_EVALS:
+        meta_instruct["task"] = ["meta_ifeval", "meta_math_hard", "meta_gpqa_cot", "meta_mmlu_pro_instruct"]
+    elif args.evals_dataset in LLAMA_3_2_INSTRUCT_EVALS:
+        meta_instruct["task"] = ["meta_mmlu", "meta_math", "meta_gpqa"]
+    with open(args.work_dir + "/meta_instruct.yaml", "w") as yaml_file:
+        yaml.dump(meta_instruct, yaml_file)
 
 
 # copy the files and change the yaml file to use the correct model name
 def copy_and_prepare(args):
     # nltk punkt_tab package is needed
     nltk.download('punkt_tab')
-    if not os.path.exists(args.work_dir):
-        # Copy the all files, including yaml files and python files, from template folder to the work folder
-
-        copy_dir(args.template_dir, args.work_dir)
-    else:
-        print("work_dir already exists, no need to copy files")
+    copy_dir(args.template_dir, args.work_dir)
     # Use the template yaml to get the correct model name in work_dir yaml
     base_name = (
         args.evals_dataset.split("/")[-1].replace("-evals", "").replace("-Instruct", "")
@@ -169,21 +224,22 @@ def prepare_datasets(args):
     # model_name are derived from the evals_dataset name
     task_list = args.tasks.split(",")
     model_name = args.evals_dataset.split("/")[-1].replace("-evals", "")
-    if "meta_instruct" in task_list:
+    if "meta_instruct" in task_list and args.evals_dataset in LLAMA_3_1_INSTRUCT_EVALS:
         get_ifeval_data(model_name, args.work_dir)
-
+        get_math_hard_data(model_name, args.work_dir)
+    elif "meta_instruct" in task_list and args.evals_dataset in LLAMA_3_2_INSTRUCT_EVALS:
         get_math_data(model_name, args.work_dir)
     else:
         if "meta_ifeval" in task_list:
             get_ifeval_data(model_name, args.work_dir)
         if "meta_math_hard" in task_list:
-            get_math_data(model_name, args.work_dir)
+            get_math_hard_data(model_name, args.work_dir)
 
 
 # copy the files from src to dst
 def copy_dir(src, dst):
     try:
-        shutil.copytree(src, dst)
+        shutil.copytree(src, dst, dirs_exist_ok=True)
     except OSError as exc:  # python >2.5
         if exc.errno in (errno.ENOTDIR, errno.EINVAL):
             shutil.copy(src, dst)
@@ -207,16 +263,14 @@ if __name__ == "__main__":
         args.__setattr__(k, v)
     if not os.path.exists(args.template_dir):
         raise ValueError("The template_dir does not exist, please check the path")
-    if args.evals_dataset not in [
-        "meta-llama/Llama-3.1-8B-Instruct-evals",
-        "meta-llama/Llama-3.1-70B-Instruct-evals",
-        "meta-llama/Llama-3.1-405B-Instruct-evals",
-        "meta-llama/Llama-3.1-8B-evals",
-        "meta-llama/Llama-3.1-70B-evals",
-        "meta-llama/Llama-3.1-405B-evals",
-    ]:
+    if args.evals_dataset not in (
+        LLAMA_3_1_INSTRUCT_EVALS +
+        LLAMA_3_1_PRETRAIN_EVALS +
+        LLAMA_3_2_INSTRUCT_EVALS +
+        LLAMA_3_2_PRETRAIN_EVALS
+    ):
         raise ValueError(
-            "The evals dataset is not valid, please double check the name, must use the name in the Llama 3.1 Evals collection"
+            "The evals dataset is not valid, please double check the name, must use the name in the Llama 3.1 or 3.2 Evals collection."
         )
     args.model_args = f"pretrained={args.model_name},tensor_parallel_size={args.tensor_parallel_size},dtype=auto,gpu_memory_utilization={args.gpu_memory_utilization},data_parallel_size={args.data_parallel_size},max_model_len={args.max_model_len},add_bos_token=True,seed=42"
     # Copy the all files from template folder to the work folder
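
For orientation, a minimal standalone sketch (not part of the diff above) of the gating logic this change introduces: which instruct task list gets written depends on whether the chosen evals dataset belongs to the Llama 3.1 or the Llama 3.2 collection. The constants mirror the ones added at the top of the script; select_instruct_tasks is a hypothetical helper used only for illustration.

# Illustration only, not part of the patch; mirrors the task selection added in change_yaml().
LLAMA_3_1_INSTRUCT_EVALS = [
    "meta-llama/Llama-3.1-8B-Instruct-evals",
    "meta-llama/Llama-3.1-70B-Instruct-evals",
    "meta-llama/Llama-3.1-405B-Instruct-evals",
]
LLAMA_3_2_INSTRUCT_EVALS = [
    "meta-llama/Llama-3.2-1B-Instruct-evals",
    "meta-llama/Llama-3.2-3B-Instruct-evals",
]

def select_instruct_tasks(evals_dataset):
    # 3.1 instruct collections run the full instruct suite; 3.2 runs the reduced set.
    if evals_dataset in LLAMA_3_1_INSTRUCT_EVALS:
        return ["meta_ifeval", "meta_math_hard", "meta_gpqa_cot", "meta_mmlu_pro_instruct"]
    if evals_dataset in LLAMA_3_2_INSTRUCT_EVALS:
        return ["meta_mmlu", "meta_math", "meta_gpqa"]
    raise ValueError("evals_dataset must come from the Llama 3.1 or 3.2 Evals collections")

print(select_instruct_tasks("meta-llama/Llama-3.2-3B-Instruct-evals"))
# ['meta_mmlu', 'meta_math', 'meta_gpqa']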
			 |