prepare_meta_eval.py

# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 3 Community License Agreement.
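
"""Prepare a work directory for reproducing Meta's Llama 3.x evals with lm_eval.

Given a YAML config (see the illustrative eval_config.yaml sketch near
load_config below), this script copies the eval templates into a work
directory, joins the IFEval / MATH-Hard source datasets with the published
evals datasets, and prints the lm_eval command to run:

    python prepare_meta_eval.py --config_path ./eval_config.yaml
"""
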
import argparse
import ast
import errno
import glob
import os
import shutil
from pathlib import Path

import nltk
import yaml
from datasets import Dataset, load_dataset


# Get the IFEval data from the model's evals dataset and join it with the
# original IFEval dataset on the question text.
def get_ifeval_data(model_name, output_dir):
    print(f"preparing the ifeval data using {model_name}'s evals dataset")
    if model_name not in [
        "Llama-3.1-8B-Instruct",
        "Llama-3.1-70B-Instruct",
        "Llama-3.1-405B-Instruct",
    ]:
        raise ValueError(
            "Only Llama-3.1-8B-Instruct, Llama-3.1-70B-Instruct, Llama-3.1-405B-Instruct models are supported for IFEval"
        )
    original_dataset_name = "wis-k/instruction-following-eval"
    meta_dataset_name = f"meta-llama/{model_name}-evals"
    meta_data = load_dataset(
        meta_dataset_name,
        name=f"{model_name}-evals__ifeval__strict__details",
        split="latest",
    )
    ifeval_data = load_dataset(original_dataset_name, split="train")
    meta_data = meta_data.map(get_question)
    meta_df = meta_data.to_pandas()
    ifeval_df = ifeval_data.to_pandas()
    ifeval_df = ifeval_df.rename(columns={"prompt": "input_question"})
    # Join the two datasets on the input_question column.
    joined = meta_df.join(ifeval_df.set_index("input_question"), on="input_question")
    joined = joined.rename(columns={"input_final_prompts": "prompt"})
    joined = joined.rename(columns={"is_correct": "previous_is_correct"})
    joined = Dataset.from_pandas(joined)
    joined = joined.select_columns(
        [
            "input_question",
            "prompt",
            "previous_is_correct",
            "instruction_id_list",
            "kwargs",
            "output_prediction_text",
            "key",
        ]
    )
    # rename_column returns a new Dataset, so keep the returned value.
    joined = joined.rename_column(
        "output_prediction_text", "previous_output_prediction_text"
    )
    joined.to_parquet(output_dir + "/joined_ifeval.parquet")
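
# Illustrative standalone use of the helper above (paths are placeholders):
#   get_ifeval_data("Llama-3.1-8B-Instruct", "./work_dir")
# This writes ./work_dir/joined_ifeval.parquet containing the original IFEval
# fields alongside the model's previous predictions and correctness flags.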


# Get the MATH-Hard data from the model's evals dataset and join it with the
# original MATH-Hard dataset.
def get_math_data(model_name, output_dir):
    print(f"preparing the math data using {model_name}'s evals dataset")
    if model_name not in [
        "Llama-3.1-8B-Instruct",
        "Llama-3.1-70B-Instruct",
        "Llama-3.1-405B-Instruct",
    ]:
        raise ValueError(
            "Only Llama-3.1-8B-Instruct, Llama-3.1-70B-Instruct, Llama-3.1-405B-Instruct models are supported for MATH_hard"
        )
    original_dataset_name = "lighteval/MATH-Hard"
    meta_dataset_name = f"meta-llama/{model_name}-evals"
    meta_data = load_dataset(
        meta_dataset_name,
        name=f"{model_name}-evals__math_hard__details",
        split="latest",
    )
    math_data = load_dataset(original_dataset_name, split="test")
    meta_df = meta_data.to_pandas()
    math_df = math_data.to_pandas()
    math_df = math_df.rename(columns={"problem": "input_question"})
    # Join the two datasets on the input_question column.
    joined = meta_df.join(math_df.set_index("input_question"), on="input_question")
    joined = Dataset.from_pandas(joined)
    joined = joined.select_columns(
        [
            "input_question",
            "input_correct_responses",
            "input_final_prompts",
            "is_correct",
            "solution",
            "output_prediction_text",
        ]
    )
    joined = joined.rename_column("is_correct", "previous_is_correct")
    joined = joined.rename_column(
        "output_prediction_text", "previous_output_prediction_text"
    )
    joined.to_parquet(output_dir + "/joined_math.parquet")
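
# A quick way to sanity-check the joined output (illustrative snippet):
#   from datasets import load_dataset
#   ds = load_dataset("parquet", data_files="./work_dir/joined_math.parquet")["train"]
#   print(ds.column_names)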


# Extract the question text from the IFEval evals dataset. The raw field is a
# stringified dict, so parse it, then normalize two known capitalization
# differences so the join against the original IFEval dataset matches.
def get_question(example):
    try:
        example["input_question"] = (
            ast.literal_eval(
                example["input_question"]
                .replace("null", "None")
                .replace("true", "True")
                .replace("false", "False")
            )["dialog"][0]["body"]
            .replace("Is it True that the first song", "Is it true that the first song")
            .replace("Is the following True", "Is the following true")
        )
        example["input_final_prompts"] = example["input_final_prompts"][0]
        return example
    except Exception:
        # Surface the offending record instead of silently returning None,
        # which would make Dataset.map fail with a confusing error.
        print(example["input_question"])
        raise
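
# For reference, the raw "input_question" field looks roughly like the
# stringified dict below (illustrative, not a real record):
#   '{"dialog": [{"body": "Write a haiku about autumn.", ...}], ...}'
# with the JSON literals null/true/false mapped to Python's None/True/False
# before parsing.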


# Change the yaml files to use the correct model name and work_dir.
def change_yaml(args, base_name):
    # os.path.join keeps the glob valid whether or not template_dir ends with
    # a path separator.
    for yaml_file in glob.glob(
        os.path.join(args.template_dir, "**", "*", "*.yaml"), recursive=True
    ):
        with open(yaml_file, "r") as sources:
            lines = sources.readlines()
        output_path = yaml_file.replace(args.template_dir, args.work_dir)
        print(f"changing {yaml_file} to output_path: {output_path}")
        path = Path(output_path)
        yaml_dir = path.parent
        with open(output_path, "w") as output:
            for line in lines:
                output.write(
                    line.replace("Llama-3.1-8B", base_name).replace(
                        "WORK_DIR", str(yaml_dir)
                    )
                )
    # The 3.2 evals dataset has a different set of evals from 3.1, so update
    # the tasks in the meta_pretrain.yaml file (3.2 for meta_instruct.yaml is
    # not supported yet).
    with open(args.template_dir + "/meta_pretrain.yaml", "r") as yaml_file:
        meta_pretrain = yaml.safe_load(yaml_file)
    if args.evals_dataset in [
        "meta-llama/Llama-3.2-1B-evals",
        "meta-llama/Llama-3.2-3B-evals",
    ]:
        meta_pretrain["task"] = ["meta_mmlu"]
    elif args.evals_dataset in [
        "meta-llama/Llama-3.1-8B-evals",
        "meta-llama/Llama-3.1-70B-evals",
        "meta-llama/Llama-3.1-405B-evals",
    ]:
        meta_pretrain["task"] = ["meta_bbh", "meta_mmlu_pro_pretrain"]
    with open(args.work_dir + "/meta_pretrain.yaml", "w") as yaml_file:
        yaml.dump(meta_pretrain, yaml_file)
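
# Illustrative effect of the rewrite above on a template line (values are
# examples): with base_name "Llama-3.1-70B", a template line such as
#   dataset_path: meta-llama/Llama-3.1-8B-evals
# becomes
#   dataset_path: meta-llama/Llama-3.1-70B-evals
# and any "WORK_DIR" token is replaced by the yaml's output directory.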


# Copy the template files and change the yaml files to use the correct model name.
def copy_and_prepare(args):
    # The nltk punkt_tab package is needed.
    nltk.download("punkt_tab")
    if not os.path.exists(args.work_dir):
        # Copy all the files, including yaml files and python files, from the
        # template folder to the work folder.
        copy_dir(args.template_dir, args.work_dir)
    else:
        print("work_dir already exists, no need to copy files")
    # Use the template yaml to get the correct model name into the work_dir yaml.
    base_name = (
        args.evals_dataset.split("/")[-1].replace("-evals", "").replace("-Instruct", "")
    )
    change_yaml(args, base_name)
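
# Example of the base_name derivation above (illustrative):
#   "meta-llama/Llama-3.1-8B-Instruct-evals" -> "Llama-3.1-8B"
# which matches the model name the template yaml files reference.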


def parse_eval_args():
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        "--config_path",
        type=str,
        default="./eval_config.yaml",
        help="the config yaml file that contains all the eval parameters",
    )
    return parser.parse_args()


def prepare_datasets(args):
    # Prepare the datasets for the IFeval and MATH_Hard tasks: we need to join
    # the original datasets with the evals datasets by the actual questions.
    # model_name is derived from the evals_dataset name.
    task_list = args.tasks.split(",")
    model_name = args.evals_dataset.split("/")[-1].replace("-evals", "")
    if "meta_instruct" in task_list:
        get_ifeval_data(model_name, args.work_dir)
        get_math_data(model_name, args.work_dir)
    else:
        if "meta_ifeval" in task_list:
            get_ifeval_data(model_name, args.work_dir)
        if "meta_math_hard" in task_list:
            get_math_data(model_name, args.work_dir)
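
# Example of how the tasks string maps to the preparation steps (illustrative):
#   tasks: "meta_instruct"              -> prepares both IFEval and MATH-Hard
#   tasks: "meta_ifeval,meta_math_hard" -> prepares each listed task individually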


# Copy the files from src to dst.
def copy_dir(src, dst):
    try:
        shutil.copytree(src, dst)
    except OSError as exc:
        # src is a single file rather than a directory.
        if exc.errno in (errno.ENOTDIR, errno.EINVAL):
            shutil.copy(src, dst)
        else:
            raise


# Load the config yaml file.
def load_config(config_path: str = "./config.yaml"):
    # Read the YAML configuration file.
    with open(config_path, "r") as file:
        config = yaml.safe_load(file)
    return config
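
# An illustrative eval_config.yaml; the keys mirror those consumed in
# __main__ below, and all values are placeholders:
#   model_name: "meta-llama/Llama-3.1-8B-Instruct"
#   evals_dataset: "meta-llama/Llama-3.1-8B-Instruct-evals"
#   tasks: "meta_instruct"
#   template_dir: "./meta_template"
#   work_dir: "./work_dir"
#   output_path: "./eval_results"
#   tensor_parallel_size: 1
#   data_parallel_size: 1
#   gpu_memory_utilization: 0.9
#   max_model_len: 8192
#   limit: null
#   log_samples: false
#   show_config: false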


if __name__ == "__main__":
    args = parse_eval_args()
    config = load_config(args.config_path)
    # Copy every config entry onto the args namespace.
    for k, v in config.items():
        setattr(args, k, v)
    if not os.path.exists(args.template_dir):
        raise ValueError("The template_dir does not exist, please check the path")
    if args.evals_dataset not in [
        "meta-llama/Llama-3.1-8B-Instruct-evals",
        "meta-llama/Llama-3.1-70B-Instruct-evals",
        "meta-llama/Llama-3.1-405B-Instruct-evals",
        "meta-llama/Llama-3.1-8B-evals",
        "meta-llama/Llama-3.1-70B-evals",
        "meta-llama/Llama-3.1-405B-evals",
        "meta-llama/Llama-3.2-1B-evals",
        "meta-llama/Llama-3.2-3B-evals",
    ]:
        raise ValueError(
            "The evals dataset is not valid, please double check the name, must use the name in the Llama 3.1 or 3.2 Evals collection. Note that 3.2-Instruct evals are not yet supported."
        )
    # Create the vLLM model args.
    args.model_args = f"pretrained={args.model_name},tensor_parallel_size={args.tensor_parallel_size},dtype=auto,gpu_memory_utilization={args.gpu_memory_utilization},data_parallel_size={args.data_parallel_size},max_model_len={args.max_model_len},add_bos_token=True,seed=42"
    # Copy all the files from the template folder to the work folder.
    copy_and_prepare(args)
    # Prepare the datasets for the IFeval and MATH_Hard tasks, which need the
    # original datasets joined with the evals datasets.
    prepare_datasets(args)
    print(
        f"preparation for the {args.model_name} using {args.evals_dataset} is done, all saved in the work_dir: {args.work_dir}"
    )
    command_str = f"lm_eval --model vllm --model_args {args.model_args} --tasks {args.tasks} --batch_size auto --output_path {args.output_path} --include_path {os.path.abspath(args.work_dir)} --seed 42"
    if args.limit:
        command_str += f" --limit {args.limit}"
    if args.log_samples:
        command_str += " --log_samples"
    if args.show_config:
        command_str += " --show_config"
    print("please use the following command to run the meta reproduce evals:")
    print(command_str)
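
# With a config like the sketch above, the printed command would look roughly
# like this (line-wrapped here for readability):
#   lm_eval --model vllm \
#     --model_args pretrained=meta-llama/Llama-3.1-8B-Instruct,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.9,data_parallel_size=1,max_model_len=8192,add_bos_token=True,seed=42 \
#     --tasks meta_instruct --batch_size auto --output_path ./eval_results \
#     --include_path /abs/path/to/work_dir --seed 42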