prepare_meta_eval.py

# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 3 Community License Agreement.
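"""Prepare everything needed to reproduce the Meta Llama 3.1 evals with lm_eval:
copy the yaml task templates into a work dir, patch them for the chosen model,
and join the original IFEval and MATH-Hard datasets with the corresponding
meta-llama evals datasets."""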
import argparse
import errno
import glob
import os
import shutil
from pathlib import Path

import nltk
import yaml
from datasets import Dataset, load_dataset

# Get the ifeval data from the evals dataset and join it with the original ifeval dataset.
def get_ifeval_data(model_name, output_dir):
    print(f"preparing the ifeval data using {model_name}'s evals dataset")
    if model_name not in [
        "Llama-3.1-8B-Instruct",
        "Llama-3.1-70B-Instruct",
        "Llama-3.1-405B-Instruct",
    ]:
        raise ValueError(
            "Only Llama-3.1-8B-Instruct, Llama-3.1-70B-Instruct, Llama-3.1-405B-Instruct models are supported for IFEval"
        )
    original_dataset_name = "wis-k/instruction-following-eval"
    meta_dataset_name = f"meta-llama/{model_name}-evals"
    meta_data = load_dataset(
        meta_dataset_name,
        name=f"{model_name}-evals__ifeval__strict__details",
        split="latest",
    )
    ifeval_data = load_dataset(original_dataset_name, split="train")
    meta_data = meta_data.map(get_question)
    meta_df = meta_data.to_pandas()
    ifeval_df = ifeval_data.to_pandas()
    ifeval_df = ifeval_df.rename(columns={"prompt": "input_question"})
    # Join the two datasets on the input_question column.
    joined = meta_df.join(ifeval_df.set_index("input_question"), on="input_question")
    joined = joined.rename(columns={"input_final_prompts": "prompt"})
    joined = joined.rename(columns={"is_correct": "previous_is_correct"})
    joined = Dataset.from_pandas(joined)
    joined = joined.select_columns(
        [
            "input_question",
            "prompt",
            "previous_is_correct",
            "instruction_id_list",
            "kwargs",
            "output_prediction_text",
            "key",
        ]
    )
    # Assign the result: rename_column returns a new Dataset rather than renaming in place.
    joined = joined.rename_column(
        "output_prediction_text", "previous_output_prediction_text"
    )
    joined.to_parquet(output_dir + "/joined_ifeval.parquet")

# Get the math_hard data from the evals dataset and join it with the original math_hard dataset.
def get_math_data(model_name, output_dir):
    print(f"preparing the math data using {model_name}'s evals dataset")
    if model_name not in [
        "Llama-3.1-8B-Instruct",
        "Llama-3.1-70B-Instruct",
        "Llama-3.1-405B-Instruct",
    ]:
        raise ValueError(
            "Only Llama-3.1-8B-Instruct, Llama-3.1-70B-Instruct, Llama-3.1-405B-Instruct models are supported for MATH_hard"
        )
    original_dataset_name = "lighteval/MATH-Hard"
    meta_dataset_name = f"meta-llama/{model_name}-evals"
    meta_data = load_dataset(
        meta_dataset_name,
        name=f"{model_name}-evals__math_hard__details",
        split="latest",
    )
    math_data = load_dataset(original_dataset_name, split="test")
    meta_df = meta_data.to_pandas()
    math_df = math_data.to_pandas()
    math_df = math_df.rename(columns={"problem": "input_question"})
    # Join the two datasets on the input_question column.
    joined = meta_df.join(math_df.set_index("input_question"), on="input_question")
    joined = Dataset.from_pandas(joined)
    joined = joined.select_columns(
        [
            "input_question",
            "input_correct_responses",
            "input_final_prompts",
            "is_correct",
            "solution",
            "output_prediction_text",
        ]
    )
    joined = joined.rename_column("is_correct", "previous_is_correct")
    joined = joined.rename_column(
        "output_prediction_text", "previous_output_prediction_text"
    )
    joined.to_parquet(output_dir + "/joined_math.parquet")

# Extract the question text from a serialized ifeval example.
def get_question(example):
    try:
        # input_question holds a stringified dict shaped roughly like
        # {"dialog": [{"body": "<question text>", ...}]}; normalize the
        # JSON-style literals so the string can be evaluated as Python.
        example["input_question"] = (
            eval(
                example["input_question"]
                .replace("null", "None")
                .replace("true", "True")
                .replace("false", "False")
            )["dialog"][0]["body"]
            .replace("Is it True that the first song", "Is it true that the first song")
            .replace("Is the following True", "Is the following true")
        )
        example["input_final_prompts"] = example["input_final_prompts"][0]
        return example
    except Exception:
        # Print the offending example before failing; returning None from a
        # datasets.map function would only fail later with a confusing error.
        print(example["input_question"])
        raise

# Change the yaml files to use the correct model name and work dir.
def change_yaml(args, base_name):
    for yaml_file in glob.glob(args.template_dir + "**/*/*.yaml", recursive=True):
        with open(yaml_file, "r") as sources:
            lines = sources.readlines()
        output_path = yaml_file.replace(args.template_dir, args.work_dir)
        print(f"changing {yaml_file} to output_path: {output_path}")
        path = Path(output_path)
        yaml_dir = path.parent
        with open(output_path, "w") as output:
            for line in lines:
                output.write(
                    line.replace("Llama-3.1-8B", base_name).replace(
                        "WORK_DIR", str(yaml_dir)
                    )
                )

# Copy the template files and patch the yaml files to use the correct model name.
def copy_and_prepare(args):
    # The nltk punkt_tab package is needed.
    nltk.download("punkt_tab")
    if not os.path.exists(args.work_dir):
        # Copy all files, including yaml and python files, from the template folder to the work folder.
        copy_dir(args.template_dir, args.work_dir)
    else:
        print("work_dir already exists, no need to copy files")
    # Use the template yaml to get the correct model name into the work_dir yaml files.
    base_name = (
        args.evals_dataset.split("/")[-1].replace("-evals", "").replace("-Instruct", "")
    )
    change_yaml(args, base_name)

def parse_eval_args():
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        "--config_path",
        type=str,
        default="./eval_config.yaml",
        help="the config yaml file that contains all the eval parameters",
    )
    return parser.parse_args()

def prepare_datasets(args):
    # Prepare the datasets for the IFeval and MATH_Hard tasks: the original
    # datasets must be joined with the evals datasets on the actual questions.
    # The model name is derived from the evals_dataset name.
    task_list = args.tasks.split(",")
    model_name = args.evals_dataset.split("/")[-1].replace("-evals", "")
    if "meta_instruct" in task_list:
        get_ifeval_data(model_name, args.work_dir)
        get_math_data(model_name, args.work_dir)
    else:
        if "meta_ifeval" in task_list:
            get_ifeval_data(model_name, args.work_dir)
        if "meta_math_hard" in task_list:
            get_math_data(model_name, args.work_dir)

# Copy the files from src to dst; fall back to a single-file copy when src is not a directory.
def copy_dir(src, dst):
    try:
        shutil.copytree(src, dst)
    except OSError as exc:  # python >2.5
        if exc.errno in (errno.ENOTDIR, errno.EINVAL):
            shutil.copy(src, dst)
        else:
            raise

# Load the config yaml file.
def load_config(config_path: str = "./config.yaml"):
    # Read the YAML configuration file.
    with open(config_path, "r") as file:
        config = yaml.safe_load(file)
    return config
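
# A minimal sketch of the config yaml this script expects; the keys are the ones
# read off `args` below, and every value here is only an illustrative assumption:
#
#   model_name: "meta-llama/Llama-3.1-8B-Instruct"
#   evals_dataset: "meta-llama/Llama-3.1-8B-Instruct-evals"
#   tasks: "meta_instruct"
#   template_dir: "./templates"
#   work_dir: "./work_dir"
#   tensor_parallel_size: 1
#   data_parallel_size: 1
#   gpu_memory_utilization: 0.9
#   max_model_len: 8192
#   output_path: "./eval_results"
#   limit: null
#   log_samples: true
#   show_config: false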

if __name__ == "__main__":
    args = parse_eval_args()
    config = load_config(args.config_path)
    # Copy every config entry onto the args namespace.
    for k, v in config.items():
        setattr(args, k, v)
    if not os.path.exists(args.template_dir):
        raise ValueError("The template_dir does not exist, please check the path")
    if args.evals_dataset not in [
        "meta-llama/Llama-3.1-8B-Instruct-evals",
        "meta-llama/Llama-3.1-70B-Instruct-evals",
        "meta-llama/Llama-3.1-405B-Instruct-evals",
        "meta-llama/Llama-3.1-8B-evals",
        "meta-llama/Llama-3.1-70B-evals",
        "meta-llama/Llama-3.1-405B-evals",
    ]:
        raise ValueError(
            "The evals dataset is not valid; please double-check the name, which must be one of the names in the Llama 3.1 Evals collection"
        )
    # Create the vLLM model args for lm_eval.
    args.model_args = f"pretrained={args.model_name},tensor_parallel_size={args.tensor_parallel_size},dtype=auto,gpu_memory_utilization={args.gpu_memory_utilization},data_parallel_size={args.data_parallel_size},max_model_len={args.max_model_len},add_bos_token=True,seed=42"
    # Copy all files from the template folder to the work folder.
    copy_and_prepare(args)
    # Prepare the datasets for the IFeval and MATH_Hard tasks, joining the original datasets with the evals datasets.
    prepare_datasets(args)
    print(
        f"preparation for {args.model_name} using {args.evals_dataset} is done; everything is saved in the work_dir: {args.work_dir}"
    )
    command_str = f"lm_eval --model vllm --model_args {args.model_args} --tasks {args.tasks} --batch_size auto --output_path {args.output_path} --include_path {os.path.abspath(args.work_dir)} --seed 42"
    if args.limit:
        command_str += f" --limit {args.limit}"
    if args.log_samples:
        command_str += " --log_samples"
    if args.show_config:
        command_str += " --show_config"
    print("please use the following command to run the meta reproduce evals:")
    print(command_str)
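
# For reference, with the illustrative config sketched above the script would
# print a command along these lines (assumed values, not the output of a run):
#
#   lm_eval --model vllm \
#     --model_args pretrained=meta-llama/Llama-3.1-8B-Instruct,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.9,data_parallel_size=1,max_model_len=8192,add_bos_token=True,seed=42 \
#     --tasks meta_instruct --batch_size auto --output_path ./eval_results \
#     --include_path /abs/path/to/work_dir --seed 42 --log_samples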