# meta_eval.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
import argparse
import errno
import glob
import json
import logging
import os
import re
import shutil
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional, Sequence

import lm_eval
import numpy as np
import yaml
from lm_eval import tasks
from lm_eval.utils import make_table

from prepare_dataset import get_ifeval_data, get_math_data
  19. def _handle_non_serializable(o):
  20. if isinstance(o, np.int64) or isinstance(o, np.int32):
  21. return int(o)
  22. elif isinstance(o, set):
  23. return list(o)
  24. else:
  25. return str(o)
  26. def setup_logging(verbosity):
  27. logging.basicConfig(
  28. level=verbosity.upper(), format="%(asctime)s - %(levelname)s - %(message)s"
  29. )
  30. return logging.getLogger(__name__)
  31. def change_yaml(args, base_name):
  32. for yaml_file in glob.glob(args.template_dir+'**/*/*.yaml', recursive=True):
  33. with open(yaml_file, "r") as sources:
  34. lines = sources.readlines()
  35. output_path = yaml_file.replace(args.template_dir,args.work_dir)
  36. print(f"changing {yaml_file} to output_path: {output_path}")
  37. path = Path(output_path)
  38. yaml_dir = path.parent
  39. with open(output_path, "w") as output:
  40. for line in lines:
  41. output.write(line.replace("Meta-Llama-3.1-8B",base_name).replace("WORK_DIR",str(yaml_dir)))
  42. def handle_output(args, results, logger):
  43. if not results:
  44. logger.error("No results found.")
  45. sys.exit(1)
  46. if not args.output_path:
  47. if args.log_samples:
  48. logger.error("Specify --output_path for logging samples.")
  49. sys.exit(1)
  50. return
  51. if args.log_samples:
  52. samples = results.pop("samples")
  53. results_str = json.dumps(
  54. results, indent=2, default=_handle_non_serializable, ensure_ascii=False
  55. )
  56. if args.show_config:
  57. logger.info(results_str)
  58. date_id = datetime.now().isoformat().replace(":", "-")
  59. path = Path(args.output_path)
  60. output_dir = path.parent if path.suffix in (".json", ".jsonl") else path
  61. output_dir.mkdir(parents=True, exist_ok=True)
  62. file_path = os.path.join(args.output_path, "eval_results_" + date_id + ".json")
  63. with open(file_path , "w", encoding="utf-8") as f:
  64. f.write(results_str)
  65. if args.log_samples:
  66. for task_name, _ in results.get("configs", {}).items():
  67. output_name = task_name + "_"+ date_id + re.sub(r"/|=", "_", args.model_args.split(",")[0].replace("pretrained",""))
  68. sample_file = output_dir.joinpath(f"{output_name}.jsonl")
  69. sample_data = json.dumps(
  70. samples.get(task_name, {}), indent=2, default=_handle_non_serializable
  71. )
  72. sample_file.write_text(sample_data, encoding="utf-8")
  73. batch_sizes = ",".join(map(str, results.get("config", {}).get("batch_sizes", [])))
  74. summary = f"{args.model_name} ({args.model_args})"
  75. logger.info(summary)
  76. logger.info(make_table(results))
  77. if "groups" in results:
  78. logger.info(make_table(results, "groups"))
  79. def load_tasks(args):
  80. if not args.tasks or "meta" not in args.tasks:
  81. raise ValueError("Please specify a valid meta task name")
  82. if args.tasks:
  83. tasks_list = args.tasks.split(",")
  84. else:
  85. print("No tasks specified. Please try again")
  86. sys.exit(1)
  87. current_dir = os.getcwd()
  88. config_dir = os.path.join(current_dir, args.work_dir)
  89. print(f"Including the config_dir to task manager: {config_dir}")
  90. task_manager = tasks.TaskManager(include_path=config_dir)
  91. return task_manager, tasks_list
  92. def copy_and_prepare(args):
  93. if not os.path.exists(args.work_dir):
  94. # Copy the all files, including yaml files and python files, from template folder to the work folder
  95. copy_dir(args.template_dir,args.work_dir)
  96. else:
  97. print("work_dir already exists, no need to copy files")
  98. # Use the template yaml to get the correct model name in work_dir yaml
  99. base_name = args.evals_dataset.split("/")[-1].replace("-evals","").replace("-Instruct","")
  100. change_yaml(args, base_name)
  101. def parse_eval_args():
  102. parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
  103. parser.add_argument(
  104. "--config_path",
  105. type=str,
  106. default="./eval_config.yaml",
  107. help="the config yaml file that contains all the eval parameters",
  108. )
  109. return parser.parse_args()
  110. def prepare_datasets(task_list,args):
  111. # Prepare the dataset for the IFeval and MATH_Hard tasks as we need to join the original dataset with the evals dataset by the actual questions.
  112. # model_name are derived from the evals_dataset name
  113. model_name = args.evals_dataset.split("/")[-1].replace("-evals","")
  114. if "meta_instruct" in task_list:
  115. get_ifeval_data(model_name,args.work_dir)
  116. get_math_data(model_name,args.work_dir)
  117. else:
  118. if "meta_ifeval" in task_list:
  119. get_ifeval_data(model_name,args.work_dir)
  120. if "meta_math_hard" in task_list:
  121. get_math_data(model_name,args.work_dir)
  122. def evaluate_model(args):
  123. # Customized model such as Quantized model etc.
  124. # In case you are working with a custom model, you can use the following guide to add it here:
  125. # https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md#external-library-usage
  126. task_manager, task_list = load_tasks(args)
  127. logger.info(f"Loaded tasks: {task_list}")
  128. # We need to prepare the dataset for the IFeval and MATH_Hard tasks
  129. if "meta_instruct" in task_list or "meta_ifeval" in task_list or "meta_math_hard" in task_list:
  130. prepare_datasets(task_list, args)
  131. # Evaluate
  132. results = lm_eval.simple_evaluate(
  133. model="vllm",
  134. model_args=args.model_args,
  135. tasks=task_list,
  136. limit=args.limit,
  137. log_samples=args.log_samples,
  138. task_manager=task_manager,
  139. random_seed=42,
  140. numpy_random_seed=42,
  141. torch_random_seed=42,
  142. fewshot_random_seed=42
  143. )
  144. handle_output(args, results, logger)
  145. def copy_dir(src, dst):
  146. try:
  147. shutil.copytree(src, dst)
  148. except OSError as exc: # python >2.5
  149. if exc.errno in (errno.ENOTDIR, errno.EINVAL):
  150. shutil.copy(src, dst)
  151. else: raise
  152. def load_config(config_path: str = "./config.yaml"):
  153. # Read the YAML configuration file
  154. with open(config_path, "r") as file:
  155. config = yaml.safe_load(file)
  156. return config
  157. if __name__ == "__main__":
  158. args = parse_eval_args()
  159. config = load_config(args.config_path)
  160. # Create VLLM model args
  161. for k,v in config.items():
  162. args.__setattr__(k,v)
  163. if not os.path.exists(args.template_dir):
  164. raise ValueError("The template_dir does not exist, please check the path")
  165. if args.evals_dataset not in ["meta-llama/Meta-Llama-3.1-8B-Instruct-evals","meta-llama/Meta-Llama-3.1-70B-Instruct-evals","meta-llama/Meta-Llama-3.1-405B-Instruct-evals","meta-llama/Meta-Llama-3.1-8B-evals","meta-llama/Meta-Llama-3.1-70B-evals","meta-llama/Meta-Llama-3.1-405B-evals"]:
  166. raise ValueError("The evals dataset is not valid, please double check the name, must use the name in the Llama 3.1 Evals collection")
  167. args.model_args = f"pretrained={args.model_name},tensor_parallel_size={args.tensor_parallel_size},dtype=auto,gpu_memory_utilization={args.gpu_memory_utilization},data_parallel_size={args.data_parallel_size},max_model_len={args.max_model_len},add_bos_token=True,seed=42"
  168. # Copy the all files from template folder to the work folder
  169. copy_and_prepare(args)
  170. logger = setup_logging(args.verbosity)
  171. evaluate_model(args)