2 года назад · a2a2ffd78a
--- a/recipes/evaluation/README.md
+++ b/recipes/evaluation/README.md
@@ -28,7 +28,7 @@ Before running the evaluation script, ensure you have all the necessary dependen
 
				 Clone the lm-evaluation-harness repository and install it:
			
 
				 
			
 
				 ```bash
			
 
				-git clone https://github.com/matthoffner/lm-evaluation-harness.git
			
 
				+git clone https://github.com/EleutherAI/lm-evaluation-harness.git
			
 
				 cd lm-evaluation-harness
			
 
				 pip install -e .
			
 
				 
			
--- a/recipes/evaluation/eval.py
+++ b/recipes/evaluation/eval.py
@@ -11,7 +11,7 @@ from pathlib import Path
 
				 
			
 
				 import numpy as np
			
 
				 import lm_eval
			
 
				-from lm_eval import evaluator, tasks
			
 
				+from lm_eval import tasks
			
 
				 from lm_eval.utils import make_table
			
 
				 
			
 
				 
			
@@ -73,12 +73,11 @@ def handle_output(args, results, logger):
 
				 
			
 
				 
			
 
				 def load_tasks(args):
			
 
				-    tasks.initialize_tasks()
			
 
				     if args.open_llm_leaderboard_tasks:
			
 
				         current_dir = os.getcwd()
			
 
				         config_dir = os.path.join(current_dir, "open_llm_leaderboard")
			
 
				-        lm_eval.tasks.include_path(config_dir)
			
 
				-        return [
			
 
				+        task_manager = tasks.TaskManager(include_path=config_dir)
			
 
				+        return task_manager, [
			
 
				             "arc_challenge_25_shot",
			
 
				             "hellaswag_10_shot",
			
 
				             "truthfulqa_mc2",
			
@@ -86,7 +85,7 @@ def load_tasks(args):
 
				             "gsm8k",
			
 
				             "mmlu",
			
 
				         ]
			
 
				-    return args.tasks.split(",") if args.tasks else []
			
 
				+    return None, args.tasks.split(",") if args.tasks else []
			
 
				 
			
 
				 
			
 
				 def parse_eval_args():
			
@@ -190,21 +189,18 @@ def parse_eval_args():
 
				         default=None,
			
 
				         help="Additional path to include if there are external tasks.",
			
 
				     )
			
 
				-    parser.add_argument(
			
 
				-        "--decontamination_ngrams_path", default=None
			
 
				-    )  # Not currently used
			
 
				     return parser.parse_args()
			
 
				 
			
 
				 
			
 
				 def evaluate_model(args):
			
 
				     try:
			
 
				-        task_list = load_tasks(args)
			
 
				+        task_manager, task_list = load_tasks(args)
			
 
				         # Customized model such as Quantized model etc.
			
 
				         # In case you are working with a custom model, you can use the following guide to add it here:
			
 
				         # https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md#external-library-usage
			
 
				 
			
 
				         # Evaluate
			
 
				-        results = evaluator.simple_evaluate(
			
 
				+        results = lm_eval.simple_evaluate(
			
 
				             model=args.model,
			
 
				             model_args=args.model_args,
			
 
				             tasks=task_list,
			
@@ -214,11 +210,11 @@ def evaluate_model(args):
 
				             device=args.device,
			
 
				             use_cache=args.use_cache,
			
 
				             limit=args.limit,
			
 
				-            decontamination_ngrams_path=args.decontamination_ngrams_path,
			
 
				             check_integrity=args.check_integrity,
			
 
				             write_out=args.write_out,
			
 
				             log_samples=args.log_samples,
			
 
				             gen_kwargs=args.gen_kwargs,
			
 
				+            task_manager=task_manager,
			
 
				         )
			
 
				         handle_output(args, results, logger)