
Add more detail to the README

Kai Wu · 9 months ago · commit 1f666708cc

File diff suppressed because it is too large
+ 105 - 1
tools/benchmarks/meta_eval_reproduce/README.md


+ 3 - 2
tools/benchmarks/meta_eval_reproduce/meta_template/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml

@@ -1,9 +1,9 @@
+task: meta_mmlu_pro_instruct
 dataset_path: meta-llama/Meta-Llama-3.1-8B-Instruct-evals
 dataset_name: Meta-Llama-3.1-8B-Instruct-evals__mmlu_pro__details
+test_split: latest
 output_type: generate_until
-task: meta_mmlu_pro_instruct
 process_docs: !function utils.process_docs
-test_split: latest
 doc_to_text: !function utils.doc_to_text
 doc_to_target: gold
 filter_list:
@@ -15,6 +15,7 @@ filter_list:
       - function: "take_first"
 generation_kwargs:
   until: []
+  do_sample: false
   temperature: 0
   max_gen_toks: 1024
 num_fewshot: 0
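
This hunk moves `task` and `test_split` to the top of the file and pins `do_sample: false`, so decoding is explicitly greedy rather than relying on `temperature: 0` alone. A minimal sketch (not the harness itself; assumes `transformers` is installed, with `gpt2` standing in for the actual Meta-Llama-3.1-8B-Instruct checkpoint) of what these `generation_kwargs` mean once forwarded to HF `generate()`:

```python
# Minimal sketch: greedy decoding as implied by do_sample: false.
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")   # stand-in model, not the eval target
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tok("Q: What is 2 + 2?\nA:", return_tensors="pt")
out = model.generate(
    **inputs,
    do_sample=False,    # greedy decoding; sampling parameters are ignored
    max_new_tokens=64,  # plays the role of max_gen_toks in the YAML
)
print(tok.decode(out[0], skip_special_tokens=True))
```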

+ 3 - 2
tools/benchmarks/meta_eval_reproduce/meta_template/mmlu_pro/mmlu_pro_5shot_cot_pretrain.yaml

@@ -1,9 +1,9 @@
+task: meta_mmlu_pro_pretrain
 dataset_path: meta-llama/Meta-Llama-3.1-8B-evals
 dataset_name: Meta-Llama-3.1-8B-evals__mmlu_pro__details
+test_split: latest
 output_type: generate_until
-task: meta_mmlu_pro_pretrain
 process_docs: !function utils.process_docs
-test_split: latest
 doc_to_text: !function utils.doc_to_text
 doc_to_target: gold
 filter_list:
@@ -14,6 +14,7 @@ filter_list:
       - function: "take_first"
 generation_kwargs:
   until: "\n\nQ: "
+  do_sample: false
   temperature: 0
   max_gen_toks: 512
 num_fewshot: 0
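
The pretrain variant gets the same `do_sample: false` pin, and additionally stops generation at the few-shot delimiter `"\n\nQ: "`. A simplified sketch (not the harness's actual code, which also handles stopping during decoding) of the trimming that an `until` marker implies:

```python
# Simplified sketch: cut the completion at the first occurrence of the
# stop string, as generation_kwargs' `until` suggests.
def apply_until(completion: str, stop: str = "\n\nQ: ") -> str:
    idx = completion.find(stop)
    return completion if idx == -1 else completion[:idx]

# The raw continuation may run into the next few-shot example:
raw = "The answer is (B).\n\nQ: Which of the following..."
print(apply_until(raw))  # -> "The answer is (B)."
```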

+ 0 - 1
tools/benchmarks/meta_eval_reproduce/meta_template/mmlu_pro/utils.py

@@ -13,7 +13,6 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
         out_doc = {
             "problem": doc["input_question"],
             "gold": doc["input_correct_responses"][0],
-            "choices": list(doc["input_choice_list"])
         }
         return out_doc
     dataset = dataset.select_columns(["input_question", "input_correct_responses", "input_final_prompts", "is_correct", "input_question_hash", "input_choice_list", "output_prediction_text"])
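
After this change each processed doc carries only the question and the first gold answer; `"choices"` is no longer copied over, although `input_choice_list` remains among the selected columns. A hedged reconstruction (hypothetical, not the exact repo file) of the resulting `process_docs`:

```python
import datasets

def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    def _process_doc(doc: dict) -> dict:
        # Keep the question text and the first correct response;
        # the "choices" field is no longer materialized.
        return {
            "problem": doc["input_question"],
            "gold": doc["input_correct_responses"][0],
        }
    return dataset.map(_process_doc)
```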