Pārlūkot izejas kodu

adding some fix

Kai Wu 9 mēneši atpakaļ
vecāks
revīzija
ed201fc8e5

Failā izmaiņas netiks attēlotas, jo tās ir par lielu
+ 29 - 8
tools/benchmarks/meta_eval_reproduce/README.md


+ 0 - 1
tools/benchmarks/meta_eval_reproduce/meta_template/gpqa_cot/utils.py

@@ -12,7 +12,6 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
         out_doc = {
             "problem": doc["input_question"],
             "gold": doc["input_correct_responses"][0],
-            "choices": list(doc["input_choice_list"])
         }
         return out_doc
     dataset = dataset.select_columns(["input_question", "input_correct_responses", "input_final_prompts", "is_correct","input_question_hash","input_choice_list","output_prediction_text"])

+ 1 - 37
tools/benchmarks/meta_eval_reproduce/meta_template/math_hard/utils.py

@@ -37,46 +37,10 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
     return dataset.map(_process_doc)
 
 
-
-def extract_result_from_boxed(answer: str) -> str:
-    box_start = "\\boxed"
-    # format is `\\boxed <value>$` or `\\boxed{<value>}`, with potential white spaces framing `<value>`
-    start = answer.rfind(box_start)
-    if start < 0:
-        return ""
-    answer = answer[start + len(box_start) :].strip()
-    ends_with_curly = answer.startswith("{")
-    i = 0
-    open_braces = 0
-    while i < len(answer):
-        if answer[i] == "{":
-            open_braces += 1
-        elif answer[i] == "}":
-            open_braces -= 1
-        if open_braces == 0:
-            if ends_with_curly:
-                answer = answer[: i + 1].strip()
-                break
-            elif answer[i] == "$":
-                answer = answer[:i].strip()
-                break
-        i += 1
-    else:
-        return ""
-    # remove extra curly braces
-    while True:
-        if answer.startswith("{") and answer.endswith("}"):
-            answer = answer[1:-1].strip()
-        else:
-            break
-    return answer
-
 def process_results(doc: dict, results: List[str]) -> Dict[str, int]:
     candidates = results[0]
 
-    unnormalized_answer = get_unnormalized_answer(candidates)
-    if unnormalized_answer == "[invalidanswer]":
-        unnormalized_answer = extract_result_from_boxed(candidates)
+    unnormalized_answer = remove_boxed(last_boxed_only_string(candidates))
     answer = normalize_final_answer(unnormalized_answer)
 
     if answer.strip() == doc["answer"].strip() or is_equiv(answer, doc["answer"]):