# utils.py (765 B)
  1. import string
  2. import datasets
  3. def doc_to_text(doc: dict) -> str:
  4. return doc["input_final_prompts"][0]
  5. def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
  6. def _process_doc(doc: dict) -> dict:
  7. out_doc = {
  8. "problem": doc["input_question"],
  9. "gold": doc["input_correct_responses"][0],
  10. "choices": list(doc["input_choice_list"])
  11. }
  12. return out_doc
  13. dataset = dataset.select_columns(["input_question", "input_correct_responses", "input_final_prompts", "is_correct","input_question_hash","input_choice_list","output_prediction_text"])
  14. dataset = dataset.rename_column("is_correct","previously_is_correct")
  15. dataset = dataset.map(_process_doc)
  16. return dataset.map(_process_doc)