utils.py 711 B

12345678910111213141516171819202122
  1. import string
  2. import datasets
  3. def doc_to_text(doc: dict) -> str:
  4. return doc["input_final_prompts"][0]
  5. def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
  6. def _process_doc(doc: dict) -> dict:
  7. out_doc = {
  8. "problem": doc["input_question"],
  9. "gold": doc["input_correct_responses"][0],
  10. }
  11. return out_doc
  12. dataset = dataset.select_columns(["input_question", "input_correct_responses", "input_final_prompts", "is_correct","input_question_hash","input_choice_list","output_prediction_text"])
  13. dataset = dataset.rename_column("is_correct","previously_is_correct")
  14. dataset = dataset.map(_process_doc)
  15. return dataset.map(_process_doc)