utils.py 773 B

12345678910111213141516171819202122
  1. import random
  2. import re
  3. import datasets
  4. def doc_to_text(doc: dict) -> str:
  5. return doc["input_final_prompts"][0]
  6. def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
  7. def _process_doc(doc: dict) -> dict:
  8. out_doc = {
  9. "problem": doc["input_question"],
  10. "gold": doc["input_correct_responses"][0],
  11. "choices": list(doc["input_choice_list"])
  12. }
  13. return out_doc
  14. dataset = dataset.select_columns(["input_question", "input_correct_responses", "input_final_prompts", "is_correct","input_question_hash","input_choice_list","output_prediction_text"])
  15. dataset = dataset.rename_column("is_correct","previously_is_correct")
  16. dataset = dataset.map(_process_doc)
  17. return dataset.map(_process_doc)