utils.py 702 B

12345678910111213141516171819202122
  1. import random
  2. import re
  3. import datasets
  4. def doc_to_text(doc: dict) -> str:
  5. return doc["input_final_prompts"][0]
  6. def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
  7. def _process_doc(doc: dict) -> dict:
  8. out_doc = {
  9. "problem": doc["input_question"],
  10. "answer": doc["input_correct_responses"][0],
  11. }
  12. return out_doc
  13. dataset = dataset.select_columns(["input_question", "input_correct_responses", "input_final_prompts", "is_correct","input_question_hash","output_prediction_text"])
  14. dataset = dataset.rename_column("is_correct","previously_is_correct")
  15. dataset = dataset.map(_process_doc)
  16. return dataset.map(_process_doc)