llama_mmlu_pro.py

import typing as t

import dspy
from datasets import load_dataset

from .datatypes import TaskDatasets
from .helpers import train_val_test_split


def datasets(
    train_size: float = 0.1,
    validation_size: float = 0.2,
) -> TaskDatasets:
    """
    TODO: Load the dataset here. It should be a datasets.Dataset (NOT a
    DatasetDict), or split the DatasetDict yourself however you want.
    """
    dataset = load_dataset("TODO")
    return train_val_test_split(
        dataset, _task_doc_example, train_size, validation_size
    )
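

# A minimal sketch of what the completed loader could look like. It assumes the
# data comes from the "TIGER-Lab/MMLU-Pro" dataset on the Hugging Face Hub and
# that its "test" split exposes "question", "options", and "answer" columns;
# treat the repo id, split, and column names as assumptions to verify, not as
# something this file prescribes. The helper name itself is hypothetical.
def _datasets_sketch(
    train_size: float = 0.1,
    validation_size: float = 0.2,
) -> TaskDatasets:
    dataset = load_dataset("TIGER-Lab/MMLU-Pro", split="test")  # assumed repo id/split
    dataset = dataset.map(
        lambda row: {
            # Fold the multiple-choice options into the problem text and keep
            # the letter answer as the gold label, matching TaskDoc below.
            "problem": row["question"]
            + "\n"
            + "\n".join(
                f"{chr(ord('A') + i)}. {option}"
                for i, option in enumerate(row["options"])
            ),
            "gold": row["answer"],
        }
    )
    return train_val_test_split(
        dataset, _task_doc_example, train_size, validation_size
    )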


class TaskDoc(t.TypedDict):
    problem: str
    gold: str


# Field names the task exposes to dspy: "problem" is the input field,
# "answer" is the output field the model must produce.
inputs = ["problem"]
outputs = ["answer"]


def _task_doc_example(doc: TaskDoc) -> dspy.Example:
    return dspy.Example(
        problem=doc["problem"],
        answer=doc["gold"],
    ).with_inputs(*inputs)
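

# Illustration only (the row values below are made up): a TaskDoc maps onto a
# dspy Example whose sole input key is "problem" and whose label is "answer".
def _task_doc_example_demo() -> None:
    doc: TaskDoc = {"problem": "What is 2 + 2?\nA. 3\nB. 4", "gold": "B"}
    example = _task_doc_example(doc)
    assert example.problem == doc["problem"]
    assert example.answer == "B"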


def signature(instructions: str = "") -> dspy.Signature:
    class MMLUPro(dspy.Signature):
        __doc__ = instructions

        problem: str = dspy.InputField()
        answer: str = dspy.OutputField()

    return MMLUPro
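

# A hedged usage sketch: the class returned by signature() can be dropped into
# any dspy module. The model id below is an arbitrary assumption for
# illustration; configure whichever LM you actually use.
def _signature_demo() -> None:
    dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))  # assumed model id
    predict = dspy.ChainOfThought(signature("Answer with a single option letter."))
    prediction = predict(problem="What is 2 + 2?\nA. 3\nB. 4\nC. 5\nD. 22")
    print(prediction.answer)  # e.g. "B"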


def metric(gold: dspy.Example, pred: dspy.Example, trace=False) -> bool:
    return gold.answer == pred.answer
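

# metric() is a plain exact-match check on the answer string, so it can be used
# on its own or handed to dspy.Evaluate. A tiny self-check with made-up values:
def _metric_demo() -> None:
    gold = dspy.Example(problem="What is 2 + 2?\nA. 3\nB. 4", answer="B")
    pred = dspy.Example(answer="B")
    assert metric(gold, pred)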