# download_mmlu_pro.py (841 B)
  1. from datasets import load_dataset
  2. import pandas as pd
  3. import os
  4. def download_mmlu_pro():
  5. # Create output directory if it doesn't exist
  6. output_dir = "mmlu_pro_data"
  7. os.makedirs(output_dir, exist_ok=True)
  8. # Load the dataset
  9. dataset = load_dataset("TIGER-Lab/MMLU-Pro")
  10. # Convert each split to CSV
  11. for split in dataset.keys():
  12. # Convert to pandas DataFrame
  13. df = pd.DataFrame(dataset[split])
  14. # Save to CSV
  15. output_path = os.path.join(output_dir, f"mmlu_pro_{split}.csv")
  16. df.to_csv(output_path, index=False)
  17. print(f"Saved {split} split to {output_path}")
  18. print(f"Number of examples in {split}: {len(df)}")
  19. if __name__ == "__main__":
  20. print("Downloading MMLU-Pro dataset...")
  21. download_mmlu_pro()
  22. print("Download complete!")