12345678910111213141516171819202122232425262728 |
- from datasets import load_dataset
- import pandas as pd
- import os
def download_mmlu_pro(
    output_dir: str = "mmlu_pro_data",
    dataset_name: str = "TIGER-Lab/MMLU-Pro",
) -> None:
    """Download a Hugging Face dataset and write every split to a CSV file.

    Each split is saved as ``mmlu_pro_<split>.csv`` inside ``output_dir``,
    and the saved path plus the number of rows is printed per split.

    Args:
        output_dir: Directory that receives the CSV files. It is created
            when it does not exist yet. Defaults to ``"mmlu_pro_data"``
            relative to the current working directory.
        dataset_name: Identifier handed to ``load_dataset``. Defaults to
            the MMLU-Pro repository, ``"TIGER-Lab/MMLU-Pro"``.
    """
    # exist_ok=True makes repeated runs safe: an existing directory is reused.
    os.makedirs(output_dir, exist_ok=True)

    dataset = load_dataset(dataset_name)

    # Iterate name/split pairs directly instead of re-indexing by key.
    for split, split_data in dataset.items():
        # pd.DataFrame(...) is kept on purpose (rather than a
        # datasets-specific export) so the CSV cell formatting stays the same.
        df = pd.DataFrame(split_data)

        output_path = os.path.join(output_dir, f"mmlu_pro_{split}.csv")
        # index=False: do not write the DataFrame row index as a column.
        df.to_csv(output_path, index=False)
        print(f"Saved {split} split to {output_path}")
        print(f"Number of examples in {split}: {len(df)}")
-
def _main():
    """Script entry point: announce, run the download, then confirm."""
    print("Downloading MMLU-Pro dataset...")
    download_mmlu_pro()
    print("Download complete!")


# Only run the download when executed as a script, not when imported.
if __name__ == "__main__":
    _main()
|