# _pre-process-ToolAce.py (779 B)

  1. import json
  2. import re
  3. import uuid
  4. from collections import Counter, defaultdict
  5. from typing import Dict, List
  6. import matplotlib.pyplot as plt
  7. import networkx as nx
  8. import numpy as np
  9. import pandas as pd
  10. import seaborn as sns
  11. from datasets import Dataset, load_dataset
  12. from tqdm import tqdm
  13. dataset = load_dataset("Team-ACE/ToolACE")
  14. new_data = {"id": [], "conversations": []}
  15. # Process each example
  16. for example in dataset["train"]:
  17. # Add system message to conversations and create new structure
  18. new_data["id"].append(str(uuid.uuid4()))
  19. new_data["conversations"].append(
  20. [{"from": "system", "value": example["system"]}] + example["conversations"]
  21. )
  22. new_dataset = Dataset.from_dict(new_data)
  23. # Save it
  24. new_dataset.save_to_disk("transformed_toolace-new")