|
@@ -1,32 +0,0 @@
|
|
|
-import json
|
|
|
-import re
|
|
|
-import uuid
|
|
|
-from collections import Counter, defaultdict
|
|
|
-from typing import Dict, List
|
|
|
-
|
|
|
-import matplotlib.pyplot as plt
|
|
|
-import networkx as nx
|
|
|
-import numpy as np
|
|
|
-import pandas as pd
|
|
|
-import seaborn as sns
|
|
|
-from datasets import Dataset, load_dataset
|
|
|
-from tqdm import tqdm
|
|
|
-
|
|
|
-dataset = load_dataset("Team-ACE/ToolACE")
|
|
|
-
|
|
|
-# Transform data
|
|
|
-new_data = {"id": [], "conversations": []}
|
|
|
-
|
|
|
-# Process each example
|
|
|
-for example in dataset["train"]:
|
|
|
- # Add system message to conversations and create new structure
|
|
|
- new_data["id"].append(str(uuid.uuid4()))
|
|
|
- new_data["conversations"].append(
|
|
|
- [{"from": "system", "value": example["system"]}] + example["conversations"]
|
|
|
- )
|
|
|
-
|
|
|
-# Create new dataset with just id and conversations
|
|
|
-new_dataset = Dataset.from_dict(new_data)
|
|
|
-
|
|
|
-# Save it
|
|
|
-new_dataset.save_to_disk("transformed_toolace-new")
|