"""Reshape the ToolACE dataset into an ``id`` / ``conversations`` layout.

The script loads ``Team-ACE/ToolACE``, walks its ``train`` split, and for
every example builds a conversation that starts with a ``system`` turn
(taken from the example's ``system`` field) followed by the example's own
``conversations`` entries.  Each row gets a freshly generated UUID4 string
as its ``id``.  The result is written to ``transformed_toolace-new`` with
``Dataset.save_to_disk``.
"""

# NOTE(review): several of the imports below (json, re, Counter, defaultdict,
# Dict, List, plt, nx, np, pd, sns, tqdm) are not referenced by the visible
# script; they are kept untouched in case other tooling relies on them.
import json
import re
import uuid
from collections import Counter, defaultdict
from typing import Dict, List

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from datasets import Dataset, load_dataset
from tqdm import tqdm

dataset = load_dataset("Team-ACE/ToolACE")

# Column-oriented accumulator: one list per output column, kept in lockstep
# so that index i of "id" and "conversations" describe the same row.
new_data = {"id": [], "conversations": []}

for example in dataset["train"]:
    # IDs are random (uuid4), so they differ between runs of this script.
    new_data["id"].append(str(uuid.uuid4()))
    # Put the system prompt first, as a turn in the same {"from", "value"}
    # shape, then the original turns.
    # Assumes example["conversations"] is a list (list concatenation is used)
    # -- TODO confirm against the dataset schema.
    new_data["conversations"].append(
        [{"from": "system", "value": example["system"]}] + example["conversations"]
    )

# Only the two columns collected above end up in the new dataset.
new_dataset = Dataset.from_dict(new_data)

# Persist the result to a local directory.
new_dataset.save_to_disk("transformed_toolace-new")