Browse Source

Create prep-for-FT.py

Sanyam Bhutani 3 months ago
parent
commit
7f4bc96259
1 changed files with 32 additions and 0 deletions
  1. 32 0
      end-to-end-use-cases/data-tool/dataprep-scripts/prep-for-FT.py

+ 32 - 0
end-to-end-use-cases/data-tool/dataprep-scripts/prep-for-FT.py

@@ -0,0 +1,32 @@
+import json
+import re
+import uuid
+from collections import Counter, defaultdict
+from typing import Dict, List
+
+import matplotlib.pyplot as plt
+import networkx as nx
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from datasets import Dataset, load_dataset
+from tqdm import tqdm
+
+dataset = load_dataset("Team-ACE/ToolACE")
+
+# Transform data
+new_data = {"id": [], "conversations": []}
+
+# Process each example
+for example in dataset["train"]:
+    # Add system message to conversations and create new structure
+    new_data["id"].append(str(uuid.uuid4()))
+    new_data["conversations"].append(
+        [{"from": "system", "value": example["system"]}] + example["conversations"]
+    )
+
+# Create new dataset with just id and conversations
+new_dataset = Dataset.from_dict(new_data)
+
+# Save it
+new_dataset.save_to_disk("transformed_toolace-new")