Venelin Valkov 1 gadu atpakaļ
vecāks
revīzija
3379cc3e38
1 mainīts fails ar 9 papildinājumiem un 11 dzēšanām
  1. 9 11
      14.fine-tuning-llama-2-7b-on-custom-dataset.ipynb

+ 9 - 11
14.fine-tuning-llama-2-7b-on-custom-dataset.ipynb

@@ -184,22 +184,20 @@
   {
    "cell_type": "code",
    "source": [
+    "def clean_text(text):\n",
+    "    text = re.sub(r\"http\\S+\", \"\", text)\n",
+    "    text = re.sub(r\"@[^\\s]+\", \"\", text)\n",
+    "    text = re.sub(r\"\\s+\", \" \", text)\n",
+    "    return re.sub(r\"\\^[^ ]+\", \"\", text)\n",
+    "\n",
+    "\n",
     "def create_conversation_text(data_point):\n",
     "    text = \"\"\n",
     "    for item in data_point[\"log\"]:\n",
-    "        user = item[\"user utterance\"]\n",
-    "\n",
-    "        user = re.sub(r\"http\\S+\", \"\", user)\n",
-    "        user = re.sub(r\"@[^\\s]+\", \"\", user)\n",
-    "        user = re.sub(r\"\\s+\", \" \", user)\n",
-    "        user = re.sub(r\"\\^[^ ]+\", \"\", user)\n",
+    "        user = clean_text(item[\"user utterance\"])\n",
     "        text += f\"user: {user.strip()}\\n\"\n",
     "\n",
-    "        agent = item[\"system response\"]\n",
-    "        agent = re.sub(r\"http\\S+\", \"\", agent)\n",
-    "        agent = re.sub(r\"@[^\\s]+\", \"\", agent)\n",
-    "        agent = re.sub(r\"\\s+\", \" \", agent)\n",
-    "        agent = re.sub(r\"\\^[^ ]+\", \"\", agent)\n",
+    "        agent = clean_text(item[\"system response\"])\n",
     "        text += f\"agent: {agent.strip()}\\n\"\n",
     "\n",
     "    return text"