瀏覽代碼

added inspect and modified harness

Justin Lee 2 月之前
父節點
當前提交
9ffb292272

文件差異過大導致無法顯示
+ 9378 - 13006
end-to-end-use-cases/prompt-migration/notebooks/harness.ipynb


+ 426 - 0
end-to-end-use-cases/prompt-migration/notebooks/inspect.ipynb

@@ -0,0 +1,426 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "005b80b1-9dbc-47cd-bde3-2c0da024b73a",
+   "metadata": {},
+   "source": [
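+    "# Inspect prediction results\n",
+    "\n",
+    "Loads the prediction CSV, lists the questions that were answered incorrectly, reports the overall accuracy, and cross-checks the `Is Correct` column against the predicted and correct answers."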
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "39e4c357-02b1-42dd-8575-9d968ba25683",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "csv_path = \"prediction_results.csv\" "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ce23b503-3adf-47de-b637-99c74c4a6633",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Question</th>\n",
+       "      <th>Predicted Answer</th>\n",
+       "      <th>Correct Answer</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>We must assume that he is a Communist, because...</td>\n",
+       "      <td>A</td>\n",
+       "      <td>D</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>A water bed sells for $330 cash or $40 down an...</td>\n",
+       "      <td>J</td>\n",
+       "      <td>D</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>The team that wins in a tug-of-war is the team...</td>\n",
+       "      <td>H</td>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>In a circuit of two lamps in parallel, if the ...</td>\n",
+       "      <td>H</td>\n",
+       "      <td>B</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>Consider an arbitrage-free securities market m...</td>\n",
+       "      <td>B</td>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1127</th>\n",
+       "      <td>The annular space between two concentricalumin...</td>\n",
+       "      <td>B</td>\n",
+       "      <td>D</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1129</th>\n",
+       "      <td>A 0.1 mm thick neutral density filter gives 70...</td>\n",
+       "      <td>I</td>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1130</th>\n",
+       "      <td>Air (100°F, 1atm) is flowing at a velocity of ...</td>\n",
+       "      <td>A</td>\n",
+       "      <td>I</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1132</th>\n",
+       "      <td>This question refers to the following informat...</td>\n",
+       "      <td>A</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1134</th>\n",
+       "      <td>Order the following (risk, return) pairs from ...</td>\n",
+       "      <td>E</td>\n",
+       "      <td>G</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>300 rows × 3 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                               Question Predicted Answer  \\\n",
+       "2     We must assume that he is a Communist, because...                A   \n",
+       "14    A water bed sells for $330 cash or $40 down an...                J   \n",
+       "16    The team that wins in a tug-of-war is the team...                H   \n",
+       "32    In a circuit of two lamps in parallel, if the ...                H   \n",
+       "37    Consider an arbitrage-free securities market m...                B   \n",
+       "...                                                 ...              ...   \n",
+       "1127  The annular space between two concentricalumin...                B   \n",
+       "1129  A 0.1 mm thick neutral density filter gives 70...                I   \n",
+       "1130  Air (100°F, 1atm) is flowing at a velocity of ...                A   \n",
+       "1132  This question refers to the following informat...                A   \n",
+       "1134  Order the following (risk, return) pairs from ...                E   \n",
+       "\n",
+       "     Correct Answer  \n",
+       "2                 D  \n",
+       "14                D  \n",
+       "16                A  \n",
+       "32                B  \n",
+       "37                A  \n",
+       "...             ...  \n",
+       "1127              D  \n",
+       "1129              A  \n",
+       "1130              I  \n",
+       "1132              C  \n",
+       "1134              G  \n",
+       "\n",
+       "[300 rows x 3 columns]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Showing first 300 rows out of 3271 incorrect answers.\n",
+      "\n",
+      "Accuracy: 72.69%\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from tabulate import tabulate\n",
+    "from IPython.display import display\n",
+    "\n",
+    "def display_incorrect_answers(csv_path, max_rows=300):\n",
+    "    \"\"\"Display the incorrectly answered rows of a prediction CSV and print the accuracy.\n",
+    "\n",
+    "    Args:\n",
+    "        csv_path: Path to a CSV containing 'Question', 'Predicted Answer'\n",
+    "            and 'Correct Answer' columns.\n",
+    "        max_rows: Upper bound on the number of incorrect rows displayed.\n",
+    "\n",
+    "    Returns:\n",
+    "        None; the table is rendered via display() and the summary is printed.\n",
+    "    \"\"\"\n",
+    "    df = pd.read_csv(csv_path)\n",
+    "\n",
+    "    total_questions = len(df)\n",
+    "    if total_questions == 0:\n",
+    "        # An empty file would make the accuracy division below raise ZeroDivisionError.\n",
+    "        print(f\"\\nNo rows found in {csv_path}; accuracy is undefined.\")\n",
+    "        return\n",
+    "\n",
+    "    # Element-wise comparison: a missing (NaN) answer never compares equal,\n",
+    "    # so such rows are counted as incorrect.\n",
+    "    incorrect_answers = df[df['Predicted Answer'] != df['Correct Answer']]\n",
+    "\n",
+    "    columns = ['Question', 'Predicted Answer', 'Correct Answer']\n",
+    "    display(incorrect_answers[columns].head(max_rows))\n",
+    "\n",
+    "    if len(incorrect_answers) > max_rows:\n",
+    "        print(f\"\\nShowing first {max_rows} rows out of {len(incorrect_answers)} incorrect answers.\")\n",
+    "\n",
+    "    incorrect_count = len(incorrect_answers)\n",
+    "    accuracy = ((total_questions - incorrect_count) / total_questions) * 100\n",
+    "    print(f\"\\nAccuracy: {accuracy:.2f}%\")\n",
+    "\n",
+    "display_incorrect_answers(csv_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "6ad0e4b2-3845-4081-a686-9d2eac98fc25",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "All 'Is Correct' rows have matching answers.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from IPython.display import display\n",
+    "\n",
+    "def check_correct_answers(csv_path):\n",
+    "    \"\"\"Cross-check the 'Is Correct' flag against the answer columns.\n",
+    "\n",
+    "    Rows flagged as correct whose 'Predicted Answer' differs from\n",
+    "    'Correct Answer' are displayed with a warning; otherwise a confirmation\n",
+    "    message is printed.\n",
+    "\n",
+    "    Args:\n",
+    "        csv_path: Path to a CSV containing 'Question', 'Predicted Answer',\n",
+    "            'Correct Answer' and 'Is Correct' columns.\n",
+    "\n",
+    "    Returns:\n",
+    "        None; results are printed / rendered via display().\n",
+    "    \"\"\"\n",
+    "    df = pd.read_csv(csv_path)\n",
+    "\n",
+    "    # 'Is Correct' is not guaranteed to be a boolean column: incorrect rows of\n",
+    "    # the inspected CSV carry the text marker \"❌\". Comparing such a column\n",
+    "    # with `== True` matches nothing, which makes this check pass vacuously.\n",
+    "    # Normalise the flag to text and match known \"correct\" spellings instead;\n",
+    "    # a real boolean column still works because True becomes \"true\".\n",
+    "    # NOTE(review): \"✅\" as the positive marker is an assumption - confirm\n",
+    "    # against the code that writes the CSV.\n",
+    "    correct_markers = {\"true\", \"1\", \"yes\", \"✅\", \"✔\", \"✓\"}\n",
+    "    flag_text = df['Is Correct'].astype(str).str.strip().str.lower()\n",
+    "    flagged_correct = flag_text.isin(correct_markers)\n",
+    "\n",
+    "    if not flagged_correct.any():\n",
+    "        # Avoid reporting success when there was nothing to verify.\n",
+    "        print(\"\\nNo rows are flagged as correct in 'Is Correct'; nothing to verify.\")\n",
+    "        return\n",
+    "\n",
+    "    correct_mismatches = df[flagged_correct & (df['Predicted Answer'] != df['Correct Answer'])]\n",
+    "\n",
+    "    if not correct_mismatches.empty:\n",
+    "        print(\"\\nWarning: Some 'Is Correct' rows have mismatched answers!\")\n",
+    "        display(correct_mismatches[['Question', 'Predicted Answer', 'Correct Answer', 'Is Correct']])\n",
+    "    else:\n",
+    "        print(\"\\nAll 'Is Correct' rows have matching answers.\")\n",
+    "\n",
+    "\n",
+    "check_correct_answers(csv_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "b2a9ed17-8e39-4f81-8a27-358e948967de",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Incorrect Rows\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Question</th>\n",
+       "      <th>Predicted Answer</th>\n",
+       "      <th>Correct Answer</th>\n",
+       "      <th>Is Correct</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>We must assume that he is a Communist, because...</td>\n",
+       "      <td>A</td>\n",
+       "      <td>D</td>\n",
+       "      <td>❌</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>A water bed sells for $330 cash or $40 down an...</td>\n",
+       "      <td>J</td>\n",
+       "      <td>D</td>\n",
+       "      <td>❌</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>The team that wins in a tug-of-war is the team...</td>\n",
+       "      <td>H</td>\n",
+       "      <td>A</td>\n",
+       "      <td>❌</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>In a circuit of two lamps in parallel, if the ...</td>\n",
+       "      <td>H</td>\n",
+       "      <td>B</td>\n",
+       "      <td>❌</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>Consider an arbitrage-free securities market m...</td>\n",
+       "      <td>B</td>\n",
+       "      <td>A</td>\n",
+       "      <td>❌</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11968</th>\n",
+       "      <td>Four years ago the owner of a shopping center ...</td>\n",
+       "      <td>I</td>\n",
+       "      <td>E</td>\n",
+       "      <td>❌</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11972</th>\n",
+       "      <td>An electrical current flows along a flat plate...</td>\n",
+       "      <td>F</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>❌</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11976</th>\n",
+       "      <td>A recent law school graduate took and passed t...</td>\n",
+       "      <td>B</td>\n",
+       "      <td>A</td>\n",
+       "      <td>❌</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11977</th>\n",
+       "      <td>LC oscillators are used for produce a waveform...</td>\n",
+       "      <td>D</td>\n",
+       "      <td>E</td>\n",
+       "      <td>❌</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11978</th>\n",
+       "      <td>A 50-hp, 500-volt shunt motor draws a line cur...</td>\n",
+       "      <td>D</td>\n",
+       "      <td>H</td>\n",
+       "      <td>❌</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>3271 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                Question Predicted Answer  \\\n",
+       "2      We must assume that he is a Communist, because...                A   \n",
+       "14     A water bed sells for $330 cash or $40 down an...                J   \n",
+       "16     The team that wins in a tug-of-war is the team...                H   \n",
+       "32     In a circuit of two lamps in parallel, if the ...                H   \n",
+       "37     Consider an arbitrage-free securities market m...                B   \n",
+       "...                                                  ...              ...   \n",
+       "11968  Four years ago the owner of a shopping center ...                I   \n",
+       "11972  An electrical current flows along a flat plate...                F   \n",
+       "11976  A recent law school graduate took and passed t...                B   \n",
+       "11977  LC oscillators are used for produce a waveform...                D   \n",
+       "11978  A 50-hp, 500-volt shunt motor draws a line cur...                D   \n",
+       "\n",
+       "      Correct Answer Is Correct  \n",
+       "2                  D          ❌  \n",
+       "14                 D          ❌  \n",
+       "16                 A          ❌  \n",
+       "32                 B          ❌  \n",
+       "37                 A          ❌  \n",
+       "...              ...        ...  \n",
+       "11968              E          ❌  \n",
+       "11972            NaN          ❌  \n",
+       "11976              A          ❌  \n",
+       "11977              E          ❌  \n",
+       "11978              H          ❌  \n",
+       "\n",
+       "[3271 rows x 4 columns]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "def check_incorrect_answers(csv_path):\n",
+    "    \"\"\"Display every row whose predicted answer differs from the correct answer.\n",
+    "\n",
+    "    Args:\n",
+    "        csv_path: Path to a CSV containing 'Question', 'Predicted Answer',\n",
+    "            'Correct Answer' and 'Is Correct' columns.\n",
+    "\n",
+    "    Returns:\n",
+    "        None; a heading is printed and the rows are rendered via display().\n",
+    "    \"\"\"\n",
+    "    results = pd.read_csv(csv_path)\n",
+    "    print(\"Incorrect Rows\")\n",
+    "    shown_columns = ['Question', 'Predicted Answer', 'Correct Answer', 'Is Correct']\n",
+    "    # Boolean mask of rows where the two answer columns disagree.\n",
+    "    is_wrong = results['Predicted Answer'].ne(results['Correct Answer'])\n",
+    "    display(results.loc[is_wrong, shown_columns])\n",
+    "\n",
+    "check_incorrect_answers(csv_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "501ed411-7145-4e8e-ae07-439fea4b38f7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}