瀏覽代碼

merge utils, added configs and sample notebook to run optimizer

Justin Lee 1 月之前
父節點
當前提交
4768a41a20

File diff suppressed because it is too large
+ 5 - 3
end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/work_dir/mmlu/utils.py


+ 107 - 0
end-to-end-use-cases/prompt-migration/llama3_405b_chat_template.jinja

@@ -0,0 +1,107 @@
+{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {%- set tools_in_user_message = true %}
+{%- endif %}
+{%- if not date_string is defined %}
+    {%- set date_string = "26 Jul 2024" %}
+{%- endif %}
+
+{#- Extract system message if available; the "-" strips the preceding blank line so it never reaches the prompt #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content']|trim %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "" %}
+{%- endif %}
+
+{#- System message + tool setup; the "-" strips the preceding blank line, and tools may be undefined as well as none #}
+{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+{%- if builtin_tools is defined or (tools is defined and tools is not none) %}
+    {{- "Environment: ipython\n" }}
+{%- endif %}
+{%- if builtin_tools is defined %}
+    {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}}
+{%- endif %}
+{{- "Cutting Knowledge Date: December 2023\n" }}
+{{- "Today Date: " + date_string + "\n\n" }}
+
+{%- if tools is defined and tools is not none and not tools_in_user_message %}
+    {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- system_message }}
+{{- "<|eot_id|>" }}
+
+{#- Handle custom tools in user messages; skipped when tools is undefined or none. The "-" strips the preceding newlines #}
+{%- if tools_in_user_message and tools is defined and tools is not none %}
+    {%- if messages | length != 0 %}
+        {%- set first_user_message = messages[0]['content']|trim %}
+        {%- set messages = messages[1:] %}
+    {%- else %}
+        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+    {%- endif %}
+    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
+    {{- "Given the following functions, please respond with a JSON for a function call " }}
+    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+    {{- first_user_message + "<|eot_id|>"}}
+{%- endif %}
+
+{#- Process the remaining messages one turn at a time; the "-" strips the preceding blank line #}
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}
+    {%- elif 'tool_calls' in message %}
+        {%- if not message.tool_calls|length == 1 %}
+            {{- raise_exception("This model only supports single tool-calls at once!") }}
+        {%- endif %}
+        {%- set tool_call = message.tool_calls[0].function %}
+        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}
+            {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+            {{- "<|python_tag|>" + tool_call.name + ".call(" }}
+            {%- for arg_name, arg_val in tool_call.arguments | items %}
+                {{- arg_name + '="' + arg_val + '"' }}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- endif %}
+            {%- endfor %}
+            {{- ")" }}
+        {%- else %}
+            {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+            {{- '{"name": "' + tool_call.name + '", ' }}
+            {{- '"parameters": ' }}
+            {{- tool_call.arguments | tojson }}
+            {{- "}" }}
+        {%- endif %}
+        {%- if builtin_tools is defined %}
+            {{- "<|eom_id|>" }}
+        {%- else %}
+            {{- "<|eot_id|>" }}
+        {%- endif %}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+        {%- if message.content is mapping or message.content is iterable %}
+            {{- message.content | tojson }}
+        {%- else %}
+            {{- message.content }}
+        {%- endif %}
+        {{- "<|eot_id|>" }}
+    {%- endif %}
+{%- endfor %}
+
+{%- if add_generation_prompt %}
+    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}

+ 59 - 0
end-to-end-use-cases/prompt-migration/notebooks/config.py

@@ -0,0 +1,59 @@
+import os
+
+# Directory containing this config file. Paths below are built from it so they
+# resolve the same way regardless of the current working directory.
+_CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))
+
+# Model settings keyed by a short alias.
+# "vllm_*" entries describe locally served models and carry the serving
+# settings (port, cuda_devices, tensor_parallel, gpu_util, chat_template).
+# "openrouter_*" entries are remote endpoints and only define model, api_base
+# and api_key (read from the OPENROUTER_API_KEY environment variable at import).
+MODEL_CONFIGS = {
+    "vllm_llama_70b": {
+        "model": "hosted_vllm/meta-llama/Llama-3.3-70B-Instruct",
+        "api_base": "http://localhost:8001/v1",
+        "api_key": None,
+        "port": 8001,
+        "cuda_devices": "4,5,6,7",
+        "tensor_parallel": 4,
+        "gpu_util": 0.90,
+        "chat_template": None,
+    },
+    "vllm_llama_90b": {
+        "model": "hosted_vllm/meta-llama/Llama-3.2-90B-Vision-Instruct",
+        "api_base": "http://localhost:8090/v1",
+        "api_key": None,
+        "port": 8090,
+        "cuda_devices": "4,5,6,7",
+        "tensor_parallel": 4,
+        "gpu_util": 0.70,
+        "chat_template": None,
+    },
+    "vllm_llama_405b": {
+        "model": "hosted_vllm/meta-llama/Llama-3.1-405B-FP8",
+        "api_base": "http://localhost:8405/v1",
+        "api_key": None,
+        "port": 8405,
+        "cuda_devices": "0,1,2,3,4,5,6,7",
+        "tensor_parallel": 8,
+        "gpu_util": 0.80,
+        # The template file sits one directory above this config file.
+        "chat_template": os.path.join(
+            _CONFIG_DIR, "..", "llama3_405b_chat_template.jinja"
+        ),
+    },
+    "vllm_llama_8b": {
+        "model": "hosted_vllm/meta-llama/Llama-3.1-8B-Instruct",
+        "api_base": "http://localhost:8008/v1",
+        "api_key": None,
+        "port": 8008,
+        "cuda_devices": "0",
+        "tensor_parallel": 1,
+        "gpu_util": 0.95,
+        "chat_template": None,
+    },
+    "openrouter_gpt4o": {
+        "model": "openrouter/openai/gpt-4o",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key": os.getenv("OPENROUTER_API_KEY"),
+    },
+    "openrouter_gpt4o_mini": {
+        "model": "openrouter/openai/gpt-4o-mini",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key": os.getenv("OPENROUTER_API_KEY"),
+    },
+    "openrouter_llama_70b": {
+        "model": "openrouter/meta-llama/llama-3.3-70b-instruct",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key": os.getenv("OPENROUTER_API_KEY"),
+    },
+}

+ 0 - 426
end-to-end-use-cases/prompt-migration/notebooks/inspect.ipynb

@@ -1,426 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "005b80b1-9dbc-47cd-bde3-2c0da024b73a",
-   "metadata": {},
-   "source": [
-    "Inspect"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "39e4c357-02b1-42dd-8575-9d968ba25683",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "csv_path = \"prediction_results.csv\" "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "ce23b503-3adf-47de-b637-99c74c4a6633",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Question</th>\n",
-       "      <th>Predicted Answer</th>\n",
-       "      <th>Correct Answer</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>We must assume that he is a Communist, because...</td>\n",
-       "      <td>A</td>\n",
-       "      <td>D</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>A water bed sells for $330 cash or $40 down an...</td>\n",
-       "      <td>J</td>\n",
-       "      <td>D</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>The team that wins in a tug-of-war is the team...</td>\n",
-       "      <td>H</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>32</th>\n",
-       "      <td>In a circuit of two lamps in parallel, if the ...</td>\n",
-       "      <td>H</td>\n",
-       "      <td>B</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>37</th>\n",
-       "      <td>Consider an arbitrage-free securities market m...</td>\n",
-       "      <td>B</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1127</th>\n",
-       "      <td>The annular space between two concentricalumin...</td>\n",
-       "      <td>B</td>\n",
-       "      <td>D</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1129</th>\n",
-       "      <td>A 0.1 mm thick neutral density filter gives 70...</td>\n",
-       "      <td>I</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1130</th>\n",
-       "      <td>Air (100°F, 1atm) is flowing at a velocity of ...</td>\n",
-       "      <td>A</td>\n",
-       "      <td>I</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1132</th>\n",
-       "      <td>This question refers to the following informat...</td>\n",
-       "      <td>A</td>\n",
-       "      <td>C</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1134</th>\n",
-       "      <td>Order the following (risk, return) pairs from ...</td>\n",
-       "      <td>E</td>\n",
-       "      <td>G</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>300 rows × 3 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                               Question Predicted Answer  \\\n",
-       "2     We must assume that he is a Communist, because...                A   \n",
-       "14    A water bed sells for $330 cash or $40 down an...                J   \n",
-       "16    The team that wins in a tug-of-war is the team...                H   \n",
-       "32    In a circuit of two lamps in parallel, if the ...                H   \n",
-       "37    Consider an arbitrage-free securities market m...                B   \n",
-       "...                                                 ...              ...   \n",
-       "1127  The annular space between two concentricalumin...                B   \n",
-       "1129  A 0.1 mm thick neutral density filter gives 70...                I   \n",
-       "1130  Air (100°F, 1atm) is flowing at a velocity of ...                A   \n",
-       "1132  This question refers to the following informat...                A   \n",
-       "1134  Order the following (risk, return) pairs from ...                E   \n",
-       "\n",
-       "     Correct Answer  \n",
-       "2                 D  \n",
-       "14                D  \n",
-       "16                A  \n",
-       "32                B  \n",
-       "37                A  \n",
-       "...             ...  \n",
-       "1127              D  \n",
-       "1129              A  \n",
-       "1130              I  \n",
-       "1132              C  \n",
-       "1134              G  \n",
-       "\n",
-       "[300 rows x 3 columns]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Showing first 300 rows out of 3271 incorrect answers.\n",
-      "\n",
-      "Accuracy: 72.69%\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "from tabulate import tabulate\n",
-    "from IPython.display import display\n",
-    "\n",
-    "def display_incorrect_answers(csv_path):\n",
-    "    df = pd.read_csv(csv_path)\n",
-    "    \n",
-    "    incorrect_answers = df[df['Predicted Answer'] != df['Correct Answer']]\n",
-    "    \n",
-    "    max_rows = 300  # Adjust as needed\n",
-    "    incorrect_answers_limited = incorrect_answers[['Question', 'Predicted Answer', 'Correct Answer']].head(max_rows)\n",
-    "    \n",
-    "    display(pd.DataFrame(incorrect_answers_limited))\n",
-    "    \n",
-    "    if len(incorrect_answers) > max_rows:\n",
-    "        print(f\"\\nShowing first {max_rows} rows out of {len(incorrect_answers)} incorrect answers.\")\n",
-    "    \n",
-    "    total_questions = len(df)\n",
-    "    incorrect_count = len(incorrect_answers)\n",
-    "    accuracy = ((total_questions - incorrect_count) / total_questions) * 100\n",
-    "    print(f\"\\nAccuracy: {accuracy:.2f}%\")\n",
-    "\n",
-    "display_incorrect_answers(csv_path)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "6ad0e4b2-3845-4081-a686-9d2eac98fc25",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "All 'Is Correct' rows have matching answers.\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "from IPython.display import display\n",
-    "\n",
-    "def check_correct_answers(csv_path):\n",
-    "    df = pd.read_csv(csv_path)\n",
-    "    \n",
-    "    correct_mismatches = df[(df['Is Correct'] == True) & (df['Predicted Answer'] != df['Correct Answer'])]\n",
-    "    \n",
-    "    if not correct_mismatches.empty:\n",
-    "        print(\"\\nWarning: Some 'Is Correct' rows have mismatched answers!\")\n",
-    "        display(correct_mismatches[['Question', 'Predicted Answer', 'Correct Answer', 'Is Correct']])\n",
-    "    else:\n",
-    "        print(\"\\nAll 'Is Correct' rows have matching answers.\")\n",
-    "\n",
-    " \n",
-    "check_correct_answers(csv_path)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "id": "b2a9ed17-8e39-4f81-8a27-358e948967de",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Incorrect Rows\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Question</th>\n",
-       "      <th>Predicted Answer</th>\n",
-       "      <th>Correct Answer</th>\n",
-       "      <th>Is Correct</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>We must assume that he is a Communist, because...</td>\n",
-       "      <td>A</td>\n",
-       "      <td>D</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>A water bed sells for $330 cash or $40 down an...</td>\n",
-       "      <td>J</td>\n",
-       "      <td>D</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>The team that wins in a tug-of-war is the team...</td>\n",
-       "      <td>H</td>\n",
-       "      <td>A</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>32</th>\n",
-       "      <td>In a circuit of two lamps in parallel, if the ...</td>\n",
-       "      <td>H</td>\n",
-       "      <td>B</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>37</th>\n",
-       "      <td>Consider an arbitrage-free securities market m...</td>\n",
-       "      <td>B</td>\n",
-       "      <td>A</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11968</th>\n",
-       "      <td>Four years ago the owner of a shopping center ...</td>\n",
-       "      <td>I</td>\n",
-       "      <td>E</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11972</th>\n",
-       "      <td>An electrical current flows along a flat plate...</td>\n",
-       "      <td>F</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11976</th>\n",
-       "      <td>A recent law school graduate took and passed t...</td>\n",
-       "      <td>B</td>\n",
-       "      <td>A</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11977</th>\n",
-       "      <td>LC oscillators are used for produce a waveform...</td>\n",
-       "      <td>D</td>\n",
-       "      <td>E</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11978</th>\n",
-       "      <td>A 50-hp, 500-volt shunt motor draws a line cur...</td>\n",
-       "      <td>D</td>\n",
-       "      <td>H</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>3271 rows × 4 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                Question Predicted Answer  \\\n",
-       "2      We must assume that he is a Communist, because...                A   \n",
-       "14     A water bed sells for $330 cash or $40 down an...                J   \n",
-       "16     The team that wins in a tug-of-war is the team...                H   \n",
-       "32     In a circuit of two lamps in parallel, if the ...                H   \n",
-       "37     Consider an arbitrage-free securities market m...                B   \n",
-       "...                                                  ...              ...   \n",
-       "11968  Four years ago the owner of a shopping center ...                I   \n",
-       "11972  An electrical current flows along a flat plate...                F   \n",
-       "11976  A recent law school graduate took and passed t...                B   \n",
-       "11977  LC oscillators are used for produce a waveform...                D   \n",
-       "11978  A 50-hp, 500-volt shunt motor draws a line cur...                D   \n",
-       "\n",
-       "      Correct Answer Is Correct  \n",
-       "2                  D          ❌  \n",
-       "14                 D          ❌  \n",
-       "16                 A          ❌  \n",
-       "32                 B          ❌  \n",
-       "37                 A          ❌  \n",
-       "...              ...        ...  \n",
-       "11968              E          ❌  \n",
-       "11972            NaN          ❌  \n",
-       "11976              A          ❌  \n",
-       "11977              E          ❌  \n",
-       "11978              H          ❌  \n",
-       "\n",
-       "[3271 rows x 4 columns]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "def check_incorrect_answers(csv_path):\n",
-    "    df = pd.read_csv(csv_path)\n",
-    "    print(\"Incorrect Rows\")\n",
-    "    correct_mismatches = df[(df['Predicted Answer'] != df['Correct Answer'])]\n",
-    "    display(correct_mismatches[['Question', 'Predicted Answer', 'Correct Answer', 'Is Correct']])\n",
-    "\n",
-    "check_incorrect_answers(csv_path)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "501ed411-7145-4e8e-ae07-439fea4b38f7",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.16"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

File diff suppressed because it is too large
+ 728 - 0
end-to-end-use-cases/prompt-migration/notebooks/llama_mmlu_pro.ipynb


+ 63 - 0
end-to-end-use-cases/prompt-migration/notebooks/start_vllm.py

@@ -0,0 +1,63 @@
+import os
+import subprocess
+import sys
+
+from config import MODEL_CONFIGS  # Import model configurations
+
+
+def start_vllm(cuda_devices, model_name):
+    """Start a vLLM server for the selected model.
+
+    Args:
+        cuda_devices: Value to export as CUDA_VISIBLE_DEVICES (e.g. "0,1"), or
+            the literal "default" to use the model's "cuda_devices" setting.
+        model_name: Key into MODEL_CONFIGS.
+
+    Prints an error and exits with status 1 when the model is unknown or its
+    config lacks the settings needed to serve it locally. Otherwise runs
+    `vllm serve` and waits for it to exit.
+    """
+    if model_name not in MODEL_CONFIGS:
+        print(f"Error: Model '{model_name}' not found in config.")
+        print("Available models:", ", ".join(MODEL_CONFIGS.keys()))
+        sys.exit(1)
+
+    settings = MODEL_CONFIGS[model_name]
+
+    # Remote entries (e.g. openrouter_*) define no serving settings; report
+    # that clearly instead of failing with a KeyError further down.
+    required_keys = ["model", "port", "tensor_parallel", "gpu_util"]
+    if cuda_devices == "default":
+        required_keys.append("cuda_devices")
+    missing_keys = [key for key in required_keys if key not in settings]
+    if missing_keys:
+        print(
+            f"Error: Model '{model_name}' cannot be served with vLLM; "
+            f"missing config keys: {', '.join(missing_keys)}"
+        )
+        sys.exit(1)
+
+    model_path = settings["model"].replace("hosted_vllm/", "")
+    chat_template = settings.get("chat_template")
+
+    # Use provided CUDA devices or default from config
+    os.environ["CUDA_VISIBLE_DEVICES"] = (
+        cuda_devices if cuda_devices != "default" else settings["cuda_devices"]
+    )
+    print(f"Using CUDA devices: {os.environ['CUDA_VISIBLE_DEVICES']}")
+
+    # Build the command as an argument list so no shell is involved and values
+    # containing spaces or shell metacharacters are passed through verbatim.
+    vllm_command = [
+        "vllm",
+        "serve",
+        model_path,
+        "--port",
+        str(settings["port"]),
+        "--tensor-parallel-size",
+        str(settings["tensor_parallel"]),
+        "--gpu-memory-utilization",
+        str(settings["gpu_util"]),
+    ]
+
+    # Add chat template flag if required
+    if chat_template:
+        vllm_command += ["--chat-template", chat_template]
+
+    print(f"Starting vLLM server for model: {model_name}")
+    print(f"Running command: {' '.join(vllm_command)}")
+
+    # Run the command in a new process (inherits the environment set above)
+    subprocess.run(vllm_command)
+
+
+if __name__ == "__main__":
+    # Expect exactly the two positional arguments described in the usage text.
+    if len(sys.argv) < 3:
+        # The script sets CUDA_VISIBLE_DEVICES itself from <cuda_devices>, so
+        # the usage text does not ask the caller to export it as well.
+        print("Usage: python start_vllm.py <cuda_devices|default> <model_name>")
+        print("Example: python start_vllm.py 0 vllm_llama_8b")
+        print("Pass 'default' to use the cuda_devices listed in config.py.")
+        print("Available models:", ", ".join(MODEL_CONFIGS.keys()))
+        sys.exit(1)
+
+    cuda_devices = sys.argv[1]
+    model_name = sys.argv[2]
+
+    start_vllm(cuda_devices, model_name)