Procházet zdrojové kódy

merge utils, added configs and sample notebook to run optimizer

Justin Lee před 11 měsíci
rodič
revize
4768a41a20

Rozdílová data souboru nebyla zobrazena, protože soubor je příliš velký
+ 5 - 3
end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/work_dir/mmlu/utils.py


+ 107 - 0
end-to-end-use-cases/prompt-migration/llama3_405b_chat_template.jinja

@@ -0,0 +1,107 @@
+{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {%- set tools_in_user_message = true %}
+{%- endif %}
+{%- if not date_string is defined %}
+    {%- set date_string = "26 Jul 2024" %}
+{%- endif %}
+{%- if not tools is defined %}{%- set tools = none %}{%- endif %}{#- a renderer that never passes tools would otherwise satisfy every 'is not none' test below -#}
+{#- Pull a leading system message out of the list; the dash after each comment opener keeps the blank line before it out of the prompt #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content']|trim %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "" %}
+{%- endif %}
+
+{#- System header: optional tool environment lines, fixed knowledge/date lines, then the system message #}
+{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+{%- if builtin_tools is defined or tools is not none %}
+    {{- "Environment: ipython\n" }}
+{%- endif %}
+{%- if builtin_tools is defined %}
+    {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}}
+{%- endif %}
+{{- "Cutting Knowledge Date: December 2023\n" }}
+{{- "Today Date: " + date_string + "\n\n" }}
+
+{%- if tools is not none and not tools_in_user_message %}
+    {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- system_message }}
+{{- "<|eot_id|>" }}
+
+{#- When tools go in the user turn, fold the tool list and its instructions into the first remaining message #}
+{%- if tools_in_user_message and not tools is none %}
+    {%- if messages | length != 0 %}
+        {%- set first_user_message = messages[0]['content']|trim %}
+        {%- set messages = messages[1:] %}
+    {%- else %}
+        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+    {%- endif %}
+    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
+    {{- "Given the following functions, please respond with a JSON for a function call " }}
+    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+    {{- first_user_message + "<|eot_id|>"}}
+{%- endif %}
+
+{#- Render the remaining turns: plain messages, assistant tool calls, and tool/ipython results #}
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}
+    {%- elif 'tool_calls' in message %}
+        {%- if not message.tool_calls|length == 1 %}
+            {{- raise_exception("This model only supports single tool-calls at once!") }}
+        {%- endif %}
+        {%- set tool_call = message.tool_calls[0].function %}
+        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}
+            {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+            {{- "<|python_tag|>" + tool_call.name + ".call(" }}
+            {%- for arg_name, arg_val in tool_call.arguments | items %}
+                {{- arg_name + '="' + arg_val + '"' }}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- endif %}
+            {%- endfor %}
+            {{- ")" }}
+        {%- else %}
+            {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+            {{- '{"name": "' + tool_call.name + '", ' }}
+            {{- '"parameters": ' }}
+            {{- tool_call.arguments | tojson }}
+            {{- "}" }}
+        {%- endif %}
+        {%- if builtin_tools is defined %}
+            {{- "<|eom_id|>" }}
+        {%- else %}
+            {{- "<|eot_id|>" }}
+        {%- endif %}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+        {%- if message.content is mapping or message.content is iterable %}
+            {{- message.content | tojson }}
+        {%- else %}
+            {{- message.content }}
+        {%- endif %}
+        {{- "<|eot_id|>" }}
+    {%- endif %}
+{%- endfor %}
+
+{%- if add_generation_prompt %}
+    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}

+ 59 - 0
end-to-end-use-cases/prompt-migration/notebooks/config.py

@@ -0,0 +1,59 @@
+import os
+
+MODEL_CONFIGS = {  # preset name -> settings; start_vllm.py looks presets up by this name
+    "vllm_llama_70b": {
+        "model": "hosted_vllm/meta-llama/Llama-3.3-70B-Instruct",  # start_vllm.py removes "hosted_vllm/" before calling `vllm serve`
+        "api_base": "http://localhost:8001/v1",
+        "api_key": None,
+        "port": 8001,  # passed to `vllm serve` as --port
+        "cuda_devices": "4,5,6,7",  # CUDA_VISIBLE_DEVICES value used when the CLI argument is "default"
+        "tensor_parallel": 4,  # passed as --tensor-parallel-size
+        "gpu_util": 0.90,  # passed as --gpu-memory-utilization
+        "chat_template": None,  # falsy, so no --chat-template flag is added
+    },
+    "vllm_llama_90b": {
+        "model": "hosted_vllm/meta-llama/Llama-3.2-90B-Vision-Instruct",
+        "api_base": "http://localhost:8090/v1",
+        "api_key": None,
+        "port": 8090,
+        "cuda_devices": "4,5,6,7",
+        "tensor_parallel": 4,
+        "gpu_util": 0.70,
+        "chat_template": None,
+    },
+    "vllm_llama_405b": {
+        "model": "hosted_vllm/meta-llama/Llama-3.1-405B-FP8",
+        "api_base": "http://localhost:8405/v1",
+        "api_key": None,
+        "port": 8405,
+        "cuda_devices": "0,1,2,3,4,5,6,7",
+        "tensor_parallel": 8,
+        "gpu_util": 0.80,
+        "chat_template": "./llama3_405b_chat_template.jinja",  # handed to --chat-template unchanged; NOTE(review): relative path, presumably resolved from the launch directory - confirm
+    },
+    "vllm_llama_8b": {
+        "model": "hosted_vllm/meta-llama/Llama-3.1-8B-Instruct",
+        "api_base": "http://localhost:8008/v1",
+        "api_key": None,
+        "port": 8008,
+        "cuda_devices": "0",
+        "tensor_parallel": 1,
+        "gpu_util": 0.95,
+        "chat_template": None,
+    },
+    "openrouter_gpt4o": {  # no "port"/"tensor_parallel"/"gpu_util" keys, which start_vllm.py requires
+        "model": "openrouter/openai/gpt-4o",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key": os.getenv("OPENROUTER_API_KEY"),  # read once at import time; None when the variable is unset
+    },
+    "openrouter_gpt4o_mini": {
+        "model": "openrouter/openai/gpt-4o-mini",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key": os.getenv("OPENROUTER_API_KEY"),
+    },
+    "openrouter_llama_70b": {
+        "model": "openrouter/meta-llama/llama-3.3-70b-instruct",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key": os.getenv("OPENROUTER_API_KEY"),
+    },
+}

+ 0 - 426
end-to-end-use-cases/prompt-migration/notebooks/inspect.ipynb

@@ -1,426 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "005b80b1-9dbc-47cd-bde3-2c0da024b73a",
-   "metadata": {},
-   "source": [
-    "Inspect"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "39e4c357-02b1-42dd-8575-9d968ba25683",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "csv_path = \"prediction_results.csv\" "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "ce23b503-3adf-47de-b637-99c74c4a6633",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Question</th>\n",
-       "      <th>Predicted Answer</th>\n",
-       "      <th>Correct Answer</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>We must assume that he is a Communist, because...</td>\n",
-       "      <td>A</td>\n",
-       "      <td>D</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>A water bed sells for $330 cash or $40 down an...</td>\n",
-       "      <td>J</td>\n",
-       "      <td>D</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>The team that wins in a tug-of-war is the team...</td>\n",
-       "      <td>H</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>32</th>\n",
-       "      <td>In a circuit of two lamps in parallel, if the ...</td>\n",
-       "      <td>H</td>\n",
-       "      <td>B</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>37</th>\n",
-       "      <td>Consider an arbitrage-free securities market m...</td>\n",
-       "      <td>B</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1127</th>\n",
-       "      <td>The annular space between two concentricalumin...</td>\n",
-       "      <td>B</td>\n",
-       "      <td>D</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1129</th>\n",
-       "      <td>A 0.1 mm thick neutral density filter gives 70...</td>\n",
-       "      <td>I</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1130</th>\n",
-       "      <td>Air (100°F, 1atm) is flowing at a velocity of ...</td>\n",
-       "      <td>A</td>\n",
-       "      <td>I</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1132</th>\n",
-       "      <td>This question refers to the following informat...</td>\n",
-       "      <td>A</td>\n",
-       "      <td>C</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1134</th>\n",
-       "      <td>Order the following (risk, return) pairs from ...</td>\n",
-       "      <td>E</td>\n",
-       "      <td>G</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>300 rows × 3 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                               Question Predicted Answer  \\\n",
-       "2     We must assume that he is a Communist, because...                A   \n",
-       "14    A water bed sells for $330 cash or $40 down an...                J   \n",
-       "16    The team that wins in a tug-of-war is the team...                H   \n",
-       "32    In a circuit of two lamps in parallel, if the ...                H   \n",
-       "37    Consider an arbitrage-free securities market m...                B   \n",
-       "...                                                 ...              ...   \n",
-       "1127  The annular space between two concentricalumin...                B   \n",
-       "1129  A 0.1 mm thick neutral density filter gives 70...                I   \n",
-       "1130  Air (100°F, 1atm) is flowing at a velocity of ...                A   \n",
-       "1132  This question refers to the following informat...                A   \n",
-       "1134  Order the following (risk, return) pairs from ...                E   \n",
-       "\n",
-       "     Correct Answer  \n",
-       "2                 D  \n",
-       "14                D  \n",
-       "16                A  \n",
-       "32                B  \n",
-       "37                A  \n",
-       "...             ...  \n",
-       "1127              D  \n",
-       "1129              A  \n",
-       "1130              I  \n",
-       "1132              C  \n",
-       "1134              G  \n",
-       "\n",
-       "[300 rows x 3 columns]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Showing first 300 rows out of 3271 incorrect answers.\n",
-      "\n",
-      "Accuracy: 72.69%\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "from tabulate import tabulate\n",
-    "from IPython.display import display\n",
-    "\n",
-    "def display_incorrect_answers(csv_path):\n",
-    "    df = pd.read_csv(csv_path)\n",
-    "    \n",
-    "    incorrect_answers = df[df['Predicted Answer'] != df['Correct Answer']]\n",
-    "    \n",
-    "    max_rows = 300  # Adjust as needed\n",
-    "    incorrect_answers_limited = incorrect_answers[['Question', 'Predicted Answer', 'Correct Answer']].head(max_rows)\n",
-    "    \n",
-    "    display(pd.DataFrame(incorrect_answers_limited))\n",
-    "    \n",
-    "    if len(incorrect_answers) > max_rows:\n",
-    "        print(f\"\\nShowing first {max_rows} rows out of {len(incorrect_answers)} incorrect answers.\")\n",
-    "    \n",
-    "    total_questions = len(df)\n",
-    "    incorrect_count = len(incorrect_answers)\n",
-    "    accuracy = ((total_questions - incorrect_count) / total_questions) * 100\n",
-    "    print(f\"\\nAccuracy: {accuracy:.2f}%\")\n",
-    "\n",
-    "display_incorrect_answers(csv_path)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "6ad0e4b2-3845-4081-a686-9d2eac98fc25",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "All 'Is Correct' rows have matching answers.\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "from IPython.display import display\n",
-    "\n",
-    "def check_correct_answers(csv_path):\n",
-    "    df = pd.read_csv(csv_path)\n",
-    "    \n",
-    "    correct_mismatches = df[(df['Is Correct'] == True) & (df['Predicted Answer'] != df['Correct Answer'])]\n",
-    "    \n",
-    "    if not correct_mismatches.empty:\n",
-    "        print(\"\\nWarning: Some 'Is Correct' rows have mismatched answers!\")\n",
-    "        display(correct_mismatches[['Question', 'Predicted Answer', 'Correct Answer', 'Is Correct']])\n",
-    "    else:\n",
-    "        print(\"\\nAll 'Is Correct' rows have matching answers.\")\n",
-    "\n",
-    " \n",
-    "check_correct_answers(csv_path)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "id": "b2a9ed17-8e39-4f81-8a27-358e948967de",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Incorrect Rows\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Question</th>\n",
-       "      <th>Predicted Answer</th>\n",
-       "      <th>Correct Answer</th>\n",
-       "      <th>Is Correct</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>We must assume that he is a Communist, because...</td>\n",
-       "      <td>A</td>\n",
-       "      <td>D</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>A water bed sells for $330 cash or $40 down an...</td>\n",
-       "      <td>J</td>\n",
-       "      <td>D</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>The team that wins in a tug-of-war is the team...</td>\n",
-       "      <td>H</td>\n",
-       "      <td>A</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>32</th>\n",
-       "      <td>In a circuit of two lamps in parallel, if the ...</td>\n",
-       "      <td>H</td>\n",
-       "      <td>B</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>37</th>\n",
-       "      <td>Consider an arbitrage-free securities market m...</td>\n",
-       "      <td>B</td>\n",
-       "      <td>A</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11968</th>\n",
-       "      <td>Four years ago the owner of a shopping center ...</td>\n",
-       "      <td>I</td>\n",
-       "      <td>E</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11972</th>\n",
-       "      <td>An electrical current flows along a flat plate...</td>\n",
-       "      <td>F</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11976</th>\n",
-       "      <td>A recent law school graduate took and passed t...</td>\n",
-       "      <td>B</td>\n",
-       "      <td>A</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11977</th>\n",
-       "      <td>LC oscillators are used for produce a waveform...</td>\n",
-       "      <td>D</td>\n",
-       "      <td>E</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11978</th>\n",
-       "      <td>A 50-hp, 500-volt shunt motor draws a line cur...</td>\n",
-       "      <td>D</td>\n",
-       "      <td>H</td>\n",
-       "      <td>❌</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>3271 rows × 4 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                Question Predicted Answer  \\\n",
-       "2      We must assume that he is a Communist, because...                A   \n",
-       "14     A water bed sells for $330 cash or $40 down an...                J   \n",
-       "16     The team that wins in a tug-of-war is the team...                H   \n",
-       "32     In a circuit of two lamps in parallel, if the ...                H   \n",
-       "37     Consider an arbitrage-free securities market m...                B   \n",
-       "...                                                  ...              ...   \n",
-       "11968  Four years ago the owner of a shopping center ...                I   \n",
-       "11972  An electrical current flows along a flat plate...                F   \n",
-       "11976  A recent law school graduate took and passed t...                B   \n",
-       "11977  LC oscillators are used for produce a waveform...                D   \n",
-       "11978  A 50-hp, 500-volt shunt motor draws a line cur...                D   \n",
-       "\n",
-       "      Correct Answer Is Correct  \n",
-       "2                  D          ❌  \n",
-       "14                 D          ❌  \n",
-       "16                 A          ❌  \n",
-       "32                 B          ❌  \n",
-       "37                 A          ❌  \n",
-       "...              ...        ...  \n",
-       "11968              E          ❌  \n",
-       "11972            NaN          ❌  \n",
-       "11976              A          ❌  \n",
-       "11977              E          ❌  \n",
-       "11978              H          ❌  \n",
-       "\n",
-       "[3271 rows x 4 columns]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "def check_incorrect_answers(csv_path):\n",
-    "    df = pd.read_csv(csv_path)\n",
-    "    print(\"Incorrect Rows\")\n",
-    "    correct_mismatches = df[(df['Predicted Answer'] != df['Correct Answer'])]\n",
-    "    display(correct_mismatches[['Question', 'Predicted Answer', 'Correct Answer', 'Is Correct']])\n",
-    "\n",
-    "check_incorrect_answers(csv_path)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "501ed411-7145-4e8e-ae07-439fea4b38f7",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.16"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

Rozdílová data souboru nebyla zobrazena, protože soubor je příliš velký
+ 728 - 0
end-to-end-use-cases/prompt-migration/notebooks/llama_mmlu_pro.ipynb


+ 63 - 0
end-to-end-use-cases/prompt-migration/notebooks/start_vllm.py

@@ -0,0 +1,63 @@
+import os
+import subprocess
+import sys
+
+from config import MODEL_CONFIGS  # Import model configurations
+
+
+def start_vllm(cuda_devices, model_name):
+    """Start vLLM server for the selected model with user-defined CUDA settings."""
+    if model_name not in MODEL_CONFIGS:
+        print(f"Error: Model '{model_name}' not found in config.")
+        print("Available models:", ", ".join(MODEL_CONFIGS.keys()))
+        sys.exit(1)
+
+    MODEL_SETTINGS = MODEL_CONFIGS[model_name]
+
+    model_path = MODEL_SETTINGS["model"].replace("hosted_vllm/", "")
+    api_base = MODEL_SETTINGS["api_base"]
+    port = MODEL_SETTINGS["port"]
+    tensor_parallel = MODEL_SETTINGS["tensor_parallel"]
+    gpu_util = MODEL_SETTINGS["gpu_util"]
+    chat_template = MODEL_SETTINGS.get("chat_template", None)
+
+    # Use provided CUDA devices or default from config
+    os.environ["CUDA_VISIBLE_DEVICES"] = (
+        cuda_devices if cuda_devices != "default" else MODEL_SETTINGS["cuda_devices"]
+    )
+    print(f"Using CUDA devices: {os.environ['CUDA_VISIBLE_DEVICES']}")
+
+    # Build vLLM serve command
+    vllm_command = (
+        f"vllm serve {model_path} "
+        f"--port {port} "
+        f"--tensor-parallel-size {tensor_parallel} "
+        f"--gpu-memory-utilization {gpu_util} "
+    )
+
+    # Add chat template flag if required
+    if chat_template:
+        vllm_command += f"--chat-template {chat_template} "
+
+    print(f"Starting vLLM server for model: {model_name}")
+    print(f"Running command: {vllm_command}")
+
+    # Run the command in a new process
+    subprocess.run(vllm_command, shell=True)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        print(
+            "Usage: CUDA_VISIBLE_DEVICES=<devices> python start_vllm.py <cuda_devices> <model_name>"
+        )
+        print(
+            "Example: CUDA_VISIBLE_DEVICES=0,1 python start_vllm.py 0,1 vllm_llama_405b"
+        )
+        print("Available models:", ", ".join(MODEL_CONFIGS.keys()))
+        sys.exit(1)
+
+    cuda_devices = sys.argv[1]
+    model_name = sys.argv[2]
+
+    start_vllm(cuda_devices, model_name)