@@ -37,3 +37,5 @@ To run pretrained model benchmark, follow the command below.
```
python pretrained_vllm_benchmark.py
+
+For more details on vLLM benchmarking, refer to the [benchmarks directory](https://github.com/vllm-project/vllm/tree/main/benchmarks) in the official vLLM GitHub repository.
@@ -4,7 +4,6 @@
import csv
import json
import time
-import random
import threading
import numpy as np
import requests
@@ -18,7 +17,7 @@ from azure.core.exceptions import HttpResponseError
from azure.ai.contentsafety.models import AnalyzeTextOptions
from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Dict, Tuple, List
+from typing import Tuple, List
@@ -1,7 +1,7 @@
{
"MAX_NEW_TOKENS" : 256,
"CONCURRENT_LEVELS" : [1, 2, 4, 8, 16, 32, 64, 128, 256],
- "MODEL_PATH" : "meta-llama/Meta-Llama-3-70B-Instruct",
+ "MODEL_PATH" : "meta-llama/your-model-path",
"MODEL_HEADERS" : {"Content-Type": "application/json"},
"SAFE_CHECK" : true,
"THRESHOLD_TPS" : 7,
@@ -18,7 +18,7 @@ from azure.core.exceptions import HttpResponseError
# Predefined inputs