# Model endpoint configurations: locally served vLLM models and OpenRouter-hosted models.
import os
# Base URL shared by every OpenRouter-hosted entry below.
_OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"


def _vllm_config(
    model,
    *,
    port,
    cuda_devices,
    tensor_parallel,
    gpu_util,
    chat_template=None,
):
    """Return the config entry for a model served on localhost.

    ``api_base`` is derived from ``port`` so the two values cannot drift
    apart, and ``api_key`` is always ``None`` for these local entries.

    Args:
        model: Model id without the ``hosted_vllm/`` prefix.
        port: Localhost port of the server; also used to build ``api_base``.
        cuda_devices: Comma-separated GPU indices, stored as a string.
        tensor_parallel: Tensor-parallel degree; in the entries below it
            equals the number of devices listed in ``cuda_devices``.
        gpu_util: Fraction stored under ``"gpu_util"`` (presumably vLLM's
            GPU memory utilization setting -- TODO confirm with the launcher).
        chat_template: Optional path to a chat-template file, or ``None``.
    """
    return {
        # "hosted_vllm/" looks like a LiteLLM-style provider prefix -- confirm.
        "model": f"hosted_vllm/{model}",
        "api_base": f"http://localhost:{port}/v1",
        "api_key": None,
        "port": port,
        "cuda_devices": cuda_devices,
        "tensor_parallel": tensor_parallel,
        "gpu_util": gpu_util,
        "chat_template": chat_template,
    }


def _openrouter_config(model):
    """Return the config entry for a model reached through OpenRouter.

    The API key is read from the ``OPENROUTER_API_KEY`` environment variable
    when this function runs (i.e. at import time for the entries below) and
    is ``None`` if the variable is unset.

    Args:
        model: Model id without the ``openrouter/`` prefix.
    """
    return {
        "model": f"openrouter/{model}",
        "api_base": _OPENROUTER_API_BASE,
        "api_key": os.getenv("OPENROUTER_API_KEY"),
    }


# Registry of model endpoints, keyed by a short config name.
#
# Local ("vllm_*") entries carry serving parameters (port, GPUs, parallelism);
# OpenRouter entries only carry model id, base URL and API key.
#
# NOTE(review): "vllm_llama_70b" and "vllm_llama_90b" both list GPUs 4-7, and
# "vllm_llama_405b" lists all eight, so these servers presumably are not meant
# to run at the same time -- confirm with whoever launches them.
MODEL_CONFIGS = {
    "vllm_llama_70b": _vllm_config(
        "meta-llama/Llama-3.3-70B-Instruct",
        port=8001,
        cuda_devices="4,5,6,7",
        tensor_parallel=4,
        gpu_util=0.90,
    ),
    "vllm_llama_90b": _vllm_config(
        "meta-llama/Llama-3.2-90B-Vision-Instruct",
        port=8090,
        cuda_devices="4,5,6,7",
        tensor_parallel=4,
        gpu_util=0.70,
    ),
    "vllm_llama_405b": _vllm_config(
        "meta-llama/Llama-3.1-405B-FP8",
        port=8405,
        cuda_devices="0,1,2,3,4,5,6,7",
        tensor_parallel=8,
        gpu_util=0.80,
        # The only entry that supplies its own chat template file.
        chat_template="./llama3_405b_chat_template.jinja",
    ),
    "vllm_llama_8b": _vllm_config(
        "meta-llama/Llama-3.1-8B-Instruct",
        port=8008,
        cuda_devices="0",
        tensor_parallel=1,
        gpu_util=0.95,
    ),
    "openrouter_gpt4o": _openrouter_config("openai/gpt-4o"),
    "openrouter_gpt4o_mini": _openrouter_config("openai/gpt-4o-mini"),
    "openrouter_llama_70b": _openrouter_config("meta-llama/llama-3.3-70b-instruct"),
}