```hcl
# Variables for minimal Amazon SageMaker deployment

variable "aws_region" {
  description = "AWS region for deployment"
  type        = string
  default     = "us-east-1"
}

variable "project_name" {
  description = "Name of the project (used for resource naming)"
  type        = string
  default     = "llama-api"
}

variable "environment" {
  description = "Environment name (dev, staging, prod)"
  type        = string
  default     = "dev"
}

variable "model_image_uri" {
  description = "URI of the container image for model inference"
  type        = string
  default     = "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
}

variable "model_data_s3_path" {
  description = "S3 path to the model artifacts (tar.gz file)"
  type        = string
  default     = ""
}

variable "model_name" {
  description = "Name of the model for inference"
  type        = string
  default     = "llama-3-3-70b-instruct"
}

variable "instance_type" {
  description = "SageMaker instance type for hosting (use ml.m5.xlarge for CPU if GPU quota unavailable)"
  type        = string
  default     = "ml.p4d.24xlarge"
}

variable "initial_instance_count" {
  description = "Initial number of instances for the endpoint"
  type        = number
  default     = 1
}
```
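These defaults can be overridden without editing the module, either with `-var` flags or a `terraform.tfvars` file, which Terraform loads automatically from the working directory. A minimal sketch follows; the bucket name and artifact key are hypothetical placeholders, not values from this project.

```hcl
# terraform.tfvars -- example overrides (hypothetical values for illustration)
aws_region  = "us-west-2"
environment = "prod"

# Point at your own model artifact; this bucket/key is a placeholder.
model_data_s3_path = "s3://example-model-artifacts/llama-3-3-70b/model.tar.gz"

# CPU fallback mentioned in the variable description, for accounts
# without GPU quota (note a 70B model will not fit on this instance).
instance_type          = "ml.m5.xlarge"
initial_instance_count = 2
```

A subsequent `terraform plan` then picks up these values in place of the defaults declared above.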