```hcl
# Variables for minimal Amazon SageMaker deployment

variable "aws_region" {
  description = "AWS region for deployment"
  type        = string
  default     = "us-east-1"
}

variable "project_name" {
  description = "Name of the project (used for resource naming)"
  type        = string
  default     = "llama-api"
}

variable "environment" {
  description = "Environment name (dev, staging, prod)"
  type        = string
  default     = "dev"
}

variable "model_image_uri" {
  description = "URI of the container image for model inference"
  type        = string
  default     = "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
}

variable "model_data_s3_path" {
  description = "S3 path to the model artifacts (tar.gz file)"
  type        = string
  default     = ""
}

variable "model_name" {
  description = "Name of the model for inference"
  type        = string
  default     = "llama-3-3-70b-instruct"
}

variable "instance_type" {
  description = "SageMaker instance type for hosting (use ml.m5.xlarge for CPU if GPU quota unavailable)"
  type        = string
  default     = "ml.p4d.24xlarge"
}

variable "initial_instance_count" {
  description = "Initial number of instances for the endpoint"
  type        = number
  default     = 1
}
```
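These defaults can be overridden without editing the module, either with `-var` flags or a `terraform.tfvars` file, which Terraform loads automatically from the working directory. A minimal sketch follows; the bucket name and artifact key are hypothetical placeholders, not values from this project.

```hcl
# terraform.tfvars -- example overrides (hypothetical values for illustration)
aws_region  = "us-west-2"
environment = "prod"

# Point at your own model artifact; this bucket/key is a placeholder.
model_data_s3_path = "s3://example-model-artifacts/llama-3-3-70b/model.tar.gz"

# CPU fallback mentioned in the variable description, for accounts
# without GPU quota (note a 70B model will not fit on this instance).
instance_type          = "ml.m5.xlarge"
initial_instance_count = 2
```

A subsequent `terraform plan` then picks up these values in place of the defaults declared above.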