variables.tf

# Variables for minimal Amazon SageMaker deployment

variable "aws_region" {
  description = "AWS region for deployment"
  type        = string
  default     = "us-east-1"
}

variable "project_name" {
  description = "Name of the project (used for resource naming)"
  type        = string
  default     = "llama-api"
}

variable "environment" {
  description = "Environment name (dev, staging, prod)"
  type        = string
  default     = "dev"
}

variable "model_image_uri" {
  description = "URI of the container image for model inference"
  type        = string
  # Note: this default is pinned to the us-east-1 ECR registry; use the
  # matching regional registry URI if aws_region is changed.
  default     = "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
}

variable "model_data_s3_path" {
  description = "S3 path to the model artifacts (tar.gz file)"
  type        = string
  default     = ""
}

variable "model_name" {
  description = "Name of the model for inference"
  type        = string
  default     = "llama-3-3-70b-instruct"
}

variable "instance_type" {
  description = "SageMaker instance type for hosting (use ml.m5.xlarge for CPU if GPU quota is unavailable)"
  type        = string
  default     = "ml.p4d.24xlarge"
}

variable "initial_instance_count" {
  description = "Initial number of instances for the endpoint"
  type        = number
  default     = 1
}
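
These variables are only the inputs; the configuration that consumes them is not shown in this file. Below is a minimal sketch of how they would typically feed the AWS provider's SageMaker resources. The resource names, the aws_iam_role.sagemaker_execution reference, and the use of HF_MODEL_ID for var.model_name are assumptions for illustration, not part of the original project.

main.tf (sketch)

resource "aws_sagemaker_model" "this" {
  name               = "${var.project_name}-${var.environment}-model"
  # Hypothetical execution role; it must be defined elsewhere in the config.
  execution_role_arn = aws_iam_role.sagemaker_execution.arn

  primary_container {
    image = var.model_image_uri
    # The default "" must be overridden; SageMaker needs a real artifact path
    # unless the container downloads the model itself.
    model_data_url = var.model_data_s3_path
    # One plausible use of var.model_name with the Hugging Face inference
    # container (an assumption, not confirmed by the original file).
    environment = {
      HF_MODEL_ID = var.model_name
    }
  }
}

resource "aws_sagemaker_endpoint_configuration" "this" {
  name = "${var.project_name}-${var.environment}-config"

  production_variants {
    variant_name           = "AllTraffic"
    model_name             = aws_sagemaker_model.this.name
    instance_type          = var.instance_type
    initial_instance_count = var.initial_instance_count
  }
}

resource "aws_sagemaker_endpoint" "this" {
  name                 = "${var.project_name}-${var.environment}-endpoint"
  endpoint_config_name = aws_sagemaker_endpoint_configuration.this.name
}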
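
Per-environment values would normally be supplied through a tfvars file rather than by editing the defaults above. A short example with placeholder values (the bucket and key below are illustrative only):

terraform.tfvars (example)

environment            = "prod"
model_data_s3_path     = "s3://example-bucket/models/llama-3-3-70b-instruct/model.tar.gz"
initial_instance_count = 1

Terraform loads terraform.tfvars automatically; any other filename has to be passed with -var-file.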