
Add files via upload

Connor Treacy, 1 week ago
commit d03fda145d

+ 4 - 0
terraform/README.md

@@ -0,0 +1,4 @@
+# Starter deployment scripts
+Included in this folder are starter deployment scripts for both Amazon Web Services (AWS) and Google Cloud Platform (GCP). These scripts provide bare-bones deployments of Llama models in both managed and unmanaged settings.
+
+These scripts are a companion to the [private cloud deployment guide](/docs/open_source/private_cloud.md). The guide outlines advanced deployment techniques, such as multi-region deployment and granular permissioning, that may be required for your specific cloud deployment. Use these scripts as a starting point for building your own custom cloud deployments.

+ 74 - 0
terraform/amazon-bedrock-default/README.md

@@ -0,0 +1,74 @@
+# Amazon Bedrock deployment
+
+Deploy Llama 4 Scout models using the Amazon Bedrock managed service.
+
+## Overview
+
+This Terraform configuration sets up a basic example deployment, demonstrating how to deploy and serve Amazon Bedrock foundation models in Amazon Web Services. Amazon Bedrock provides fully managed access to foundation models, with no inference infrastructure for you to manage.
+
+This example shows how to use basic services such as:
+
+- IAM roles for permissions management
+- Service accounts for fine-grained access control
+- A minimal IAM policy granting access to Bedrock Llama models
+
+In our [architecture patterns for private cloud guide](/docs/open_source/private_cloud.md) we outline advanced patterns that you may choose to implement in a more complete cloud deployment. These include:
+
+- Deployment into multiple regions or clouds
+- Managed keys/secrets services
+- Comprehensive logging systems for auditing and compliance
+- Backup and recovery systems
+
+## Getting started
+
+### Prerequisites
+
+* AWS account with access to Amazon Bedrock
+* Terraform installed
+* AWS CLI configured
+* **Model access enabled**: Go to Amazon Bedrock console → Model access → Request access for Meta Llama models (see the verification sketch below)
+
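+To confirm that model access has been granted, you can list the Meta foundation models visible to your account. This is a minimal sketch using boto3; it assumes your AWS credentials are already configured:
+
+```python
+import boto3
+
+# Control-plane client for the model catalog (separate from 'bedrock-runtime', which is used for inference)
+bedrock = boto3.client('bedrock', region_name='us-east-1')
+
+# List Meta foundation models available to this account in the region
+models = bedrock.list_foundation_models(byProvider='meta')
+for summary in models['modelSummaries']:
+    print(summary['modelId'])
+```
+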
+### Deploy
+
+1. Configure AWS credentials:
+   ```bash
+   aws configure
+   ```
+
+2. Create configuration:
+   ```bash
+   cd terraform/amazon-bedrock-default
+   cp terraform.tfvars.example terraform.tfvars
+   ```
+
+3. Edit terraform.tfvars with your values.
+
+4. Deploy:
+   ```bash
+   terraform init
+   terraform plan
+   terraform apply
+   ```
+
+### Usage
+
+```python
+import boto3
+import json
+
+bedrock = boto3.client('bedrock-runtime', region_name='us-east-1')
+
+response = bedrock.invoke_model(
+    modelId='meta.llama4-scout-17b-instruct-v1:0',
+    body=json.dumps({
+        "prompt": "Hello, how are you?",
+        "max_gen_len": 256,
+        "temperature": 0.7
+    })
+)
+
+# The response body is a JSON document; for Llama models the completion is in the 'generation' field
+result = json.loads(response['body'].read())
+print(result['generation'])
+```
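+
+The IAM policy above also allows `bedrock:InvokeModelWithResponseStream`, so you can stream tokens as they are generated. A minimal sketch, assuming the same client as above; for Llama models each streamed chunk carries its text in a `generation` field:
+
+```python
+import boto3
+import json
+
+bedrock = boto3.client('bedrock-runtime', region_name='us-east-1')
+
+response = bedrock.invoke_model_with_response_stream(
+    modelId='meta.llama4-scout-17b-instruct-v1:0',
+    body=json.dumps({
+        "prompt": "Tell me a short story.",
+        "max_gen_len": 256,
+        "temperature": 0.7
+    })
+)
+
+# The body is an event stream; decode each chunk and print the partial generation
+for event in response['body']:
+    chunk = json.loads(event['chunk']['bytes'])
+    print(chunk.get('generation', ''), end='', flush=True)
+print()
+```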
+
+## Next steps
+
+* [Amazon Bedrock User Guide](https://docs.aws.amazon.com/bedrock/)
+* [Amazon Bedrock API Reference](https://docs.aws.amazon.com/bedrock/latest/APIReference/)

+ 117 - 0
terraform/amazon-bedrock-default/main.tf

@@ -0,0 +1,117 @@
+# Minimal Amazon Bedrock Terraform configuration for Llama API deployment
+# This creates only the essential IAM resources needed to access Bedrock models
+
+terraform {
+  required_version = ">= 1.0"
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+  }
+}
+
+provider "aws" {
+  region = var.aws_region
+}
+
+# Data sources
+data "aws_caller_identity" "current" {}
+data "aws_region" "current" {}
+
+# Local values
+locals {
+  account_id = data.aws_caller_identity.current.account_id
+  region     = data.aws_region.current.name
+}
+
+# IAM role for Bedrock access
+resource "aws_iam_role" "bedrock_role" {
+  name = "${var.project_name}-bedrock-role"
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Action = "sts:AssumeRole"
+        Effect = "Allow"
+        Principal = {
+          Service = [
+            "lambda.amazonaws.com",
+            "ec2.amazonaws.com"
+          ]
+        }
+      },
+      {
+        Action = "sts:AssumeRole"
+        Effect = "Allow"
+        Principal = {
+          AWS = "arn:aws:iam::${local.account_id}:root"
+        }
+      }
+    ]
+  })
+
+  tags = {
+    Name        = "${var.project_name}-bedrock-role"
+    Environment = var.environment
+    ManagedBy   = "Terraform"
+  }
+}
+
+# IAM policy for Bedrock model access
+resource "aws_iam_policy" "bedrock_access" {
+  name        = "${var.project_name}-bedrock-access"
+  description = "Minimal policy for accessing Bedrock Llama models"
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect = "Allow"
+        Action = [
+          "bedrock:InvokeModel",
+          "bedrock:InvokeModelWithResponseStream"
+        ]
+        Resource = [
+          "arn:aws:bedrock:${local.region}::foundation-model/meta.llama4-scout-17b-instruct-v1:0"
+        ]
+      }
+    ]
+  })
+
+  tags = {
+    Name        = "${var.project_name}-bedrock-access"
+    Environment = var.environment
+    ManagedBy   = "Terraform"
+  }
+}
+
+# Attach policy to role
+resource "aws_iam_role_policy_attachment" "bedrock_access" {
+  role       = aws_iam_role.bedrock_role.name
+  policy_arn = aws_iam_policy.bedrock_access.arn
+}
+
+# Optional: Create access key for programmatic access
+resource "aws_iam_user" "bedrock_user" {
+  count = var.create_user ? 1 : 0
+  name  = "${var.project_name}-bedrock-user"
+
+  tags = {
+    Name        = "${var.project_name}-bedrock-user"
+    Environment = var.environment
+    ManagedBy   = "Terraform"
+  }
+}
+
+resource "aws_iam_user_policy_attachment" "user_bedrock_access" {
+  count      = var.create_user ? 1 : 0
+  user       = aws_iam_user.bedrock_user[0].name
+  policy_arn = aws_iam_policy.bedrock_access.arn
+}
+
+resource "aws_iam_access_key" "bedrock_user" {
+  count = var.create_user ? 1 : 0
+  user  = aws_iam_user.bedrock_user[0].name
+}

+ 38 - 0
terraform/amazon-bedrock-default/outputs.tf

@@ -0,0 +1,38 @@
+# Outputs for minimal Amazon Bedrock deployment
+
+output "bedrock_role_arn" {
+  description = "ARN of the IAM role for Bedrock access"
+  value       = aws_iam_role.bedrock_role.arn
+}
+
+output "bedrock_role_name" {
+  description = "Name of the IAM role for Bedrock access"
+  value       = aws_iam_role.bedrock_role.name
+}
+
+output "bedrock_policy_arn" {
+  description = "ARN of the IAM policy for Bedrock access"
+  value       = aws_iam_policy.bedrock_access.arn
+}
+
+output "aws_region" {
+  description = "AWS region used for deployment"
+  value       = var.aws_region
+}
+
+output "bedrock_endpoint" {
+  description = "Bedrock runtime endpoint URL"
+  value       = "https://bedrock-runtime.${var.aws_region}.amazonaws.com"
+}
+
+output "user_access_key_id" {
+  description = "Access key ID for the IAM user (if created)"
+  value       = var.create_user ? aws_iam_access_key.bedrock_user[0].id : null
+  sensitive   = true
+}
+
+output "user_secret_access_key" {
+  description = "Secret access key for the IAM user (if created)"
+  value       = var.create_user ? aws_iam_access_key.bedrock_user[0].secret : null
+  sensitive   = true
+}

+ 12 - 0
terraform/amazon-bedrock-default/terraform.tfvars.example

@@ -0,0 +1,12 @@
+# Example terraform.tfvars for minimal Amazon Bedrock deployment
+# Copy this file to terraform.tfvars and customize as needed
+
+# AWS Configuration
+aws_region = "us-east-1"
+
+# Project Configuration
+project_name = "my-llama-api"
+environment  = "dev"
+
+# IAM User (optional)
+create_user = false  # Set to true if you need programmatic access keys

+ 25 - 0
terraform/amazon-bedrock-default/variables.tf

@@ -0,0 +1,25 @@
+# Variables for minimal Amazon Bedrock deployment
+
+variable "aws_region" {
+  description = "AWS region for deployment"
+  type        = string
+  default     = "us-east-1"
+}
+
+variable "project_name" {
+  description = "Name of the project (used for resource naming)"
+  type        = string
+  default     = "llama-api"
+}
+
+variable "environment" {
+  description = "Environment name (dev, staging, prod)"
+  type        = string
+  default     = "dev"
+}
+
+variable "create_user" {
+  description = "Whether to create IAM user with access keys for programmatic access"
+  type        = bool
+  default     = false
+}

+ 92 - 0
terraform/amazon-sagemaker-default/README.md

@@ -0,0 +1,92 @@
+# Amazon SageMaker deployment
+
+Deploy Llama models using Amazon SageMaker with GPU instances.
+
+## Overview
+
+This Terraform configuration sets up a basic example deployment, demonstrating how to deploy and serve foundation models using Amazon SageMaker. Amazon SageMaker provides managed inference endpoints with auto-scaling capabilities.
+
+This example shows how to use basic services such as:
+
+- IAM roles for permissions management
+- Service accounts for fine-grained access control
+- Connecting model artifacts in S3 with SageMaker for deployment
+
+In our [architecture patterns for private cloud guide](/docs/open_source/private_cloud.md) we outline advanced patterns that you may choose to implement in a more complete cloud deployment. These include:
+
+- Deployment into multiple regions or clouds
+- Managed keys/secrets services
+- Comprehensive logging systems for auditing and compliance
+- Backup and recovery systems
+
+## Getting started
+
+### Prerequisites
+
+* AWS account with access to Amazon SageMaker
+* Terraform installed
+* Model artifacts packaged as `tar.gz` (see model setup below)
+* Container image (AWS pre-built or custom ECR)
+* A Hugging Face account with access to the appropriate models (such as Llama 3.2 1B or Llama 3.3 70B)
+* **GPU quota**: Request a quota increase for `ml.p4d.24xlarge` instances via AWS Service Quotas (the default is 0); the sketch below shows how to check your current limit
+
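+You can check the current quota value before deploying. This is a minimal sketch using the Service Quotas API via boto3; the quota-name filter is an assumption, so adjust it if your account shows a different name:
+
+```python
+import boto3
+
+quotas = boto3.client('service-quotas', region_name='us-east-1')
+
+# Page through SageMaker quotas and print the ones that mention the chosen instance type
+token = None
+while True:
+    kwargs = {'ServiceCode': 'sagemaker'}
+    if token:
+        kwargs['NextToken'] = token
+    page = quotas.list_service_quotas(**kwargs)
+    for quota in page['Quotas']:
+        if 'ml.p4d.24xlarge' in quota['QuotaName']:
+            print(quota['QuotaName'], quota['Value'])
+    token = page.get('NextToken')
+    if not token:
+        break
+```
+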
+### Deploy
+
+1. Configure AWS credentials:
+   ```bash
+   aws configure
+   ```
+
+2. Prepare Llama model artifacts:
+   ```bash
+   # Download model using Hugging Face CLI
+   pip install huggingface-hub
+   huggingface-cli download meta-llama/Llama-3.2-1B-Instruct --local-dir ./model
+   
+   # Package for Amazon SageMaker
+   tar -czf model.tar.gz -C model .
+   aws s3 cp model.tar.gz s3://your-bucket/model/
+   ```
+
+3. Create configuration:
+   ```bash
+   cd terraform/amazon-sagemaker-default
+   cp terraform.tfvars.example terraform.tfvars
+   ```
+
+4. Edit terraform.tfvars with your model S3 path and other variables
+
+5. Deploy:
+   ```bash
+   terraform init
+   terraform plan
+   terraform apply
+   ```
+
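+Creating the SageMaker endpoint can take several minutes. Before sending traffic, you can confirm it has reached the `InService` state; a minimal sketch using boto3, where the endpoint name is a placeholder taken from the `sagemaker_endpoint_name` Terraform output:
+
+```python
+import boto3
+
+sagemaker = boto3.client('sagemaker', region_name='us-east-1')
+
+endpoint_name = 'my-llama-api-dev-llama-endpoint'  # value of the sagemaker_endpoint_name output
+
+# Report the current status, then block until the endpoint is InService
+status = sagemaker.describe_endpoint(EndpointName=endpoint_name)['EndpointStatus']
+print(f"Endpoint status: {status}")
+
+waiter = sagemaker.get_waiter('endpoint_in_service')
+waiter.wait(EndpointName=endpoint_name)
+print("Endpoint is ready for invocations")
+```
+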
+### Usage
+
+```python
+import boto3
+import json
+
+client = boto3.client('sagemaker-runtime', region_name='us-east-1')
+
+response = client.invoke_endpoint(
+    EndpointName='your-endpoint-name',
+    ContentType='application/json',
+    Body=json.dumps({
+        "inputs": "Hello, how are you?",
+        "parameters": {
+            "max_new_tokens": 256,
+            "temperature": 0.7
+        }
+    })
+)
+
+result = json.loads(response['Body'].read())
+print(result)
+```
+
+## Next steps
+
+* [Amazon SageMaker Developer Guide](https://docs.aws.amazon.com/sagemaker/)
+* [Amazon SageMaker Runtime API](https://docs.aws.amazon.com/sagemaker/latest/APIReference/)

+ 182 - 0
terraform/amazon-sagemaker-default/main.tf

@@ -0,0 +1,182 @@
+# Minimal Amazon SageMaker Terraform configuration for Llama deployment
+# This creates only the essential resources for SageMaker model deployment
+
+terraform {
+  required_version = ">= 1.0"
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+  }
+}
+
+provider "aws" {
+  region = var.aws_region
+}
+
+# Data sources
+data "aws_caller_identity" "current" {}
+data "aws_region" "current" {}
+
+# Local values
+locals {
+  account_id  = data.aws_caller_identity.current.account_id
+  region      = data.aws_region.current.name
+  name_prefix = "${var.project_name}-${var.environment}"
+}
+
+# S3 bucket for model artifacts (required for SageMaker)
+resource "aws_s3_bucket" "model_artifacts" {
+  bucket = "${local.name_prefix}-model-artifacts-${random_id.bucket_suffix.hex}"
+
+  tags = {
+    Name        = "${local.name_prefix}-model-artifacts"
+    Environment = var.environment
+    ManagedBy   = "Terraform"
+  }
+}
+
+resource "random_id" "bucket_suffix" {
+  byte_length = 4
+}
+
+resource "aws_s3_bucket_public_access_block" "model_artifacts" {
+  bucket = aws_s3_bucket.model_artifacts.id
+
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+# IAM role for SageMaker execution
+resource "aws_iam_role" "sagemaker_execution_role" {
+  name = "${local.name_prefix}-sagemaker-role"
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Action = "sts:AssumeRole"
+        Effect = "Allow"
+        Principal = {
+          Service = "sagemaker.amazonaws.com"
+        }
+      }
+    ]
+  })
+
+  tags = {
+    Name        = "${local.name_prefix}-sagemaker-role"
+    Environment = var.environment
+    ManagedBy   = "Terraform"
+  }
+}
+
+# IAM policy for SageMaker execution (minimal permissions)
+resource "aws_iam_role_policy" "sagemaker_policy" {
+  name = "${local.name_prefix}-sagemaker-policy"
+  role = aws_iam_role.sagemaker_execution_role.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect = "Allow"
+        Action = [
+          "s3:GetObject",
+          "s3:ListBucket"
+        ]
+        Resource = [
+          aws_s3_bucket.model_artifacts.arn,
+          "${aws_s3_bucket.model_artifacts.arn}/*",
+          "arn:aws:s3:::llama-model-demo-bucket",
+          "arn:aws:s3:::llama-model-demo-bucket/*"
+        ]
+      },
+      {
+        Effect = "Allow"
+        Action = [
+          "ecr:GetAuthorizationToken",
+          "ecr:BatchCheckLayerAvailability",
+          "ecr:GetDownloadUrlForLayer",
+          "ecr:BatchGetImage"
+        ]
+        Resource = "*"
+      },
+      {
+        Effect = "Allow"
+        Action = [
+          "logs:CreateLogGroup",
+          "logs:CreateLogStream",
+          "logs:PutLogEvents"
+        ]
+        Resource = "arn:aws:logs:${local.region}:${local.account_id}:*"
+      }
+    ]
+  })
+}
+
+# SageMaker model
+resource "aws_sagemaker_model" "llama_model" {
+  name               = "${local.name_prefix}-llama-model"
+  execution_role_arn = aws_iam_role.sagemaker_execution_role.arn
+
+  primary_container {
+    image          = var.model_image_uri
+    model_data_url = var.model_data_s3_path
+
+    environment = {
+      SAGEMAKER_PROGRAM          = "inference.py"
+      SAGEMAKER_SUBMIT_DIRECTORY = "/opt/ml/code"
+      MODEL_NAME                 = var.model_name
+      HF_TASK                    = "text-generation"
+    }
+  }
+
+  tags = {
+    Name        = "${local.name_prefix}-llama-model"
+    Environment = var.environment
+    ManagedBy   = "Terraform"
+  }
+}
+
+# SageMaker endpoint configuration
+resource "aws_sagemaker_endpoint_configuration" "llama_config" {
+  name = "${local.name_prefix}-llama-config-${random_id.config_suffix.hex}"
+
+  production_variants {
+    variant_name           = "primary"
+    model_name             = aws_sagemaker_model.llama_model.name
+    initial_instance_count = var.initial_instance_count
+    instance_type          = var.instance_type
+    initial_variant_weight = 1
+  }
+
+  tags = {
+    Name        = "${local.name_prefix}-llama-config"
+    Environment = var.environment
+    ManagedBy   = "Terraform"
+  }
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+resource "random_id" "config_suffix" {
+  byte_length = 4
+}
+
+# SageMaker endpoint
+resource "aws_sagemaker_endpoint" "llama_endpoint" {
+  name                 = "${local.name_prefix}-llama-endpoint"
+  endpoint_config_name = aws_sagemaker_endpoint_configuration.llama_config.name
+
+  tags = {
+    Name        = "${local.name_prefix}-llama-endpoint"
+    Environment = var.environment
+    ManagedBy   = "Terraform"
+  }
+}

+ 36 - 0
terraform/amazon-sagemaker-default/outputs.tf

@@ -0,0 +1,36 @@
+# Outputs for minimal Amazon SageMaker deployment
+
+output "sagemaker_endpoint_name" {
+  description = "Name of the SageMaker endpoint"
+  value       = aws_sagemaker_endpoint.llama_endpoint.name
+}
+
+output "sagemaker_endpoint_arn" {
+  description = "ARN of the SageMaker endpoint"
+  value       = aws_sagemaker_endpoint.llama_endpoint.arn
+}
+
+output "sagemaker_role_arn" {
+  description = "ARN of the SageMaker execution role"
+  value       = aws_iam_role.sagemaker_execution_role.arn
+}
+
+output "model_artifacts_bucket" {
+  description = "S3 bucket name for model artifacts"
+  value       = aws_s3_bucket.model_artifacts.bucket
+}
+
+output "model_artifacts_bucket_arn" {
+  description = "S3 bucket ARN for model artifacts"
+  value       = aws_s3_bucket.model_artifacts.arn
+}
+
+output "aws_region" {
+  description = "AWS region used for deployment"
+  value       = var.aws_region
+}
+
+output "endpoint_url" {
+  description = "SageMaker runtime endpoint URL for inference"
+  value       = "https://runtime.sagemaker.${var.aws_region}.amazonaws.com/endpoints/${aws_sagemaker_endpoint.llama_endpoint.name}/invocations"
+}

+ 18 - 0
terraform/amazon-sagemaker-default/terraform.tfvars

@@ -0,0 +1,18 @@
+# terraform.tfvars for the minimal Amazon SageMaker example deployment
+# Customize these values for your environment
+
+# AWS Configuration
+aws_region = "us-west-2"
+
+# Project Configuration
+project_name = "my-llama-api"
+environment  = "dev"
+
+# Model Configuration
+model_image_uri = "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:2.6.0-transformers4.51.3-gpu-py312-cu124-ubuntu22.04"
+model_data_s3_path = "s3://llama-model-demo-bucket/model.tar.gz"
+model_name = "Llama-3.2-1B-Instruct"
+
+# Instance Configuration
+instance_type = "ml.p4d.24xlarge"  # GPU instance for Llama models, will fit larger models
+initial_instance_count = 1

+ 18 - 0
terraform/amazon-sagemaker-default/terraform.tfvars.example

@@ -0,0 +1,18 @@
+# Example terraform.tfvars for minimal Amazon SageMaker deployment
+# Copy this file to terraform.tfvars and customize as needed
+
+# AWS Configuration
+aws_region = "us-east-1"
+
+# Project Configuration
+project_name = "my-llama-api"
+environment  = "dev"
+
+# Model Configuration
+model_image_uri = "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+model_data_s3_path = "s3://my-bucket/model/model.tar.gz"  # Update with your model path
+model_name = "Llama-3.3-70B-Instruct"
+
+# Instance Configuration
+instance_type = "ml.p4d.24xlarge"  # GPU instance for Llama models
+initial_instance_count = 1

+ 49 - 0
terraform/amazon-sagemaker-default/variables.tf

@@ -0,0 +1,49 @@
+# Variables for minimal Amazon SageMaker deployment
+
+variable "aws_region" {
+  description = "AWS region for deployment"
+  type        = string
+  default     = "us-east-1"
+}
+
+variable "project_name" {
+  description = "Name of the project (used for resource naming)"
+  type        = string
+  default     = "llama-api"
+}
+
+variable "environment" {
+  description = "Environment name (dev, staging, prod)"
+  type        = string
+  default     = "dev"
+}
+
+variable "model_image_uri" {
+  description = "URI of the container image for model inference"
+  type        = string
+  default     = "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+}
+
+variable "model_data_s3_path" {
+  description = "S3 path to the model artifacts (tar.gz file)"
+  type        = string
+  default     = ""
+}
+
+variable "model_name" {
+  description = "Name of the model for inference"
+  type        = string
+  default     = "llama-3-3-70b-instruct"
+}
+
+variable "instance_type" {
+  description = "SageMaker instance type for hosting (use ml.m5.xlarge for CPU if GPU quota unavailable)"
+  type        = string
+  default     = "ml.p4d.24xlarge"
+}
+
+variable "initial_instance_count" {
+  description = "Initial number of instances for the endpoint"
+  type        = number
+  default     = 1
+}

+ 85 - 0
terraform/gcp-cloud-run-default/README.md

@@ -0,0 +1,85 @@
+# Google Cloud Platform Cloud Run deployment
+
+Deploy containerized Llama models using Google Cloud Run with auto-scaling.
+
+## Overview
+
+This Terraform configuration sets up a basic example deployment, demonstrating how to deploy and serve foundation models using Google Cloud Run services. Google Cloud Run provides serverless container deployment with automatic scaling.
+
+This example shows how to use basic services such as:
+
+- IAM roles for permissions management
+- Service accounts for fine-grained access control
+- Containerization of Llama models on Cloud Run
+
+In our [architecture patterns for private cloud guide](/docs/open_source/private_cloud.md) we outline advanced patterns that you may choose to implement in a more complete cloud deployment. These include:
+
+- Deployment into multiple regions or clouds
+- Managed keys/secrets services
+- Comprehensive logging systems for auditing and compliance
+- Backup and recovery systems
+
+## Getting started
+
+### Prerequisites
+
+* GCP project with **billing account enabled** (required for Google Cloud Run and Google Cloud Artifact Registry)
+* Terraform installed
+* Docker container image with Llama model (see container setup below)
+* Google Cloud CLI configured
+* Application Default Credentials: `gcloud auth application-default login`
+
+### Deploy
+
+1. Configure GCP authentication:
+   ```bash
+   gcloud auth login
+   gcloud config set project YOUR_PROJECT_ID
+   ```
+
+2. Prepare a container image with vLLM. For speed and simplicity's sake, we will use a small 1B-parameter model. You may choose a larger Llama model; if so, increase the resource requirements in your tfvars file.
+   ```bash
+   # Create Dockerfile
+   # The vllm/vllm-openai image's entrypoint is the OpenAI-compatible API server,
+   # so CMD only supplies its arguments (exec-form CMD does not expand env vars)
+   cat > Dockerfile << 'EOF'
+   FROM vllm/vllm-openai:latest
+   CMD ["--model", "meta-llama/Llama-3.2-1B-Instruct", "--host", "0.0.0.0", "--port", "8080"]
+   EOF
+   
+   # Build and push (you can also push to the Artifact Registry repository this
+   # configuration creates; see the repository_url output)
+   docker build -t llama-inference .
+   docker tag llama-inference gcr.io/YOUR_PROJECT_ID/llama-inference:latest
+   docker push gcr.io/YOUR_PROJECT_ID/llama-inference:latest
+   ```
+
+3. Create configuration:
+   ```bash
+   cd terraform/gcp-cloud-run-default
+   cp terraform.tfvars.example terraform.tfvars
+   ```
+
+4. Edit terraform.tfvars with your project ID and container image.
+
+5. Deploy:
+   ```bash
+   terraform init
+   terraform plan
+   terraform apply
+   ```
+
+### Usage
+
+```bash
+# Get service URL
+SERVICE_URL=$(terraform output -raw service_url)
+
+# Make a request to the vLLM OpenAI-compatible completions endpoint
+# (if allow_public_access is false, also send an identity token:
+#  -H "Authorization: Bearer $(gcloud auth print-identity-token)")
+curl -X POST $SERVICE_URL/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "meta-llama/Llama-3.2-1B-Instruct", "prompt": "Hello, how are you?", "max_tokens": 100}'
+```
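+
+Because the container runs vLLM's OpenAI-compatible server, you can also call it with the `openai` Python client. A minimal sketch, assuming `allow_public_access = true`; the base URL is a placeholder taken from the `service_url` output (for a private service, send an identity token in an `Authorization` header instead):
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="https://YOUR-SERVICE-URL.run.app/v1",  # value of the service_url output, plus /v1
+    api_key="EMPTY",  # vLLM's server does not check the API key unless one is configured
+)
+
+completion = client.completions.create(
+    model="meta-llama/Llama-3.2-1B-Instruct",
+    prompt="Hello, how are you?",
+    max_tokens=100,
+)
+print(completion.choices[0].text)
+```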
+
+## Next steps
+
+* [Google Cloud Run Documentation](https://cloud.google.com/run/docs)
+* [Google Container Registry Guide](https://cloud.google.com/container-registry/docs)

+ 158 - 0
terraform/gcp-cloud-run-default/main.tf

@@ -0,0 +1,158 @@
+# Minimal GCP Cloud Run Terraform configuration for Llama deployment
+# This creates only the essential resources for Cloud Run deployment
+
+terraform {
+  required_version = ">= 1.0"
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 6.0"
+    }
+  }
+}
+
+provider "google" {
+  project = var.project_id
+  region  = var.region
+}
+
+# Local values
+locals {
+  name_prefix = "${var.project_name}-${var.environment}"
+
+  # Required APIs for Cloud Run
+  required_apis = [
+    "run.googleapis.com",
+    "artifactregistry.googleapis.com",
+    "iam.googleapis.com"
+  ]
+}
+
+# Enable required Google Cloud APIs
+resource "google_project_service" "cloud_run_apis" {
+  for_each = toset(local.required_apis)
+
+  project = var.project_id
+  service = each.value
+
+  disable_dependent_services = false
+  disable_on_destroy         = false
+}
+
+# Artifact Registry repository for container images
+resource "google_artifact_registry_repository" "llama_repository" {
+  repository_id = "${local.name_prefix}-repo"
+  format        = "DOCKER"
+  location      = var.region
+  description   = "Container repository for Llama inference images"
+
+  labels = {
+    project     = var.project_name
+    environment = var.environment
+    managed-by  = "terraform"
+  }
+
+  depends_on = [google_project_service.cloud_run_apis]
+}
+
+# Service account for Cloud Run service
+resource "google_service_account" "cloud_run_sa" {
+  account_id   = "${local.name_prefix}-run-sa"
+  display_name = "Cloud Run Service Account for ${var.project_name}"
+  description  = "Service account for Llama Cloud Run deployment"
+
+  depends_on = [google_project_service.cloud_run_apis]
+}
+
+# IAM role bindings for Cloud Run service account
+resource "google_project_iam_member" "cloud_run_sa_roles" {
+  for_each = toset([
+    "roles/logging.logWriter",
+    "roles/monitoring.metricWriter",
+    "roles/artifactregistry.reader"
+  ])
+
+  project = var.project_id
+  role    = each.value
+  member  = "serviceAccount:${google_service_account.cloud_run_sa.email}"
+}
+
+# Cloud Run service
+resource "google_cloud_run_v2_service" "llama_service" {
+  name     = "${local.name_prefix}-service"
+  location = var.region
+
+  template {
+    service_account = google_service_account.cloud_run_sa.email
+
+    containers {
+      image = var.container_image
+
+      # Resource allocation
+      resources {
+        limits = {
+          cpu    = var.cpu_limit
+          memory = var.memory_limit
+        }
+      }
+
+      # Environment variables
+      dynamic "env" {
+        for_each = var.environment_variables
+        content {
+          name  = env.key
+          value = env.value
+        }
+      }
+
+      # Container port
+      ports {
+        container_port = var.container_port
+        name           = "http1"
+      }
+    }
+
+    # Service scaling configuration
+    scaling {
+      min_instance_count = var.min_instances
+      max_instance_count = var.max_instances
+    }
+
+    # Execution environment
+    execution_environment = var.execution_environment
+  }
+
+  # Traffic configuration
+  traffic {
+    percent = 100
+    type    = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
+  }
+
+  labels = {
+    project     = var.project_name
+    environment = var.environment
+    managed-by  = "terraform"
+  }
+
+  depends_on = [google_project_service.cloud_run_apis]
+}
+
+# IAM policy for public access (optional)
+resource "google_cloud_run_v2_service_iam_member" "public_access" {
+  count = var.allow_public_access ? 1 : 0
+
+  location = google_cloud_run_v2_service.llama_service.location
+  name     = google_cloud_run_v2_service.llama_service.name
+  role     = "roles/run.invoker"
+  member   = "allUsers"
+}
+
+# IAM policy for authenticated access
+resource "google_cloud_run_v2_service_iam_member" "authenticated_access" {
+  for_each = toset(var.allowed_members)
+
+  location = google_cloud_run_v2_service.llama_service.location
+  name     = google_cloud_run_v2_service.llama_service.name
+  role     = "roles/run.invoker"
+  member   = each.value
+}

+ 31 - 0
terraform/gcp-cloud-run-default/outputs.tf

@@ -0,0 +1,31 @@
+# Outputs for minimal GCP Cloud Run deployment
+
+output "service_url" {
+  description = "URL of the Cloud Run service"
+  value       = google_cloud_run_v2_service.llama_service.uri
+}
+
+output "service_name" {
+  description = "Name of the Cloud Run service"
+  value       = google_cloud_run_v2_service.llama_service.name
+}
+
+output "service_account_email" {
+  description = "Email of the Cloud Run service account"
+  value       = google_service_account.cloud_run_sa.email
+}
+
+output "repository_url" {
+  description = "URL of the Artifact Registry repository"
+  value       = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.llama_repository.repository_id}"
+}
+
+output "project_id" {
+  description = "GCP project ID"
+  value       = var.project_id
+}
+
+output "region" {
+  description = "GCP region"
+  value       = var.region
+}

+ 33 - 0
terraform/gcp-cloud-run-default/terraform.tfvars.example

@@ -0,0 +1,33 @@
+# Example terraform.tfvars for minimal GCP Cloud Run deployment
+# Copy this file to terraform.tfvars and customize as needed
+
+# GCP Configuration
+project_id = "your-gcp-project-id"
+region     = "us-central1"
+
+# Project Configuration
+project_name = "my-llama-api"
+environment  = "dev"
+
+# Container Configuration
+container_image = "gcr.io/cloudrun/hello"  # Replace with your Llama inference image
+cpu_limit       = "4"
+memory_limit    = "4Gi"
+container_port  = 8080
+
+# Scaling Configuration
+min_instances = 0
+max_instances = 10
+
+# Environment Variables (optional)
+environment_variables = {
+  MODEL_NAME = "llama-3.2-1B-instruct"
+  # Add other environment variables as needed
+}
+
+# Access Control
+allow_public_access = false  # Set to true for public access
+allowed_members = [
+  # "user:example@domain.com",
+  # "serviceAccount:service@project.iam.gserviceaccount.com"
+]

+ 84 - 0
terraform/gcp-cloud-run-default/variables.tf

@@ -0,0 +1,84 @@
+# Variables for minimal GCP Cloud Run deployment
+
+variable "project_id" {
+  description = "GCP Project ID"
+  type        = string
+}
+
+variable "region" {
+  description = "GCP region for deployment"
+  type        = string
+  default     = "us-central1"
+}
+
+variable "project_name" {
+  description = "Name of the project (used for resource naming)"
+  type        = string
+  default     = "llama-api"
+}
+
+variable "environment" {
+  description = "Environment name (dev, staging, prod)"
+  type        = string
+  default     = "dev"
+}
+
+variable "container_image" {
+  description = "Container image URL for the Cloud Run service"
+  type        = string
+  default     = "gcr.io/cloudrun/hello"
+}
+
+variable "cpu_limit" {
+  description = "CPU limit for the container"
+  type        = string
+  default     = "2"
+}
+
+variable "memory_limit" {
+  description = "Memory limit for the container"
+  type        = string
+  default     = "2Gi"
+}
+
+variable "container_port" {
+  description = "Port that the container listens on"
+  type        = number
+  default     = 8080
+}
+
+variable "min_instances" {
+  description = "Minimum number of instances"
+  type        = number
+  default     = 0
+}
+
+variable "max_instances" {
+  description = "Maximum number of instances"
+  type        = number
+  default     = 10
+}
+
+variable "execution_environment" {
+  description = "Execution environment for the service"
+  type        = string
+  default     = "EXECUTION_ENVIRONMENT_GEN2"
+}
+
+variable "environment_variables" {
+  description = "Environment variables for the container"
+  type        = map(string)
+  default     = {}
+}
+
+variable "allow_public_access" {
+  description = "Whether to allow public access to the service"
+  type        = bool
+  default     = false
+}
+
+variable "allowed_members" {
+  description = "List of members allowed to access the service"
+  type        = list(string)
+  default     = []
+}

+ 73 - 0
terraform/gcp-vertex-ai-default/README.md

@@ -0,0 +1,73 @@
+# GCP Vertex AI deployment
+
+Deploy Llama 4 Scout models using the Google Cloud Vertex AI managed service.
+
+## Overview
+
+This Terraform configuration sets up a basic example deployment, demonstrating how to deploy and serve foundation models using GCP Vertex AI. Vertex AI provides fully managed ML services with Model-as-a-Service (MaaS) endpoints.
+
+This example shows how to use basic services such as:
+
+- IAM roles for permissions management
+- Service accounts for fine-grained access control
+- Creating Vertex endpoints for model serving
+
+In our [architecture patterns for private cloud guide](/docs/open_source/private_cloud.md) we outline advanced patterns that you may choose to implement in a more complete cloud deployment. These include:
+
+- Deployment into multiple regions or clouds
+- Managed keys/secrets services
+- Comprehensive logging systems for auditing and compliance
+- Backup and recovery systems
+
+## Getting started
+
+### Prerequisites
+
+* GCP project with **billing account enabled** (required for API activation)
+* Terraform installed
+* gcloud CLI configured
+* Application Default Credentials: `gcloud auth application-default login`
+
+### Deploy
+
+1. Configure GCP authentication:
+   ```bash
+   gcloud auth login
+   gcloud config set project YOUR_PROJECT_ID
+   ```
+
+2. Create configuration:
+   ```bash
+   cd terraform/gcp-vertex-ai-default
+   cp terraform.tfvars.example terraform.tfvars
+   ```
+
+3. Edit terraform.tfvars with your project ID.
+
+4. Deploy:
+   ```bash
+   terraform init
+   terraform plan
+   terraform apply
+   ```
+
+### Usage
+
+1. Accept the Llama Community License in the Vertex AI Model Garden.
+2. Use Llama 4 Scout via the MaaS API:
+
+```python
+from google.cloud import aiplatform
+
+aiplatform.init(
+    project="your-project-id",
+    location="us-central1"
+)
+
+# Model ID: meta/llama-4-scout-17b-16e-instruct-maas
+```
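+
+To actually send a request, the Llama MaaS models are exposed through an OpenAI-compatible chat completions endpoint. The sketch below uses Application Default Credentials and a best-guess endpoint path; confirm the current URL and model ID in the Model Garden documentation before relying on it:
+
+```python
+import google.auth
+import google.auth.transport.requests
+import requests
+
+project_id = "your-project-id"   # same value as project_id in terraform.tfvars
+region = "us-central1"
+
+# Obtain an access token from Application Default Credentials
+credentials, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
+credentials.refresh(google.auth.transport.requests.Request())
+
+# Assumed OpenAI-compatible MaaS path; check the Model Garden docs for the current form
+url = (
+    f"https://{region}-aiplatform.googleapis.com/v1/projects/{project_id}"
+    f"/locations/{region}/endpoints/openapi/chat/completions"
+)
+
+response = requests.post(
+    url,
+    headers={"Authorization": f"Bearer {credentials.token}"},
+    json={
+        "model": "meta/llama-4-scout-17b-16e-instruct-maas",
+        "messages": [{"role": "user", "content": "Hello, how are you?"}],
+        "max_tokens": 256,
+    },
+)
+print(response.json())
+```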
+
+## Next steps
+
+* [Vertex AI Documentation](https://cloud.google.com/vertex-ai/docs)
+* [Vertex AI Model Garden](https://console.cloud.google.com/vertex-ai/model-garden)

+ 119 - 0
terraform/gcp-vertex-ai-default/main.tf

@@ -0,0 +1,119 @@
+# Minimal GCP Vertex AI Terraform configuration for Llama deployment
+# This creates only the essential resources for Vertex AI model deployment
+
+terraform {
+  required_version = ">= 1.0"
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 6.0"
+    }
+  }
+}
+
+provider "google" {
+  project = var.project_id
+  region  = var.region
+}
+
+# Local values
+locals {
+  name_prefix = "${var.project_name}-${var.environment}"
+
+  # Required APIs for Vertex AI
+  required_apis = [
+    "aiplatform.googleapis.com",
+    "storage.googleapis.com",
+    "iam.googleapis.com"
+  ]
+}
+
+# Enable required Google Cloud APIs
+resource "google_project_service" "vertex_apis" {
+  for_each = toset(local.required_apis)
+
+  project = var.project_id
+  service = each.value
+
+  disable_dependent_services = false
+  disable_on_destroy         = false
+}
+
+# Service Account for Vertex AI operations
+resource "google_service_account" "vertex_ai_sa" {
+  account_id   = "${local.name_prefix}-vertex-sa"
+  display_name = "Vertex AI Service Account for ${var.project_name}"
+  description  = "Service account for Vertex AI Llama model deployment"
+
+  depends_on = [google_project_service.vertex_apis]
+}
+
+# IAM roles for the Vertex AI service account
+resource "google_project_iam_member" "vertex_ai_user" {
+  project = var.project_id
+  role    = "roles/aiplatform.user"
+  member  = "serviceAccount:${google_service_account.vertex_ai_sa.email}"
+}
+
+resource "google_project_iam_member" "storage_admin" {
+  project = var.project_id
+  role    = "roles/storage.admin"
+  member  = "serviceAccount:${google_service_account.vertex_ai_sa.email}"
+}
+
+# Cloud Storage bucket for model artifacts
+resource "google_storage_bucket" "vertex_artifacts" {
+  name     = "${local.name_prefix}-vertex-artifacts-${random_id.bucket_suffix.hex}"
+  location = var.region
+
+  uniform_bucket_level_access = true
+
+  versioning {
+    enabled = true
+  }
+
+  lifecycle_rule {
+    condition {
+      age = var.artifact_retention_days
+    }
+    action {
+      type = "Delete"
+    }
+  }
+
+  labels = {
+    project     = var.project_name
+    environment = var.environment
+    managed-by  = "terraform"
+  }
+
+  depends_on = [google_project_service.vertex_apis]
+}
+
+# Random ID for bucket naming
+resource "random_id" "bucket_suffix" {
+  byte_length = 4
+}
+
+# Optional: Vertex AI Dataset (uncomment if needed)
+# resource "google_vertex_ai_dataset" "llama_dataset" {
+#   display_name        = "${local.name_prefix}-dataset"
+#   metadata_schema_uri = "gs://google-cloud-aiplatform/schema/dataset/metadata/text_1.0.0.yaml"
+#   region              = var.region
+#   
+#   depends_on = [google_project_service.vertex_apis]
+# }
+
+# Optional: Vertex AI Endpoint (uncomment if needed)
+# resource "google_vertex_ai_endpoint" "llama_endpoint" {
+#   display_name = "${local.name_prefix}-endpoint"
+#   location     = var.region
+#   description  = "Endpoint for Llama model serving"
+#   
+#   labels = {
+#     project     = var.project_name
+#     environment = var.environment
+#   }
+#   
+#   depends_on = [google_project_service.vertex_apis]
+# }

+ 31 - 0
terraform/gcp-vertex-ai-default/outputs.tf

@@ -0,0 +1,31 @@
+# Outputs for minimal GCP Vertex AI deployment
+
+output "project_id" {
+  description = "GCP project ID"
+  value       = var.project_id
+}
+
+output "region" {
+  description = "GCP region"
+  value       = var.region
+}
+
+output "service_account_email" {
+  description = "Email of the Vertex AI service account"
+  value       = google_service_account.vertex_ai_sa.email
+}
+
+output "bucket_name" {
+  description = "Name of the artifacts storage bucket"
+  value       = google_storage_bucket.vertex_artifacts.name
+}
+
+output "bucket_url" {
+  description = "URL of the artifacts storage bucket"
+  value       = google_storage_bucket.vertex_artifacts.url
+}
+
+output "vertex_ai_region_endpoint" {
+  description = "Vertex AI API regional endpoint"
+  value       = "https://${var.region}-aiplatform.googleapis.com"
+}

+ 13 - 0
terraform/gcp-vertex-ai-default/terraform.tfvars.example

@@ -0,0 +1,13 @@
+# Example terraform.tfvars for minimal GCP Vertex AI deployment
+# Copy this file to terraform.tfvars and customize as needed
+
+# GCP Configuration
+project_id = "your-gcp-project-id"
+region     = "us-central1"
+
+# Project Configuration
+project_name = "my-llama-api"
+environment  = "dev"
+
+# Storage Configuration
+artifact_retention_days = 30

+ 30 - 0
terraform/gcp-vertex-ai-default/variables.tf

@@ -0,0 +1,30 @@
+# Variables for minimal GCP Vertex AI deployment
+
+variable "project_id" {
+  description = "GCP Project ID"
+  type        = string
+}
+
+variable "region" {
+  description = "GCP region for deployment"
+  type        = string
+  default     = "us-central1"
+}
+
+variable "project_name" {
+  description = "Name of the project (used for resource naming)"
+  type        = string
+  default     = "llama-api"
+}
+
+variable "environment" {
+  description = "Environment name (dev, staging, prod)"
+  type        = string
+  default     = "dev"
+}
+
+variable "artifact_retention_days" {
+  description = "Number of days to retain artifacts in the storage bucket"
+  type        = number
+  default     = 30
+}