AWS Fargate: Serverless Container Orchestration

AWS Fargate is a serverless compute engine for containers. It removes the need to provision and manage servers, letting you focus on building applications. Fargate works with both Amazon ECS and Amazon EKS, providing flexible deployment options for containerised workloads.

Fargate Fundamentals

Fargate vs EC2 Launch Type

EC2 Launch Type:
┌─────────────────────────────────────────────────┐
│                 EC2 Instance                     │
│  ┌─────────────┐ ┌─────────────┐ ┌────────────┐ │
│  │ Container 1 │ │ Container 2 │ │ Container 3│ │
│  └─────────────┘ └─────────────┘ └────────────┘ │
│                                                  │
│  You manage: Instance sizing, patching, scaling │
└─────────────────────────────────────────────────┘

Fargate Launch Type:
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│ Fargate Task    │ │ Fargate Task    │ │ Fargate Task    │
│ ┌─────────────┐ │ │ ┌─────────────┐ │ │ ┌─────────────┐ │
│ │ Container 1 │ │ │ │ Container 2 │ │ │ │ Container 3 │ │
│ └─────────────┘ │ │ └─────────────┘ │ │ └─────────────┘ │
│                 │ │                 │ │                 │
│ AWS manages     │ │ AWS manages     │ │ AWS manages     │
└─────────────────┘ └─────────────────┘ └─────────────────┘

Resource Configuration

CPU (vCPU)	Memory Options
0.25	0.5GB, 1GB, 2GB
0.5	1GB - 4GB
1	2GB - 8GB
2	4GB - 16GB
4	8GB - 30GB
8	16GB - 60GB
16	32GB - 120GB

ECS with Fargate

Task Definition

{
  "family": "api-service",
  "networkMode": "awsvpc",
  "requiresCompatibilities": ["FARGATE"],
  "cpu": "512",
  "memory": "1024",
  "executionRoleArn": "arn:aws:iam::123456789:role/ecsTaskExecutionRole",
  "taskRoleArn": "arn:aws:iam::123456789:role/apiTaskRole",
  "containerDefinitions": [
    {
      "name": "api",
      "image": "123456789.dkr.ecr.eu-west-1.amazonaws.com/api:latest",
      "essential": true,
      "portMappings": [
        {
          "containerPort": 8080,
          "protocol": "tcp"
        }
      ],
      "environment": [
        {
          "name": "NODE_ENV",
          "value": "production"
        }
      ],
      "secrets": [
        {
          "name": "DATABASE_URL",
          "valueFrom": "arn:aws:secretsmanager:eu-west-1:123456789:secret:db-url"
        }
      ],
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "/ecs/api-service",
          "awslogs-region": "eu-west-1",
          "awslogs-stream-prefix": "ecs"
        }
      },
      "healthCheck": {
        "command": ["CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"],
        "interval": 30,
        "timeout": 5,
        "retries": 3,
        "startPeriod": 60
      }
    }
  ]
}

Service Definition with Terraform

# ecs.tf
# ECS cluster that hosts the Fargate services defined in this configuration.
resource "aws_ecs_cluster" "main" {
  name = "${var.project}-cluster"

  # ECS Exec: with logging = "OVERRIDE" the session logs go to the destination
  # named in log_configuration (here, a dedicated CloudWatch log group).
  configuration {
    execute_command_configuration {
      logging = "OVERRIDE"

      log_configuration {
        cloud_watch_log_group_name = aws_cloudwatch_log_group.ecs_exec.name
      }
    }
  }

  # Enable CloudWatch Container Insights for this cluster.
  setting {
    name  = "containerInsights"
    value = "enabled"
  }
}

# Fargate-backed ECS service for the API. Tasks run in private subnets, receive
# traffic through the ALB target group, and are registered in Cloud Map.
resource "aws_ecs_service" "api" {
  name            = "api-service"
  cluster         = aws_ecs_cluster.main.id
  task_definition = aws_ecs_task_definition.api.arn
  desired_count   = 3
  launch_type     = "FARGATE"

  # Rolling-deployment bounds. On aws_ecs_service these are top-level
  # arguments, not members of a nested deployment_configuration block.
  # 200 / 100 means a full replacement set is started before any old task is
  # stopped, so serving capacity never drops below desired_count.
  deployment_maximum_percent         = 200
  deployment_minimum_healthy_percent = 100

  # Stop a deployment whose tasks fail to become healthy and roll back to the
  # last completed deployment. This is also a top-level block on the service.
  deployment_circuit_breaker {
    enable   = true
    rollback = true
  }

  network_configuration {
    subnets          = var.private_subnet_ids
    security_groups  = [aws_security_group.ecs_tasks.id]
    assign_public_ip = false
  }

  # container_name / container_port must match the container definition in the
  # task definition ("api" listening on 8080).
  load_balancer {
    target_group_arn = aws_lb_target_group.api.arn
    container_name   = "api"
    container_port   = 8080
  }

  service_registries {
    registry_arn = aws_service_discovery_service.api.arn
  }

  # Allows ECS Exec sessions into running tasks.
  enable_execute_command = true

  # desired_count is adjusted by Application Auto Scaling after creation;
  # ignoring it stops Terraform from resetting the count on every apply.
  lifecycle {
    ignore_changes = [desired_count]
  }
}

# Auto Scaling
# Registers the API service's desired task count as a scalable target,
# bounded to between 2 and 10 tasks.
resource "aws_appautoscaling_target" "api" {
  service_namespace  = "ecs"
  scalable_dimension = "ecs:service:DesiredCount"
  resource_id        = "service/${aws_ecs_cluster.main.name}/${aws_ecs_service.api.name}"

  min_capacity = 2
  max_capacity = 10
}

# Target-tracking policy that keeps the service's average CPU utilisation
# around 70%.
resource "aws_appautoscaling_policy" "api_cpu" {
  name        = "api-cpu-scaling"
  policy_type = "TargetTrackingScaling"

  # Point at the scalable target by reusing the values held on
  # aws_appautoscaling_target.api.
  service_namespace  = aws_appautoscaling_target.api.service_namespace
  scalable_dimension = aws_appautoscaling_target.api.scalable_dimension
  resource_id        = aws_appautoscaling_target.api.resource_id

  target_tracking_scaling_policy_configuration {
    target_value = 70

    # Cooldowns are in seconds: scale out quickly, scale in conservatively.
    scale_out_cooldown = 60
    scale_in_cooldown  = 300

    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageCPUUtilization"
    }
  }
}

# Target-tracking policy that keeps the service's average memory utilisation
# around 80%.
resource "aws_appautoscaling_policy" "api_memory" {
  name        = "api-memory-scaling"
  policy_type = "TargetTrackingScaling"

  # Point at the scalable target by reusing the values held on
  # aws_appautoscaling_target.api.
  service_namespace  = aws_appautoscaling_target.api.service_namespace
  scalable_dimension = aws_appautoscaling_target.api.scalable_dimension
  resource_id        = aws_appautoscaling_target.api.resource_id

  target_tracking_scaling_policy_configuration {
    target_value = 80

    # Cooldowns are in seconds: scale out quickly, scale in conservatively.
    scale_out_cooldown = 60
    scale_in_cooldown  = 300

    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageMemoryUtilization"
    }
  }
}

EKS with Fargate

Fargate Profile

# eks-fargate.tf
# Fargate profile for the EKS cluster: pods matching any selector below are
# scheduled onto Fargate in the private subnets.
resource "aws_eks_fargate_profile" "main" {
  fargate_profile_name   = "main-profile"
  cluster_name           = aws_eks_cluster.main.name
  pod_execution_role_arn = aws_iam_role.fargate_pod_execution.arn
  subnet_ids             = var.private_subnet_ids

  # Selector 1: every pod in the "default" namespace.
  selector {
    namespace = "default"
  }

  # Selector 2: pods in the "application" namespace that carry the label
  # fargate=true.
  selector {
    namespace = "application"
    labels = {
      "fargate" = "true"
    }
  }
}

# Pod execution role referenced by the Fargate profile. The trust policy lets
# the EKS Fargate pods service principal assume it.
resource "aws_iam_role" "fargate_pod_execution" {
  name = "${var.project}-fargate-pod-execution"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Principal = {
          Service = "eks-fargate-pods.amazonaws.com"
        }
        Action = "sts:AssumeRole"
      }
    ]
  })
}

# Attach the AWS-managed Fargate pod execution policy to the pod execution role.
resource "aws_iam_role_policy_attachment" "fargate_pod_execution" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSFargatePodExecutionRolePolicy"
  role       = aws_iam_role.fargate_pod_execution.name
}

Kubernetes Deployment for Fargate

# deployment.yaml
# Deployment of the API as three replicas in the "application" namespace.
# The pod template carries fargate: "true" so the pods match the Fargate
# profile selector for that namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api-service
  namespace: application
  labels:
    app: api-service
    fargate: "true"  # Label on the Deployment object itself; Fargate profile selectors match pod labels (set in the template below)
spec:
  replicas: 3
  selector:
    matchLabels:
      app: api-service
  template:
    metadata:
      labels:
        app: api-service
        # This pod label is the one that matches the Fargate profile selector
        # (namespace "application", label fargate=true).
        fargate: "true"
    spec:
      serviceAccountName: api-service
      containers:
        - name: api
          image: 123456789.dkr.ecr.eu-west-1.amazonaws.com/api:latest
          ports:
            - containerPort: 8080
          # CPU/memory requests and limits for the single container.
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "500m"
          env:
            - name: NODE_ENV
              value: production
          # Every key of the api-secrets Secret becomes an environment variable.
          envFrom:
            - secretRef:
                name: api-secrets
          # Liveness: GET /health on 8080, first probe after 30s, then every 10s.
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
          # Readiness: GET /ready on 8080, first probe after 5s, then every 5s.
          readinessProbe:
            httpGet:
              path: /ready
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
          # Hardened container: non-root UID 1000, read-only root filesystem,
          # no privilege escalation.
          securityContext:
            runAsNonRoot: true
            runAsUser: 1000
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
      # Prefer an even spread of replicas across zones; ScheduleAnyway makes
      # this a soft preference rather than a hard scheduling requirement.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: topology.kubernetes.io/zone
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: api-service

Networking

VPC Configuration

# networking.tf
# Security group attached to the ECS tasks: inbound only from the ALB on the
# application port, outbound unrestricted.
resource "aws_security_group" "ecs_tasks" {
  name        = "${var.project}-ecs-tasks"
  description = "Security group for ECS tasks"
  vpc_id      = var.vpc_id

  tags = {
    Name = "${var.project}-ecs-tasks"
  }

  # Inbound: TCP 8080, and only from members of the ALB's security group.
  ingress {
    description     = "Allow traffic from ALB"
    protocol        = "tcp"
    from_port       = 8080
    to_port         = 8080
    security_groups = [aws_security_group.alb.id]
  }

  # Outbound: protocol "-1" with ports 0-0 means all protocols and all ports.
  egress {
    description = "Allow all outbound traffic"
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# VPC Endpoints for Fargate in private subnets
# Interface endpoint for the ECR API, placed in the private subnets.
resource "aws_vpc_endpoint" "ecr_api" {
  service_name      = "com.amazonaws.${var.region}.ecr.api"
  vpc_endpoint_type = "Interface"
  vpc_id            = var.vpc_id

  subnet_ids         = var.private_subnet_ids
  security_group_ids = [aws_security_group.vpc_endpoints.id]

  # Resolve the service's regular DNS name to the endpoint inside the VPC.
  private_dns_enabled = true
}

# Interface endpoint for the ECR Docker registry, placed in the private subnets.
resource "aws_vpc_endpoint" "ecr_dkr" {
  service_name      = "com.amazonaws.${var.region}.ecr.dkr"
  vpc_endpoint_type = "Interface"
  vpc_id            = var.vpc_id

  subnet_ids         = var.private_subnet_ids
  security_group_ids = [aws_security_group.vpc_endpoints.id]

  # Resolve the service's regular DNS name to the endpoint inside the VPC.
  private_dns_enabled = true
}

# Gateway endpoint for S3. Unlike the Interface endpoints in this file it is
# attached to route tables rather than to subnets and security groups.
resource "aws_vpc_endpoint" "s3" {
  service_name      = "com.amazonaws.${var.region}.s3"
  vpc_endpoint_type = "Gateway"
  vpc_id            = var.vpc_id
  route_table_ids   = var.private_route_table_ids
}

# Interface endpoint for CloudWatch Logs, placed in the private subnets.
resource "aws_vpc_endpoint" "logs" {
  service_name      = "com.amazonaws.${var.region}.logs"
  vpc_endpoint_type = "Interface"
  vpc_id            = var.vpc_id

  subnet_ids         = var.private_subnet_ids
  security_group_ids = [aws_security_group.vpc_endpoints.id]

  # Resolve the service's regular DNS name to the endpoint inside the VPC.
  private_dns_enabled = true
}

# Interface endpoint for Secrets Manager, placed in the private subnets.
resource "aws_vpc_endpoint" "secretsmanager" {
  service_name      = "com.amazonaws.${var.region}.secretsmanager"
  vpc_endpoint_type = "Interface"
  vpc_id            = var.vpc_id

  subnet_ids         = var.private_subnet_ids
  security_group_ids = [aws_security_group.vpc_endpoints.id]

  # Resolve the service's regular DNS name to the endpoint inside the VPC.
  private_dns_enabled = true
}

Service Discovery

# service-discovery.tf
# Private Cloud Map DNS namespace "<project>.local", scoped to the VPC.
resource "aws_service_discovery_private_dns_namespace" "main" {
  name        = "${var.project}.local"
  vpc         = var.vpc_id
  description = "Private DNS namespace for service discovery"
}

# Cloud Map service "api" inside the private DNS namespace.
resource "aws_service_discovery_service" "api" {
  name = "api"

  dns_config {
    namespace_id   = aws_service_discovery_private_dns_namespace.main.id
    routing_policy = "MULTIVALUE"

    # A records with a 10-second TTL.
    dns_records {
      type = "A"
      ttl  = 10
    }
  }

  # Custom (caller-reported) health status rather than a Route 53 health check.
  health_check_custom_config {
    failure_threshold = 1
  }
}

# Services can now discover each other at:
# api.<project>.local  (where <project> is the value of var.project)

Cost Optimisation

Spot Capacity with ECS

# spot-fargate.tf
# API service that mixes on-demand Fargate with Fargate Spot through a
# capacity provider strategy instead of a fixed launch_type.
#
# NOTE(review): this uses the same service name and cluster as
# aws_ecs_service.api, so it reads as an alternative to that resource rather
# than something to apply alongside it. Confirm before applying both.
resource "aws_ecs_service" "api_spot" {
  name            = "api-service"
  cluster         = aws_ecs_cluster.main.id
  task_definition = aws_ecs_task_definition.api.arn
  desired_count   = 3

  # The base belongs on on-demand FARGATE so that the guaranteed first task
  # cannot be reclaimed by a Spot interruption. Only one provider in a
  # strategy may define a base.
  capacity_provider_strategy {
    capacity_provider = "FARGATE"
    weight            = 1
    base              = 1  # Minimum 1 task on regular Fargate
  }

  # Tasks beyond the base are split 4:1 in favour of Spot.
  capacity_provider_strategy {
    capacity_provider = "FARGATE_SPOT"
    weight            = 4
  }

  network_configuration {
    subnets          = var.private_subnet_ids
    security_groups  = [aws_security_group.ecs_tasks.id]
    assign_public_ip = false
  }
}

# Cluster capacity providers
# Associates the FARGATE and FARGATE_SPOT capacity providers with the cluster
# and sets the default strategy for services that do not specify their own.
resource "aws_ecs_cluster_capacity_providers" "main" {
  cluster_name = aws_ecs_cluster.main.name

  capacity_providers = ["FARGATE", "FARGATE_SPOT"]

  # The base belongs on on-demand FARGATE so that each service's guaranteed
  # first task is not placed on interruptible Spot capacity. Only one provider
  # in a strategy may define a base.
  default_capacity_provider_strategy {
    capacity_provider = "FARGATE"
    weight            = 1
    base              = 1
  }

  # Tasks beyond the base are split 4:1 in favour of Spot.
  default_capacity_provider_strategy {
    capacity_provider = "FARGATE_SPOT"
    weight            = 4
  }
}

Cost Comparison

Workload Type	Fargate	Fargate Spot	EC2	Recommendation
Production API	Higher	~70% cheaper	Lower	Fargate + Spot mix
Batch Jobs	Higher	~70% cheaper	Lower	Fargate Spot
Dev/Test	Higher	~70% cheaper	Lower	Fargate Spot
Stateful	Higher	Not suitable	Lower	EC2 or Fargate

Right-Sizing

// analyse-task-utilization.ts
/**
 * One utilisation sample for a task.
 *
 * cpuUtilization / memoryUtilization are compared against percentage
 * thresholds (30 and 40) below, so they are treated as percentages.
 * NOTE(review): configuredCpu / configuredMemory are presumably ECS CPU units
 * and MiB, given the 256 / 512 floors applied below. Confirm with the caller.
 */
interface TaskMetrics {
  taskId: string;
  cpuUtilization: number;
  memoryUtilization: number;
  configuredCpu: number;
  configuredMemory: number;
}

/** A single right-sizing suggestion produced by analyseAndRecommend. */
interface Recommendation {
  type: 'cpu' | 'memory';
  current: number;
  recommended: number;
  reason: string;
}

/**
 * Suggests halving the configured CPU and/or memory when average utilisation
 * across the supplied samples is low.
 *
 * @param metrics Utilisation samples. The configured CPU/memory values are
 *   read from the first sample only, so all samples are expected to describe
 *   the same task configuration.
 * @returns Zero, one or two recommendations (CPU first, then memory). An
 *   empty input yields an empty list.
 */
const analyseAndRecommend = (metrics: TaskMetrics[]): Recommendation[] => {
  // Without samples there is nothing to average and no configuration to read;
  // bail out instead of dividing by zero and dereferencing metrics[0].
  if (metrics.length === 0) {
    return [];
  }

  const recommendations: Recommendation[] = [];

  const avgCpu = metrics.reduce((sum, m) => sum + m.cpuUtilization, 0) / metrics.length;
  const avgMemory = metrics.reduce((sum, m) => sum + m.memoryUtilization, 0) / metrics.length;

  const configuredCpu = metrics[0].configuredCpu;
  const configuredMemory = metrics[0].configuredMemory;

  // CPU recommendation: below 30% average, propose half the CPU, never less than 256.
  if (avgCpu < 30) {
    recommendations.push({
      type: 'cpu',
      current: configuredCpu,
      recommended: Math.max(256, configuredCpu / 2),
      reason: `Average CPU utilization is ${avgCpu.toFixed(1)}%`
    });
  }

  // Memory recommendation: below 40% average, propose half the memory, never less than 512.
  if (avgMemory < 40) {
    recommendations.push({
      type: 'memory',
      current: configuredMemory,
      recommended: Math.max(512, configuredMemory / 2),
      reason: `Average memory utilization is ${avgMemory.toFixed(1)}%`
    });
  }

  return recommendations;
};

Observability

CloudWatch Container Insights

# container-insights.tf
# Minimal cluster definition showing only the Container Insights setting.
# NOTE(review): this has the same resource address (aws_ecs_cluster.main) as
# the cluster declared in ecs.tf. Terraform allows a resource address to be
# declared only once per module, so treat this as an excerpt of that resource
# rather than an additional declaration.
resource "aws_ecs_cluster" "main" {
  name = "${var.project}-cluster"

  # Enable CloudWatch Container Insights for this cluster.
  setting {
    name  = "containerInsights"
    value = "enabled"
  }
}

# CloudWatch dashboard
# CloudWatch dashboard with two side-by-side Container Insights widgets for
# the "api-service" service: CPU on the left, memory on the right.
resource "aws_cloudwatch_dashboard" "ecs" {
  dashboard_name = "${var.project}-ecs"

  dashboard_body = jsonencode({
    widgets = [
      # Left half of the top row: utilised vs reserved CPU.
      {
        type   = "metric"
        width  = 12
        height = 6
        x      = 0
        y      = 0
        properties = {
          title  = "CPU Utilization"
          region = var.region
          # In the second row each "." repeats the value in the same position
          # of the row above (namespace and dimensions).
          metrics = [
            ["ECS/ContainerInsights", "CpuUtilized", "ServiceName", "api-service", "ClusterName", aws_ecs_cluster.main.name],
            [".", "CpuReserved", ".", ".", ".", "."]
          ]
        }
      },
      # Right half of the top row: utilised vs reserved memory.
      {
        type   = "metric"
        width  = 12
        height = 6
        x      = 12
        y      = 0
        properties = {
          title  = "Memory Utilization"
          region = var.region
          metrics = [
            ["ECS/ContainerInsights", "MemoryUtilized", "ServiceName", "api-service", "ClusterName", aws_ecs_cluster.main.name],
            [".", "MemoryReserved", ".", ".", ".", "."]
          ]
        }
      }
    ]
  })
}

Application Logging

// logging.ts
import winston from 'winston';

// Timestamped JSON log lines.
const logFormat = winston.format.combine(
  winston.format.timestamp(),
  winston.format.json()
);

// Console is the only transport; with the awslogs log driver shown in the task
// definition, container output is what ends up in CloudWatch Logs.
const logTransports = [new winston.transports.Console()];

// Shared application logger. The level comes from LOG_LEVEL, defaulting to 'info'.
// NOTE(review): SERVICE_NAME, APP_VERSION and ECS_TASK_ID are read from the
// environment, but the task definition in this document only sets NODE_ENV
// and DATABASE_URL. Confirm these variables are provided, otherwise the
// corresponding fields are undefined.
const logger = winston.createLogger({
  level: process.env.LOG_LEVEL || 'info',
  format: logFormat,
  defaultMeta: {
    service: process.env.SERVICE_NAME,
    version: process.env.APP_VERSION,
    taskId: process.env.ECS_TASK_ID
  },
  transports: logTransports
});

// Request logging middleware
// Middleware that writes one 'HTTP Request' log entry per request once the
// response has finished, including how long the request took in milliseconds.
// NOTE(review): Request / Response / NextFunction are not imported in the
// visible snippet; presumably they are the Express types. Confirm.
const requestLogger = (req: Request, res: Response, next: NextFunction) => {
  const startedAt = Date.now();

  const logCompletedRequest = () => {
    const elapsedMs = Date.now() - startedAt;

    logger.info('HTTP Request', {
      method: req.method,
      path: req.path,
      statusCode: res.statusCode,
      duration: elapsedMs,
      userAgent: req.get('user-agent'),
      requestId: req.get('x-request-id')
    });
  };

  // 'finish' fires after the response has been handed off, so statusCode is final.
  res.on('finish', logCompletedRequest);

  next();
};

Key Takeaways

Right launch type: Use Fargate for simplicity, EC2 for cost optimisation at scale
Use Spot wisely: Mix Fargate Spot with regular Fargate for cost savings with reliability
VPC endpoints: Essential for Fargate in private subnets without NAT Gateway costs
Right-size tasks: Monitor CPU/memory utilisation and adjust configurations
Auto-scaling: Configure target tracking on CPU and memory as shown above, adding step scaling where you need a sharper reaction to sudden load
Service discovery: Use Cloud Map for service-to-service communication
Health checks: Configure appropriate health checks for graceful deployments
Security: Use task roles, secrets manager, and security groups properly

Fargate eliminates infrastructure management overhead while providing the flexibility of containers. Choose the right orchestrator (ECS vs EKS) based on your team's Kubernetes expertise and operational requirements.