GCP Cloud Run: Serverless Containers

Cloud Run is Google Cloud's serverless container platform. It runs stateless containers that scale automatically from zero to thousands of instances, charging only for actual compute time. Understanding Cloud Run's capabilities enables building cost-effective, scalable applications.

Cloud Run Fundamentals

Key Characteristics

Cloud Run Features:
├── Fully Managed
│   ├── No infrastructure to manage
│   ├── Automatic scaling (0 to N)
│   ├── Built-in load balancing
│   └── Automatic HTTPS
│
├── Container-Based
│   ├── Any language/framework
│   ├── Standard container images
│   ├── Bring your own Dockerfile
│   └── Cloud Build integration
│
├── Pricing
│   ├── Pay per request + CPU/memory time
│   ├── Free tier: 2M requests/month
│   ├── CPU always allocated or on-demand
│   └── Minimum instances for warm starts
│
└── Networking
    ├── Public internet access
    ├── VPC connector for private resources
    ├── Custom domains
    └── Internal-only services

Configuration Options

Setting	Options	Default
CPU	0.08 - 8 vCPU	1 vCPU
Memory	128MiB - 32GiB (Gen2 minimum: 512MiB)	512MiB
Timeout	1s - 3600s	300s
Concurrency	1 - 1000	80
Min instances	0 - 1000	0
Max instances	1 - 1000	100

Deployment

Terraform Configuration

# cloud-run.tf
# Primary Cloud Run service.
# Scaling bounds, CPU allocation and log level are derived from var.environment.
resource "google_cloud_run_v2_service" "main" {
  name     = "${var.project}-api"
  location = var.region
  project  = var.project_id

  template {
    # Revisions run as the dedicated service account rather than the default one.
    service_account = google_service_account.cloud_run.email

    scaling {
      # Production keeps one instance running; other environments scale to zero.
      min_instance_count = var.environment == "production" ? 1 : 0
      max_instance_count = var.environment == "production" ? 100 : 10
    }

    containers {
      image = "${var.region}-docker.pkg.dev/${var.project_id}/${var.artifact_repo}/${var.image_name}:${var.image_tag}"

      resources {
        limits = {
          cpu    = "2"
          memory = "1Gi"
        }
        # true = CPU throttled when idle (non-production);
        # false = CPU always allocated (production).
        cpu_idle          = var.environment != "production"
        startup_cpu_boost = true
      }

      ports {
        container_port = 8080
      }

      env {
        name  = "NODE_ENV"
        value = var.environment
      }

      env {
        name  = "LOG_LEVEL"
        value = var.environment == "production" ? "info" : "debug"
      }

      # DATABASE_URL is read from Secret Manager instead of being stored in
      # the service definition.
      env {
        name = "DATABASE_URL"
        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.database_url.secret_id
            version = "latest"
          }
        }
      }

      startup_probe {
        http_get {
          path = "/health"
          port = 8080
        }
        initial_delay_seconds = 5
        period_seconds        = 10
        failure_threshold     = 3
      }

      liveness_probe {
        http_get {
          path = "/health"
          port = 8080
        }
        period_seconds    = 30
        failure_threshold = 3
      }
    }

    # Only traffic to private IP ranges goes through the connector.
    vpc_access {
      connector = google_vpc_access_connector.main.id
      egress    = "PRIVATE_RANGES_ONLY"
    }

    timeout = "300s"

    execution_environment = "EXECUTION_ENVIRONMENT_GEN2"
  }

  # All traffic goes to the latest revision.
  traffic {
    type    = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
    percent = 100
  }

  # The service references the secret and the service account, but not the IAM
  # grant that lets that account read the secret. Without an explicit ordering
  # Terraform may create the service before the grant exists, and the first
  # revision then fails to start because it cannot resolve DATABASE_URL.
  depends_on = [
    google_secret_manager_secret_iam_member.cloud_run,
  ]

  # Ignore image/client drift so deployments made outside Terraform (for
  # example by the Cloud Build pipeline) are not reverted on the next apply.
  lifecycle {
    ignore_changes = [
      template[0].containers[0].image,
      client,
      client_version
    ]
  }
}

# Unauthenticated (public) invocation.
# Created only when var.allow_unauthenticated is true.
resource "google_cloud_run_v2_service_iam_member" "public" {
  count = var.allow_unauthenticated ? 1 : 0

  # Target service
  name     = google_cloud_run_v2_service.main.name
  location = var.region
  project  = var.project_id

  # Binding: every caller may invoke the service
  member = "allUsers"
  role   = "roles/run.invoker"
}

# Maps var.custom_domain onto the Cloud Run service.
resource "google_cloud_run_domain_mapping" "main" {
  project  = var.project_id
  location = var.region
  name     = var.custom_domain

  # Route the domain to the service defined in this file.
  spec {
    route_name = google_cloud_run_v2_service.main.name
  }

  # The namespace is the project ID.
  metadata {
    namespace = var.project_id
  }
}

# Dedicated identity that the Cloud Run revisions run as.
resource "google_service_account" "cloud_run" {
  project      = var.project_id
  account_id   = format("%s-run-sa", var.project)
  display_name = "Cloud Run Service Account"
}

# Lets the Cloud Run service account read the database URL secret.
resource "google_secret_manager_secret_iam_member" "cloud_run" {
  project   = var.project_id
  secret_id = google_secret_manager_secret.database_url.secret_id

  member = format("serviceAccount:%s", google_service_account.cloud_run.email)
  role   = "roles/secretmanager.secretAccessor"
}

# Serverless VPC Access connector referenced by the service's vpc_access block.
resource "google_vpc_access_connector" "main" {
  project = var.project_id
  region  = var.region
  name    = format("%s-connector", var.project)

  # Network the connector attaches to, and the /28 range reserved for it.
  network       = var.vpc_network
  ip_cidr_range = "10.8.0.0/28"

  # Connector instance bounds.
  max_instances = 10
  min_instances = 2
}

Cloud Build Pipeline

# cloudbuild.yaml
# Pipeline: warm the layer cache, build, push, then deploy the commit-tagged
# image to Cloud Run.
steps:
  # Pull the previous image so that `--cache-from` in the build step has
  # layers to reuse; build workers start without any local images.
  # The very first build has nothing to pull, so a failed pull must not fail
  # the build (`|| exit 0`).
  - name: 'gcr.io/cloud-builders/docker'
    entrypoint: 'bash'
    args:
      - '-c'
      - 'docker pull ${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:latest || exit 0'

  # Build container image, tagged with both the commit SHA and `latest`
  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'build'
      - '-t'
      - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:${SHORT_SHA}'
      - '-t'
      - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:latest'
      - '--cache-from'
      - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:latest'
      - '.'

  # Push to Artifact Registry (must happen before the deploy step can use the image)
  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'push'
      - '--all-tags'
      - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}'

  # Deploy the commit-tagged image to Cloud Run
  - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
    entrypoint: 'gcloud'
    args:
      - 'run'
      - 'deploy'
      - '${_SERVICE}'
      - '--image'
      - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:${SHORT_SHA}'
      - '--region'
      - '${_REGION}'
      - '--platform'
      - 'managed'
      - '--quiet'

# Defaults; override per trigger or with --substitutions.
substitutions:
  _SERVICE: api-service
  _REGION: europe-west1
  _REPO: cloud-run-images

options:
  logging: CLOUD_LOGGING_ONLY

# Images recorded in the build results.
images:
  - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:${SHORT_SHA}'
  - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:latest'

Traffic Management

Gradual Rollouts

# traffic-splitting.tf
# Canary rollout: two named revisions share traffic 90/10.
resource "google_cloud_run_v2_service" "main" {
  location = var.region
  name     = "api-service"

  template {
    # Name given to the revision produced by this template
    revision = "api-service-v2"
    # ...
  }

  # Existing revision keeps the bulk of the traffic.
  traffic {
    revision = "api-service-v1"
    percent  = 90
    type     = "TRAFFIC_TARGET_ALLOCATION_TYPE_REVISION"
  }

  # New revision receives the remaining share and carries the "canary" tag.
  traffic {
    revision = "api-service-v2"
    percent  = 10
    tag      = "canary"
    type     = "TRAFFIC_TARGET_ALLOCATION_TYPE_REVISION"
  }
}

Blue-Green Deployment Script

#!/bin/bash
# deploy-blue-green.sh
#
# Deploys a new revision tagged "green" with no traffic, smoke-tests it through
# its tag URL, then shifts traffic to it in steps (10/25/50/75/100 percent).
#
# Usage: deploy-blue-green.sh SERVICE_NAME IMAGE [REGION]
#   REGION defaults to europe-west1.

# Stop at the first failed command, unset variable or failed pipeline stage.
# Without this, a failed deploy would be followed by shifting traffic to
# whatever revision already holds the "green" tag.
set -euo pipefail

if [ "$#" -lt 2 ]; then
  echo "Usage: $0 SERVICE_NAME IMAGE [REGION]" >&2
  exit 1
fi

SERVICE_NAME=$1
IMAGE=$2
REGION=${3:-europe-west1}

# Deploy new revision with no traffic
gcloud run deploy "$SERVICE_NAME" \
  --image="$IMAGE" \
  --region="$REGION" \
  --no-traffic \
  --tag=green

# Get the green revision URL
# NOTE(review): index 1 assumes the green tag is the second traffic entry;
# confirm against the service's actual traffic list, or select by tag instead.
GREEN_URL=$(gcloud run services describe "$SERVICE_NAME" \
  --region="$REGION" \
  --format='value(status.traffic[1].url)')

if [ -z "$GREEN_URL" ]; then
  echo "Could not determine the green URL for $SERVICE_NAME" >&2
  exit 1
fi

# Run smoke tests against green
echo "Running smoke tests against $GREEN_URL"
curl -f "$GREEN_URL/health" || exit 1

# Gradually shift traffic
for percent in 10 25 50 75 100; do
  echo "Shifting $percent% traffic to green"
  gcloud run services update-traffic "$SERVICE_NAME" \
    --region="$REGION" \
    --to-tags="green=$percent"

  # Wait and monitor
  sleep 30

  # Check error rate (implement your own monitoring)
  # if error_rate > threshold; then rollback
done

echo "Deployment complete"

Authentication Patterns

IAM-Based Authentication

// Calling authenticated Cloud Run service
import { GoogleAuth } from 'google-auth-library';

const auth = new GoogleAuth();

/**
 * Sends an ID-token-authenticated HTTP request to a Cloud Run service.
 *
 * @param serviceUrl Full URL to call; may include a path and query string.
 * @param method HTTP method, defaults to 'GET'.
 * @param body Optional payload, sent as the request data.
 * @returns The response data of the request.
 * @throws TypeError if serviceUrl is not a valid absolute URL; errors from the
 *   auth client or the request itself propagate to the caller.
 */
export const callCloudRunService = async (
  serviceUrl: string,
  method: string = 'GET',
  body?: any
): Promise<any> => {
  // The token audience must be the service's base URL. Using the full request
  // URL breaks as soon as serviceUrl carries a path or query string, so only
  // the origin (scheme + host) is used.
  const audience = new URL(serviceUrl).origin;
  const client = await auth.getIdTokenClient(audience);

  const response = await client.request({
    url: serviceUrl,
    method,
    data: body,
    headers: {
      'Content-Type': 'application/json'
    }
  });

  return response.data;
};

// In Cloud Function calling Cloud Run
import * as functions from 'firebase-functions';

/**
 * HTTP function that forwards the request body to the Cloud Run service at
 * CLOUD_RUN_URL and relays the JSON result.
 *
 * Responds 500 when CLOUD_RUN_URL is not configured and 502 when the
 * downstream call fails, so the caller always receives a response.
 */
export const processData = functions.https.onRequest(async (req, res) => {
  const targetUrl = process.env.CLOUD_RUN_URL;

  if (!targetUrl) {
    console.error('CLOUD_RUN_URL is not set');
    res.status(500).json({ error: 'Service misconfigured' });
    return;
  }

  try {
    const result = await callCloudRunService(
      targetUrl,
      'POST',
      { data: req.body }
    );

    res.json(result);
  } catch (error) {
    // A rejected promise here would otherwise leave the request unanswered.
    console.error('Cloud Run call failed', error);
    res.status(502).json({ error: 'Upstream service error' });
  }
});

JWT Validation in Service

// middleware/auth.ts
import { OAuth2Client } from 'google-auth-library';

const client = new OAuth2Client();

/**
 * Middleware that authenticates a request using a Google-signed ID token
 * passed as `Authorization: Bearer <token>`.
 *
 * On success, attaches `{ email, sub }` from the token payload to `req.user`
 * and calls `next()`. Responds 401 when the header is missing or the token is
 * invalid, and 500 when the expected audience (CLOUD_RUN_URL) is not
 * configured.
 */
export const validateGoogleToken = async (
  req: Request,
  res: Response,
  next: NextFunction
) => {
  // Fail closed: with no expected audience, the token's audience claim would
  // not be checked and an ID token minted for any other service would pass.
  const expectedAudience = process.env.CLOUD_RUN_URL;
  if (!expectedAudience) {
    return res.status(500).json({ error: 'Server misconfigured' });
  }

  const authHeader = req.headers.authorization;

  if (!authHeader?.startsWith('Bearer ')) {
    return res.status(401).json({ error: 'Missing authorization' });
  }

  // Strip the "Bearer " prefix (7 characters).
  const token = authHeader.substring(7);

  try {
    const ticket = await client.verifyIdToken({
      idToken: token,
      audience: expectedAudience
    });

    const payload = ticket.getPayload();
    if (!payload) {
      // A verified ticket without a payload carries no identity to trust.
      return res.status(401).json({ error: 'Invalid token' });
    }

    req.user = {
      email: payload.email,
      sub: payload.sub
    };

    next();
  } catch (error) {
    return res.status(401).json({ error: 'Invalid token' });
  }
};

Cost Optimisation

Configuration for Cost

# Cost-focused profile: scales to zero, small instance, throttled CPU.
resource "google_cloud_run_v2_service" "cost_optimised" {
  location = var.region
  name     = "cost-optimised-service"

  template {
    # Keep the request timeout short
    timeout = "60s"

    scaling {
      # No instances kept running while there is no traffic
      min_instance_count = 0
      max_instance_count = 10
    }

    containers {
      image = var.image

      resources {
        # CPU is throttled while the instance is idle
        cpu_idle = true

        limits = {
          memory = "512Mi"
          cpu    = "1"
        }
      }
    }
  }
}

# Performance-focused profile: warm instances, larger instance, CPU always on.
resource "google_cloud_run_v2_service" "performance_optimised" {
  location = var.region
  name     = "performance-optimised-service"

  template {
    scaling {
      # Two instances are kept warm at all times
      min_instance_count = 2
      max_instance_count = 100
    }

    containers {
      image = var.image

      resources {
        # CPU stays allocated for the whole instance lifetime
        cpu_idle          = false
        startup_cpu_boost = true

        limits = {
          memory = "2Gi"
          cpu    = "4"
        }
      }
    }
  }
}

Key Takeaways

Container portability: Standard containers work without modification
Scale to zero: Cost-effective for variable workloads
Min instances: Use for latency-sensitive applications
VPC connector: Use a Serverless VPC Access connector (or Direct VPC egress) to reach private resources
Traffic splitting: Enable gradual rollouts and testing
Gen2 execution: Better performance and features
CPU allocation: Choose based on workload pattern
Service-to-service: Use IAM for internal authentication

Cloud Run provides serverless simplicity with container flexibility. Proper configuration of scaling and CPU allocation optimises both cost and performance.