GCP Cloud Run: Serverless Containers
Running containerised applications on Cloud Run. Auto-scaling, traffic management, CI/CD integration, and cost optimisation strategies.
GCP Cloud Run: Serverless Containers
Cloud Run is Google Cloud's serverless container platform. It runs stateless containers that scale automatically from zero to thousands of instances, charging only for actual compute time. Understanding Cloud Run's capabilities enables building cost-effective, scalable applications.
Cloud Run Fundamentals
Key Characteristics
Cloud Run Features:
├── Fully Managed
│ ├── No infrastructure to manage
│ ├── Automatic scaling (0 to N)
│ ├── Built-in load balancing
│ └── Automatic HTTPS
│
├── Container-Based
│ ├── Any language/framework
│ ├── Standard container images
│ ├── Bring your own Dockerfile
│ └── Cloud Build integration
│
├── Pricing
│ ├── Pay per request + CPU/memory time
│ ├── Free tier: 2M requests/month
│ ├── CPU always allocated or on-demand
│ └── Minimum instances for warm starts
│
└── Networking
  ├── Public internet access
  ├── VPC connector for private resources
  ├── Custom domains
  └── Internal-only services
Configuration Options
| Setting | Options | Default |
|---|---|---|
| CPU | 0.08 - 8 vCPU | 1 vCPU |
| Memory | 128MB - 32GB | 512MB |
| Timeout | 1s - 3600s | 300s |
| Concurrency | 1 - 1000 | 80 |
| Min instances | 0 - 1000 | 0 |
| Max instances | 1 - 1000 | 100 |
Deployment
Terraform Configuration
# cloud-run.tf
# Main Cloud Run (v2 API) service running the API container.
resource "google_cloud_run_v2_service" "main" {
  name     = "${var.project}-api"
  location = var.region
  project  = var.project_id

  template {
    # Runtime identity; the same account is granted secret access separately.
    service_account = google_service_account.cloud_run.email

    # Production keeps one instance warm; every other environment scales to zero.
    scaling {
      min_instance_count = var.environment == "production" ? 1 : 0
      max_instance_count = var.environment == "production" ? 100 : 10
    }

    containers {
      image = "${var.region}-docker.pkg.dev/${var.project_id}/${var.artifact_repo}/${var.image_name}:${var.image_tag}"

      resources {
        limits = {
          cpu    = "2"
          memory = "1Gi"
        }
        # cpu_idle = true throttles CPU when idle; production keeps CPU always allocated.
        cpu_idle          = var.environment != "production"
        startup_cpu_boost = true
      }

      ports {
        container_port = 8080
      }

      env {
        name  = "NODE_ENV"
        value = var.environment
      }

      env {
        name  = "LOG_LEVEL"
        value = var.environment == "production" ? "info" : "debug"
      }

      # DATABASE_URL is injected from Secret Manager rather than stored in config.
      env {
        name = "DATABASE_URL"
        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.database_url.secret_id
            version = "latest"
          }
        }
      }

      # Gates traffic until the container answers on /health.
      startup_probe {
        http_get {
          path = "/health"
          port = 8080
        }
        initial_delay_seconds = 5
        period_seconds        = 10
        failure_threshold     = 3
      }

      # Ongoing health check against the same endpoint.
      liveness_probe {
        http_get {
          path = "/health"
          port = 8080
        }
        period_seconds    = 30
        failure_threshold = 3
      }
    }

    # Only traffic to private IP ranges is routed through the VPC connector.
    vpc_access {
      connector = google_vpc_access_connector.main.id
      egress    = "PRIVATE_RANGES_ONLY"
    }

    timeout               = "300s"
    execution_environment = "EXECUTION_ENVIRONMENT_GEN2"
  }

  # Send all traffic to the most recently deployed revision.
  traffic {
    type    = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
    percent = 100
  }

  # The image is ignored so that deployments made outside Terraform
  # (e.g. by the CI pipeline) are not reverted on the next apply.
  lifecycle {
    ignore_changes = [
      template[0].containers[0].image,
      client,
      client_version
    ]
  }

  # The revision cannot start until the service account can read the secret,
  # so the IAM grant must exist before the service is created.
  depends_on = [
    google_secret_manager_secret_iam_member.cloud_run
  ]
}
# Optional public (unauthenticated) access: the binding is created only
# when var.allow_unauthenticated is true.
resource "google_cloud_run_v2_service_iam_member" "public" {
  count = var.allow_unauthenticated ? 1 : 0

  name     = google_cloud_run_v2_service.main.name
  location = var.region
  project  = var.project_id

  # Grants the invoker role to everyone.
  member = "allUsers"
  role   = "roles/run.invoker"
}
# Maps var.custom_domain onto the main Cloud Run service.
resource "google_cloud_run_domain_mapping" "main" {
  project  = var.project_id
  location = var.region
  name     = var.custom_domain

  metadata {
    namespace = var.project_id
  }

  # Route requests for the domain to the service defined above.
  spec {
    route_name = google_cloud_run_v2_service.main.name
  }
}
# Dedicated identity that the Cloud Run service runs as.
resource "google_service_account" "cloud_run" {
  project      = var.project_id
  account_id   = "${var.project}-run-sa"
  display_name = "Cloud Run Service Account"
}
# Lets the Cloud Run service account read the database URL secret.
resource "google_secret_manager_secret_iam_member" "cloud_run" {
  project   = var.project_id
  secret_id = google_secret_manager_secret.database_url.secret_id

  member = "serviceAccount:${google_service_account.cloud_run.email}"
  role   = "roles/secretmanager.secretAccessor"
}
# Serverless VPC Access connector referenced by the Cloud Run service's
# vpc_access block.
resource "google_vpc_access_connector" "main" {
  project = var.project_id
  region  = var.region
  name    = "${var.project}-connector"

  network       = var.vpc_network
  ip_cidr_range = "10.8.0.0/28"

  # Connector instance bounds.
  min_instances = 2
  max_instances = 10
}
Cloud Build Pipeline
# cloudbuild.yaml
steps:
  # Pull the previous image so that --cache-from has layers to reuse.
  # Each build starts on a clean worker, so without this pull the cache is empty.
  # "|| exit 0" keeps the very first build (no :latest image yet) from failing.
  - name: 'gcr.io/cloud-builders/docker'
    entrypoint: 'bash'
    args:
      - '-c'
      - 'docker pull ${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:latest || exit 0'

  # Build container image, tagged with both the commit SHA and "latest"
  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'build'
      - '-t'
      - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:${SHORT_SHA}'
      - '-t'
      - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:latest'
      - '--cache-from'
      - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:latest'
      - '.'

  # Push to Artifact Registry (must happen before the deploy step runs)
  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'push'
      - '--all-tags'
      - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}'

  # Deploy the SHA-tagged image to Cloud Run
  - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
    entrypoint: 'gcloud'
    args:
      - 'run'
      - 'deploy'
      - '${_SERVICE}'
      - '--image'
      - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:${SHORT_SHA}'
      - '--region'
      - '${_REGION}'
      - '--platform'
      - 'managed'
      - '--quiet'

# Default values; can be overridden per trigger.
substitutions:
  _SERVICE: api-service
  _REGION: europe-west1
  _REPO: cloud-run-images

options:
  logging: CLOUD_LOGGING_ONLY

# Images recorded as artifacts of this build.
images:
  - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:${SHORT_SHA}'
  - '${_REGION}-docker.pkg.dev/${PROJECT_ID}/${_REPO}/${_SERVICE}:latest'
Traffic Management
Gradual Rollouts
# traffic-splitting.tf
# Canary rollout: most traffic stays on the previous revision while a small
# share goes to the new one.
resource "google_cloud_run_v2_service" "main" {
  name     = "api-service"
  location = var.region

  template {
    # Current version configuration
    revision = "api-service-v2"
    # ...
  }

  # 90% of requests remain on the v1 revision.
  traffic {
    revision = "api-service-v1"
    type     = "TRAFFIC_TARGET_ALLOCATION_TYPE_REVISION"
    percent  = 90
  }

  # 10% of requests go to v2, which also carries the "canary" tag.
  traffic {
    revision = "api-service-v2"
    type     = "TRAFFIC_TARGET_ALLOCATION_TYPE_REVISION"
    percent  = 10
    tag      = "canary"
  }
}
Blue-Green Deployment Script
#!/bin/bash
# deploy-blue-green.sh
#
# Usage: deploy-blue-green.sh SERVICE_NAME IMAGE [REGION]
#
# Deploys IMAGE as a new "green" revision with no traffic, smoke-tests it,
# then shifts traffic to it in steps. REGION defaults to europe-west1.

# Abort on the first failing command, unset variable, or failed pipeline stage,
# so traffic is never shifted after a failed deploy.
set -euo pipefail

SERVICE_NAME=${1:?usage: deploy-blue-green.sh SERVICE_NAME IMAGE [REGION]}
IMAGE=${2:?usage: deploy-blue-green.sh SERVICE_NAME IMAGE [REGION]}
REGION=${3:-europe-west1}

# Deploy new revision with no traffic
gcloud run deploy "$SERVICE_NAME" \
  --image="$IMAGE" \
  --region="$REGION" \
  --no-traffic \
  --tag=green

# Get the green revision URL
# NOTE(review): index [1] assumes the green-tagged entry is the second
# traffic target - confirm against the service's actual traffic list.
GREEN_URL=$(gcloud run services describe "$SERVICE_NAME" \
  --region="$REGION" \
  --format='value(status.traffic[1].url)')

if [[ -z "$GREEN_URL" ]]; then
  echo "Could not determine the green revision URL" >&2
  exit 1
fi

# Run smoke tests against green
echo "Running smoke tests against $GREEN_URL"
curl -f "$GREEN_URL/health" || exit 1

# Gradually shift traffic
for percent in 10 25 50 75 100; do
  echo "Shifting $percent% traffic to green"
  gcloud run services update-traffic "$SERVICE_NAME" \
    --region="$REGION" \
    --to-tags="green=$percent"

  # Wait and monitor
  sleep 30

  # Check error rate (implement your own monitoring)
  # if error_rate > threshold; then rollback
done

echo "Deployment complete"
Authentication Patterns
IAM-Based Authentication
// Calling authenticated Cloud Run service
import { GoogleAuth } from 'google-auth-library';
const auth = new GoogleAuth();
/**
 * Calls an IAM-protected Cloud Run service with a Google-signed ID token.
 *
 * @param serviceUrl Full URL to request (may include a path and query string).
 * @param method HTTP method; defaults to 'GET'.
 * @param body Optional request payload, sent as JSON.
 * @returns The response body returned by the service.
 * @throws If `serviceUrl` is not a valid URL, or if the token fetch or request fails.
 */
export const callCloudRunService = async (
  serviceUrl: string,
  method: string = 'GET',
  body?: any
): Promise<any> => {
  // The ID token audience is the service's base URL, not the full request URL,
  // so strip any path or query string before requesting the token.
  const audience = new URL(serviceUrl).origin;
  const client = await auth.getIdTokenClient(audience);

  const response = await client.request({
    url: serviceUrl,
    method,
    data: body,
    headers: {
      'Content-Type': 'application/json'
    }
  });

  return response.data;
};
// In Cloud Function calling Cloud Run
import * as functions from 'firebase-functions';
/**
 * HTTP function that forwards the request body to the Cloud Run service
 * at CLOUD_RUN_URL and relays the JSON result.
 *
 * Responds 500 when CLOUD_RUN_URL is not configured and 502 when the
 * upstream call fails, so the caller always receives a response.
 */
export const processData = functions.https.onRequest(async (req, res) => {
  const targetUrl = process.env.CLOUD_RUN_URL;
  if (!targetUrl) {
    console.error('CLOUD_RUN_URL is not configured');
    res.status(500).json({ error: 'Service not configured' });
    return;
  }

  try {
    const result = await callCloudRunService(
      targetUrl,
      'POST',
      { data: req.body }
    );
    res.json(result);
  } catch (error) {
    // Without this, a rejected upstream call would leave the request unanswered.
    console.error('Cloud Run call failed', error);
    res.status(502).json({ error: 'Upstream service error' });
  }
});
JWT Validation in Service
// middleware/auth.ts
import { OAuth2Client } from 'google-auth-library';
const client = new OAuth2Client();
/**
 * Middleware that validates a Google-signed ID token from the
 * `Authorization: Bearer <token>` header and attaches the caller's
 * email and subject to `req.user`.
 *
 * Responds 401 when the header is missing or the token is invalid, and
 * 500 when the expected audience (CLOUD_RUN_URL) is not configured.
 *
 * NOTE(review): Request/Response/NextFunction are presumably the Express
 * types - confirm they are imported where this middleware lives.
 */
export const validateGoogleToken = async (
  req: Request,
  res: Response,
  next: NextFunction
) => {
  // Fail closed: with no audience supplied, verifyIdToken does not check the
  // token's `aud` claim, so a token minted for any other service would pass.
  const expectedAudience = process.env.CLOUD_RUN_URL;
  if (!expectedAudience) {
    return res.status(500).json({ error: 'Server misconfigured' });
  }

  const authHeader = req.headers.authorization;
  if (!authHeader?.startsWith('Bearer ')) {
    return res.status(401).json({ error: 'Missing authorization' });
  }

  // Drop the "Bearer " prefix (7 characters).
  const token = authHeader.substring(7);

  try {
    const ticket = await client.verifyIdToken({
      idToken: token,
      audience: expectedAudience
    });

    const payload = ticket.getPayload();
    if (!payload) {
      // A verified ticket without claims cannot identify the caller.
      return res.status(401).json({ error: 'Invalid token' });
    }

    req.user = {
      email: payload.email,
      sub: payload.sub
    };
  } catch (error) {
    return res.status(401).json({ error: 'Invalid token' });
  }

  // Called outside the try block so downstream errors are not reported as token failures.
  next();
};
Cost Optimisation
Configuration for Cost
# Cost-focused profile: scales to zero, small instance, throttled CPU.
resource "google_cloud_run_v2_service" "cost_optimised" {
  name     = "cost-optimised-service"
  location = var.region

  template {
    # Keep requests short
    timeout = "60s"

    scaling {
      # No warm instances: the service scales down to zero
      min_instance_count = 0
      max_instance_count = 10
    }

    containers {
      image = var.image

      resources {
        # CPU is throttled while the instance is idle
        cpu_idle = true

        limits = {
          cpu    = "1"
          memory = "512Mi"
        }
      }
    }
  }
}
# Performance-focused profile: warm instances, larger limits, CPU always on.
resource "google_cloud_run_v2_service" "performance_optimised" {
  name     = "performance-optimised-service"
  location = var.region

  template {
    scaling {
      # Two instances stay warm at all times
      min_instance_count = 2
      max_instance_count = 100
    }

    containers {
      image = var.image

      resources {
        # CPU stays allocated even when idle
        cpu_idle          = false
        startup_cpu_boost = true

        limits = {
          cpu    = "4"
          memory = "2Gi"
        }
      }
    }
  }
}
Key Takeaways
- Container portable: Standard containers work without modification
- Scale to zero: Cost-effective for variable workloads
- Min instances: Use for latency-sensitive applications
- VPC connector: Required for private resource access
- Traffic splitting: Enable gradual rollouts and testing
- Gen2 execution: Better performance and features
- CPU allocation: Choose based on workload pattern
- Service-to-service: Use IAM for internal authentication
Cloud Run provides serverless simplicity with container flexibility. Proper configuration of scaling and CPU allocation optimises both cost and performance.