Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion examples/terraform/envoy-ratelimiter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,13 @@ Apache Beam pipelines often process data at massive scale, which can easily over

This Terraform module deploys a **centralized Rate Limit Service (RLS)** using Envoy. Beam workers can query this service to coordinate global quotas across thousands of distributed workers, ensuring you stay within safe API limits without hitting `429 Too Many Requests` errors.

Example Beam Pipelines using it:
Example Beam Python Pipelines using it:
* [Simple DoFn RateLimiter](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/examples/rate_limiter_simple.py)
* [Vertex AI RateLimiter](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/examples/inference/rate_limiter_vertex_ai.py)

Example Beam Java Pipelines using it:
* [Simple DoFn RateLimiter](https://github.com/apache/beam/blob/master/examples/java/src/main/java/org/apache/beam/examples/RateLimiterSimple.java)

## Architectures:
- **GKE Autopilot**: Fully managed, serverless Kubernetes environment.
- **Private Cluster**: Nodes have internal IPs only.
Expand Down Expand Up @@ -78,6 +81,8 @@ region = "us-central1" # GCP Region for deployment
cluster_name = "ratelimit-cluster" # Name of the GKE cluster
deletion_protection = true # Prevent accidental cluster deletion (boolean; keep true for prod)
control_plane_cidr = "172.16.0.0/28" # CIDR for GKE control plane (must not overlap with subnet)
namespace = "envoy-ratelimiter" # Kubernetes namespace for deployment
enable_metrics = false # Deploy statsd-exporter sidecar
ratelimit_replicas = 1 # Initial number of Rate Limit pods
min_replicas = 1 # Minimum HPA replicas
max_replicas = 5 # Maximum HPA replicas
Expand Down Expand Up @@ -163,6 +168,8 @@ terraform destroy
|region |GCP Region for deployment |us-central1 |
|control_plane_cidr |CIDR block for GKE control plane |172.16.0.0/28 |
|cluster_name |Name of the GKE cluster |ratelimit-cluster |
|namespace |Kubernetes namespace to deploy resources into |envoy-ratelimiter |
|enable_metrics |Deploy statsd-exporter sidecar |false |
|deletion_protection |Prevent accidental cluster deletion |false |
|ratelimit_replicas |Initial number of Rate Limit pods |1 |
|min_replicas |Minimum HPA replicas |1 |
Expand Down
4 changes: 2 additions & 2 deletions examples/terraform/envoy-ratelimiter/gke.tf
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ resource "google_container_cluster" "primary" {

# Private Cluster Configuration
private_cluster_config {
enable_private_nodes = true # Nodes have internal IPs only
enable_private_endpoint = false # Master is accessible via Public IP (required for Terraform from outside VPC)
enable_private_nodes = true # Nodes have internal IPs only
enable_private_endpoint = false # Master is accessible via Public IP (required for Terraform from outside VPC)
master_ipv4_cidr_block = var.control_plane_cidr
}
}
137 changes: 89 additions & 48 deletions examples/terraform/envoy-ratelimiter/ratelimit.tf
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,35 @@ resource "time_sleep" "wait_for_cluster" {
depends_on = [google_container_cluster.primary]
}

# Namespace
resource "kubernetes_namespace" "ratelimit_namespace" {
  # Hold off until the post-cluster-creation wait has finished.
  depends_on = [time_sleep.wait_for_cluster]

  # The namespace name is supplied through var.namespace.
  metadata {
    name = var.namespace
  }
}


# ConfigMap
resource "kubernetes_config_map" "ratelimit_config" {
metadata {
name = "ratelimit-config"
name = "ratelimit-config"
namespace = var.namespace
}

data = {
"config.yaml" = var.ratelimit_config_yaml
}

depends_on = [time_sleep.wait_for_cluster]
depends_on = [kubernetes_namespace.ratelimit_namespace]
}

# Redis Deployment
resource "kubernetes_deployment" "redis" {
metadata {
name = "redis"
name = "redis"
namespace = var.namespace
labels = {
app = "redis"
}
Expand Down Expand Up @@ -81,13 +93,14 @@ resource "kubernetes_deployment" "redis" {
}
}

depends_on = [time_sleep.wait_for_cluster]
depends_on = [kubernetes_namespace.ratelimit_namespace]
}

# Redis Service
resource "kubernetes_service" "redis" {
metadata {
name = "redis"
name = "redis"
namespace = var.namespace
}

spec {
Expand All @@ -101,13 +114,14 @@ resource "kubernetes_service" "redis" {
}
}

depends_on = [time_sleep.wait_for_cluster]
depends_on = [kubernetes_namespace.ratelimit_namespace]
}

# Rate Limit Deployment
resource "kubernetes_deployment" "ratelimit" {
metadata {
name = "ratelimit"
name = "ratelimit"
namespace = var.namespace
labels = {
app = "ratelimit"
}
Expand All @@ -131,8 +145,8 @@ resource "kubernetes_deployment" "ratelimit" {

spec {
container {
name = "ratelimit"
image = var.ratelimit_image
name = "ratelimit"
image = var.ratelimit_image
command = ["/bin/ratelimit"]

port {
Expand All @@ -147,7 +161,15 @@ resource "kubernetes_deployment" "ratelimit" {

env {
name = "USE_STATSD"
value = "true"
value = var.enable_metrics ? "true" : "false"
}
env {
name = "DISABLE_STATS"
value = var.enable_metrics ? "false" : "true"
}
env {
name = "LOG_FORMAT"
value = "json"
}
env {
name = "LOG_LEVEL"
Expand Down Expand Up @@ -209,28 +231,38 @@ resource "kubernetes_deployment" "ratelimit" {
}
}

container {
name = "statsd-exporter"
image = var.statsd_exporter_image

port {
name = "metrics"
container_port = 9102
}
port {
name = "statsd-udp"
container_port = 9125
protocol = "UDP"
}
# statsd-exporter does not use much resources, so setting resources to the minimum
resources {
requests = {
cpu = "50m"
memory = "64Mi"
dynamic "container" {
for_each = var.enable_metrics ? [1] : []
content {
name = "statsd-exporter"
image = var.statsd_exporter_image
args = ["--log.format=json"]

dynamic "port" {
for_each = var.enable_metrics ? [1] : []
content {
name = "metrics"
container_port = 9102
}
}
dynamic "port" {
for_each = var.enable_metrics ? [1] : []
content {
name = "statsd-udp"
container_port = 9125
protocol = "UDP"
}
}
limits = {
cpu = "100m"
memory = "128Mi"
# statsd-exporter uses few resources, so keep its requests and limits small
resources {
requests = {
cpu = "50m"
memory = "64Mi"
}
limits = {
cpu = "100m"
memory = "128Mi"
}
}
}
}
Expand All @@ -246,7 +278,7 @@ resource "kubernetes_deployment" "ratelimit" {
}

depends_on = [
time_sleep.wait_for_cluster,
kubernetes_namespace.ratelimit_namespace,
kubernetes_config_map.ratelimit_config,
kubernetes_service.redis
]
Expand All @@ -258,7 +290,8 @@ resource "kubernetes_deployment" "ratelimit" {

resource "kubernetes_horizontal_pod_autoscaler_v2" "ratelimit" {
metadata {
name = "ratelimit-hpa"
name = "ratelimit-hpa"
namespace = var.namespace
}

spec {
Expand All @@ -274,7 +307,7 @@ resource "kubernetes_horizontal_pod_autoscaler_v2" "ratelimit" {
metric {
type = "Resource"
resource {
name = "cpu"
name = "cpu"
target {
type = "Utilization"
average_utilization = var.hpa_cpu_target_percentage
Expand All @@ -285,7 +318,7 @@ resource "kubernetes_horizontal_pod_autoscaler_v2" "ratelimit" {
metric {
type = "Resource"
resource {
name = "memory"
name = "memory"
target {
type = "Utilization"
average_utilization = var.hpa_memory_target_percentage
Expand All @@ -294,13 +327,14 @@ resource "kubernetes_horizontal_pod_autoscaler_v2" "ratelimit" {
}
}

depends_on = [time_sleep.wait_for_cluster]
depends_on = [kubernetes_namespace.ratelimit_namespace]
}

# Rate Limit Internal Service
resource "kubernetes_service" "ratelimit" {
metadata {
name = "ratelimit"
name = "ratelimit"
namespace = var.namespace
}

spec {
Expand All @@ -323,20 +357,24 @@ resource "kubernetes_service" "ratelimit" {
port = 6070
target_port = 6070
}
port {
name = "metrics"
port = 9102
target_port = 9102
dynamic "port" {
for_each = var.enable_metrics ? [1] : []
content {
name = "metrics"
port = 9102
target_port = 9102
}
}
}

depends_on = [time_sleep.wait_for_cluster]
depends_on = [kubernetes_namespace.ratelimit_namespace]
}

# Rate Limit External Service (LoadBalancer)
resource "kubernetes_service" "ratelimit_external" {
metadata {
name = "ratelimit-external"
name = "ratelimit-external"
namespace = var.namespace
annotations = {
"networking.gke.io/load-balancer-type" = "Internal"
}
Expand All @@ -360,12 +398,15 @@ resource "kubernetes_service" "ratelimit_external" {
port = 6070
target_port = 6070
}
port {
name = "metrics"
port = 9102
target_port = 9102
dynamic "port" {
for_each = var.enable_metrics ? [1] : []
content {
name = "metrics"
port = 9102
target_port = 9102
}
}
}

depends_on = [time_sleep.wait_for_cluster]
depends_on = [kubernetes_namespace.ratelimit_namespace]
}
17 changes: 14 additions & 3 deletions examples/terraform/envoy-ratelimiter/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ variable "hpa_memory_target_percentage" {
variable "ratelimit_image" {
description = "Docker image for Envoy Rate Limit service"
type = string
default = "envoyproxy/ratelimit:e9ce92cc"
default = "envoyproxy/ratelimit:e9ce92cc"
}

variable "redis_image" {
Expand All @@ -125,7 +125,7 @@ variable "statsd_exporter_image" {
variable "ratelimit_log_level" {
description = "Log level for ratelimit service"
type = string
default = "debug"
default = "info"
}

variable "ratelimit_grpc_max_connection_age" {
Expand Down Expand Up @@ -175,4 +175,15 @@ variable "ratelimit_resources" {
memory = "512Mi"
}
}
}
}
# Namespace used by the Kubernetes resources that set `namespace = var.namespace`.
variable "namespace" {
  type        = string
  default     = "envoy-ratelimiter"
  description = "The Kubernetes namespace to deploy resources into"
}

# Toggle for the metrics path: when true, the ratelimit Deployment adds the
# statsd-exporter container and the Services add the "metrics" port (9102);
# USE_STATSD / DISABLE_STATS are derived from the same flag.
variable "enable_metrics" {
  type        = bool
  default     = false
  description = "Whether to deploy the statsd-exporter sidecar for Prometheus metrics"
}