From 077c84b67881fafa85114aa67e9fd08b5e7151f7 Mon Sep 17 00:00:00 2001 From: hsinhoyeh Date: Thu, 19 Mar 2026 10:24:25 +0800 Subject: [PATCH] feat: remove GLB and secure management domain via Caddy TLS Replace the GCP Global Load Balancer (~$648/month) with Caddy-based TLS termination. External HTTPS traffic now flows through sentinel DNAT to Caddy, which provisions Let's Encrypt certs via HTTP-01 ACME. - Remove GLB terraform resources: instance group, health check firewall, backend variables/outputs - Remove ports 8080 and 50051 from sentinel DNAT forwarded ports default (REST/gRPC no longer exposed externally, accessed via Caddy on :443) - Remove 50051 from sentinel-to-spot firewall rule (keep 8080 for sentinel health checks) --- internal/cmd/sentinel.go | 2 +- .../examples/production-consumer/main.tf | 7 +---- terraform/modules/containarium/main.tf | 18 ------------- terraform/modules/containarium/outputs.tf | 5 ---- terraform/modules/containarium/sentinel.tf | 27 +------------------ terraform/modules/containarium/variables.tf | 12 --------- 6 files changed, 3 insertions(+), 68 deletions(-) diff --git a/internal/cmd/sentinel.go b/internal/cmd/sentinel.go index 7bc844a..4d4014f 100644 --- a/internal/cmd/sentinel.go +++ b/internal/cmd/sentinel.go @@ -65,7 +65,7 @@ func init() { sentinelCmd.Flags().DurationVar(&sentinelCheckInterval, "check-interval", 15*time.Second, "Health check interval") sentinelCmd.Flags().IntVar(&sentinelHTTPPort, "http-port", 80, "Maintenance page HTTP port") sentinelCmd.Flags().IntVar(&sentinelHTTPSPort, "https-port", 443, "Maintenance page HTTPS port") - sentinelCmd.Flags().StringVar(&sentinelForwardedPorts, "forwarded-ports", "80,443,8080,50051", "Comma-separated ports to DNAT forward (port 22 handled by sshpiper)") + sentinelCmd.Flags().StringVar(&sentinelForwardedPorts, "forwarded-ports", "80,443", "Comma-separated ports to DNAT forward (port 22 handled by sshpiper)") sentinelCmd.Flags().IntVar(&sentinelHealthyThreshold, "healthy-threshold", 2, "Consecutive healthy checks before switching to proxy") sentinelCmd.Flags().IntVar(&sentinelUnhealthyThreshold, "unhealthy-threshold", 2, "Consecutive unhealthy checks before switching to maintenance") sentinelCmd.Flags().IntVar(&sentinelBinaryPort, "binary-port", 8888, "Port to serve containarium binary for spot VM downloads (0 to disable)") diff --git a/terraform/modules/containarium/examples/production-consumer/main.tf b/terraform/modules/containarium/examples/production-consumer/main.tf index 4acf056..68bb950 100644 --- a/terraform/modules/containarium/examples/production-consumer/main.tf +++ b/terraform/modules/containarium/examples/production-consumer/main.tf @@ -2,7 +2,7 @@ # Example: Production Consumer (kafeido-infra style) # ============================================================================= # This example shows how a production deployment (e.g., kafeido-infra) would -# consume the containarium module with VPC networking and GLB backend. +# consume the containarium module with VPC networking. # # Copy and adapt this for your production environment. @@ -51,8 +51,6 @@ module "containarium" { # Production features enable_iap_firewall = true - enable_health_check_firewall = true - enable_glb_backend = true jwt_secret = var.jwt_secret fail2ban_whitelist_cidr = "10.0.0.0/8" instance_tags = ["containarium-jump-server-usw1", "containarium-sentinel"] @@ -172,6 +170,3 @@ output "jump_server_ip" { value = module.containarium.jump_server_ip } -output "sentinel_instance_group" { - value = module.containarium.sentinel_instance_group -} diff --git a/terraform/modules/containarium/main.tf b/terraform/modules/containarium/main.tf index 457d6f3..37b8181 100644 --- a/terraform/modules/containarium/main.tf +++ b/terraform/modules/containarium/main.tf @@ -91,24 +91,6 @@ resource "google_compute_firewall" "allow_iap_ssh" { description = "Allow IAP SSH tunneling to Containarium instances" } -# Health check firewall (for GCP load balancer health checks) -resource "google_compute_firewall" "allow_health_check" { - count = var.enable_health_check_firewall ? 1 : 0 - name = "${var.instance_name}-allow-health-check" - network = local.network - project = var.project_id - - allow { - protocol = "tcp" - ports = ["8080", "22"] - } - - source_ranges = ["130.211.0.0/22", "35.191.0.0/16"] - target_tags = var.instance_tags - - description = "Allow GCP health check probes to Containarium instances" -} - # ----------------------------------------------------------------------------- # Regular (non-spot) VM Instance # ----------------------------------------------------------------------------- diff --git a/terraform/modules/containarium/outputs.tf b/terraform/modules/containarium/outputs.tf index a49cdeb..d0ec692 100644 --- a/terraform/modules/containarium/outputs.tf +++ b/terraform/modules/containarium/outputs.tf @@ -37,11 +37,6 @@ output "sentinel_instance_self_link" { value = local.use_sentinel ? google_compute_instance.sentinel[0].self_link : null } -output "sentinel_instance_group" { - description = "Self link of the sentinel unmanaged instance group (for GLB)" - value = local.use_sentinel && var.enable_glb_backend ? google_compute_instance_group.sentinel[0].self_link : null -} - output "spot_vm_name" { description = "Name of the spot VM" value = var.use_spot_instance ? google_compute_instance.jump_server_spot[0].name : null diff --git a/terraform/modules/containarium/sentinel.tf b/terraform/modules/containarium/sentinel.tf index 03343ee..bcb2a7e 100644 --- a/terraform/modules/containarium/sentinel.tf +++ b/terraform/modules/containarium/sentinel.tf @@ -120,7 +120,7 @@ resource "google_compute_firewall" "sentinel_to_spot" { allow { protocol = "tcp" - ports = ["22", "80", "443", "8080", "50051"] + ports = ["22", "80", "443", "8080"] } source_tags = ["containarium-sentinel"] @@ -167,28 +167,3 @@ resource "google_compute_firewall" "sentinel_mgmt_ssh" { description = "Allow SSH management to sentinel on port 2222 (port 22 handled by sshpiper)" } -# ----------------------------------------------------------------------------- -# Optional: Unmanaged Instance Group for GLB backend -# ----------------------------------------------------------------------------- - -resource "google_compute_instance_group" "sentinel" { - count = local.use_sentinel && var.enable_glb_backend ? 1 : 0 - - name = "${var.instance_name}-sentinel-group" - zone = var.zone - project = var.project_id - - instances = [ - google_compute_instance.sentinel[0].self_link, - ] - - named_port { - name = "http" - port = 8080 - } - - named_port { - name = "ssh" - port = 22 - } -} diff --git a/terraform/modules/containarium/variables.tf b/terraform/modules/containarium/variables.tf index 0572afd..32b7930 100644 --- a/terraform/modules/containarium/variables.tf +++ b/terraform/modules/containarium/variables.tf @@ -134,18 +134,6 @@ variable "enable_iap_firewall" { default = false } -variable "enable_health_check_firewall" { - description = "Create firewall rule for GCP health check IP ranges" - type = bool - default = false -} - -variable "enable_glb_backend" { - description = "Create unmanaged instance group with named ports for GLB" - type = bool - default = false -} - variable "spot_vm_external_ip" { description = "Give spot VM an ephemeral external IP (false = Cloud NAT only)" type = bool