From 832643f0ef4cd87bd149e54db2c3cb990e5860a6 Mon Sep 17 00:00:00 2001 From: mohitrajain Date: Wed, 24 Dec 2025 17:26:55 +0100 Subject: [PATCH 1/2] fix: wpb-21356 fix node types for assethost and adminhost --- .../examples/wiab-staging-hetzner/.envrc | 5 + .../examples/wiab-staging-hetzner/README.md | 5 + .../examples/wiab-staging-hetzner/main.tf | 227 ++++++++++++++++++ .../examples/wiab-staging-hetzner/outputs.tf | 114 +++++++++ .../wiab-staging-hetzner/setup_nodes.yml | 206 ++++++++++++++++ .../examples/wiab-staging-hetzner/versions.tf | 8 + 6 files changed, 565 insertions(+) create mode 100644 terraform/examples/wiab-staging-hetzner/.envrc create mode 100644 terraform/examples/wiab-staging-hetzner/README.md create mode 100644 terraform/examples/wiab-staging-hetzner/main.tf create mode 100644 terraform/examples/wiab-staging-hetzner/outputs.tf create mode 100644 terraform/examples/wiab-staging-hetzner/setup_nodes.yml create mode 100644 terraform/examples/wiab-staging-hetzner/versions.tf diff --git a/terraform/examples/wiab-staging-hetzner/.envrc b/terraform/examples/wiab-staging-hetzner/.envrc new file mode 100644 index 000000000..8a29ee6d6 --- /dev/null +++ b/terraform/examples/wiab-staging-hetzner/.envrc @@ -0,0 +1,5 @@ +[[ -f .envrc.local ]] && source_env .envrc.local +# You can set this in .envrc.local to keep it out of VCS +export HCLOUD_TOKEN +source_up + diff --git a/terraform/examples/wiab-staging-hetzner/README.md b/terraform/examples/wiab-staging-hetzner/README.md new file mode 100644 index 000000000..6539dbf65 --- /dev/null +++ b/terraform/examples/wiab-staging-hetzner/README.md @@ -0,0 +1,5 @@ +# Wire-in-a-box-staging-hetzner + +This environment is dynamically provisioned to validate the wiab-staging solution, developed as a follow-up to our HA architecture in which datastore and Kubernetes VMs are physically failure-resilient. 
+ +For wiab-staging, all components are deliberately colocated on a single physical node, resulting in zero physical redundancy and a single point of failure. This design is intentional and suitable only for staging and testing, not production deployments. diff --git a/terraform/examples/wiab-staging-hetzner/main.tf b/terraform/examples/wiab-staging-hetzner/main.tf new file mode 100644 index 000000000..890ddd7fa --- /dev/null +++ b/terraform/examples/wiab-staging-hetzner/main.tf @@ -0,0 +1,227 @@ +locals { + rfc1918_cidr = "10.0.0.0/8" + kubenode_count = 3 + datanode_count = 3 + ssh_keys = [hcloud_ssh_key.adminhost.name] + + # Location preferences with fallbacks (EU only) + preferred_locations = ["fsn1", "hel1", "nbg1"] + + # Server type preferences with fallbacks (optimized for availability) + preferred_server_types = { + small = ["cx33", "cpx22", "cx43"] # For assethost and adminhost + medium = ["cx43", "cx53", "cpx42"] # For datanodes and k8s_nodes + } +} + +# Get available server types and locations +data "hcloud_server_types" "available" {} +data "hcloud_datacenters" "available" {} + +# Helper locals to select available resources with robust fallback logic +locals { + available_server_type_names = [for st in data.hcloud_server_types.available.server_types : st.name] + available_location_names = [for dc in data.hcloud_datacenters.available.datacenters : dc.location.name] + + # Select the first available location from the preference list + available_preferred_locations = [ + for preferred in local.preferred_locations : + preferred if contains(local.available_location_names, preferred) + ] + selected_location = length(local.available_preferred_locations) > 0 ? 
local.available_preferred_locations[0] : null + + # Select the first available server type from the preference list (with validation) + available_small_server_types = [ + for preferred in local.preferred_server_types.small : + preferred if contains(local.available_server_type_names, preferred) + ] + small_server_type = length(local.available_small_server_types) > 0 ? local.available_small_server_types[0] : null + + available_medium_server_types = [ + for preferred in local.preferred_server_types.medium : + preferred if contains(local.available_server_type_names, preferred) + ] + medium_server_type = length(local.available_medium_server_types) > 0 ? local.available_medium_server_types[0] : null +} + +# Validation checks - fail early with helpful error messages (small = adminhost/assethost, medium = datanodes/kubenodes) +resource "null_resource" "location_validation" { + count = local.selected_location != null ? 0 : 1 + + provisioner "local-exec" { + command = <<-EOT + echo "DEPLOYMENT FAILED: No suitable location available" + echo "Requested locations: ${join(", ", local.preferred_locations)}" + echo "Available locations: ${join(", ", local.available_location_names)}" + echo "Please check Hetzner Cloud region availability" + exit 1 + EOT + } +} + +resource "null_resource" "small_server_type_validation" { + count = local.small_server_type != null ? 0 : 1 + + provisioner "local-exec" { + command = <<-EOT + echo "DEPLOYMENT FAILED: No suitable small (adminhost/assethost) server types available" + echo "Requested types: ${join(", ", local.preferred_server_types.small)}" + echo "Available types: ${join(", ", local.available_server_type_names)}" + echo "Please check server type availability in the selected region" + exit 1 + EOT + } +} + +resource "null_resource" "medium_server_type_validation" { + count = local.medium_server_type != null ? 
0 : 1 + + provisioner "local-exec" { + command = <<-EOT + echo "DEPLOYMENT FAILED: No suitable medium (datanode/kubenode) server types available" + echo "Requested types: ${join(", ", local.preferred_server_types.medium)}" + echo "Available types: ${join(", ", local.available_server_type_names)}" + echo "Please check server type availability in the selected region" + exit 1 + EOT + } +} + +resource "null_resource" "deployment_info" { + depends_on = [ + null_resource.location_validation, + null_resource.small_server_type_validation, + null_resource.medium_server_type_validation + ] + + provisioner "local-exec" { + command = <<-EOT + echo "VALIDATION PASSED: Deploying Wire offline infrastructure" + echo "Location: ${local.selected_location}" + echo "Small server type (adminhost/assethost): ${local.small_server_type}" + echo "Medium server type (datanodes/kubenodes): ${local.medium_server_type}" + echo "Total instances: ${local.datanode_count + local.kubenode_count + 2}" + EOT + } +} + +resource "random_pet" "main" { + depends_on = [null_resource.deployment_info] +} + +resource "hcloud_network" "main" { + name = "main-${random_pet.main.id}" + ip_range = cidrsubnet(local.rfc1918_cidr, 8, 1) +} + +resource "hcloud_network_subnet" "main" { + network_id = hcloud_network.main.id + type = "cloud" + network_zone = "eu-central" + ip_range = cidrsubnet(hcloud_network.main.ip_range, 8, 1) +} + + +resource "random_pet" "adminhost" { +} + +resource "tls_private_key" "admin" { + algorithm = "ECDSA" + ecdsa_curve = "P256" +} + +resource "hcloud_ssh_key" "adminhost" { + name = "adminhost-${random_pet.adminhost.id}" + public_key = tls_private_key.admin.public_key_openssh +} + +# Connected to all other servers. 
Simulates the admin's "laptop" +resource "hcloud_server" "adminhost" { + depends_on = [ + null_resource.deployment_info, + hcloud_network_subnet.main + ] + location = local.selected_location + name = "adminhost-${random_pet.adminhost.id}" + image = "ubuntu-22.04" + ssh_keys = local.ssh_keys + server_type = local.small_server_type + network { + network_id = hcloud_network.main.id + ip = "" + } +} + +# The server hosting all the bootstrap assets +resource "random_pet" "assethost" { +} + +resource "hcloud_server" "assethost" { + depends_on = [ + null_resource.deployment_info, + hcloud_network_subnet.main + ] + location = local.selected_location + name = "assethost-${random_pet.assethost.id}" + image = "ubuntu-22.04" + ssh_keys = local.ssh_keys + server_type = local.small_server_type + public_net { + ipv4_enabled = false + ipv6_enabled = false + } + network { + network_id = hcloud_network.main.id + ip = "" + } +} + +resource "random_pet" "kubenode" { + count = local.kubenode_count +} + +resource "hcloud_server" "kubenode" { + depends_on = [ + null_resource.deployment_info, + hcloud_network_subnet.main + ] + count = local.kubenode_count + location = local.selected_location + name = "kubenode-${random_pet.kubenode[count.index].id}" + image = "ubuntu-22.04" + ssh_keys = local.ssh_keys + server_type = local.medium_server_type + public_net { + ipv4_enabled = false + ipv6_enabled = false + } + network { + network_id = hcloud_network.main.id + ip = "" + } +} + +resource "random_pet" "datanode" { + count = local.datanode_count +} + +resource "hcloud_server" "datanode" { + depends_on = [ + null_resource.deployment_info, + hcloud_network_subnet.main + ] + count = local.datanode_count + location = local.selected_location + name = "datanode-${random_pet.datanode[count.index].id}" + image = "ubuntu-22.04" + ssh_keys = local.ssh_keys + server_type = local.medium_server_type + public_net { + ipv4_enabled = false + ipv6_enabled = false + } + network { + network_id = 
hcloud_network.main.id + ip = "" + } +} diff --git a/terraform/examples/wiab-staging-hetzner/outputs.tf b/terraform/examples/wiab-staging-hetzner/outputs.tf new file mode 100644 index 000000000..c758df645 --- /dev/null +++ b/terraform/examples/wiab-staging-hetzner/outputs.tf @@ -0,0 +1,114 @@ +output "ssh_private_key" { + sensitive = true + value = tls_private_key.admin.private_key_pem +} + +output "selected_server_types" { + description = "Server types selected after checking availability" + value = { + small_server_type = local.small_server_type + medium_server_type = local.medium_server_type + } +} + +output "selected_location" { + description = "Location selected after checking availability" + value = local.selected_location +} + +output "resource_fallback_info" { + description = "Information about resource fallback selections" + value = { + requested_locations = local.preferred_locations + available_locations = local.available_location_names + selected_location = local.selected_location + + requested_small_types = local.preferred_server_types.small + available_small_types = local.available_small_server_types + selected_small_type = local.small_server_type + + requested_medium_types = local.preferred_server_types.medium + available_medium_types = local.available_medium_server_types + selected_medium_type = local.medium_server_type + } +} + +output "adminhost" { + sensitive = true + value = hcloud_server.adminhost.ipv4_address +} +# output format that a static inventory file expects +output "static-inventory" { + sensitive = true + value = { + all = { + vars = { + adminhost_ip = tolist(hcloud_server.adminhost.network)[0].ip + ansible_user = "root" + private_interface = "enp7s0" + } + } + adminhost = { + hosts = { + "adminhost" = { + ansible_host = hcloud_server.adminhost.ipv4_address + } + } + vars = { + ansible_ssh_common_args = "-o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o ControlMaster=auto -o ControlPersist=60s -o BatchMode=yes -o 
ConnectionAttempts=10 -o ServerAliveInterval=60 -o ServerAliveCountMax=3" + } + } + private = { + children = { + assethost = {} + datanode = {} + "kube-node" = {} + adminhost_local = {} + } + vars = { + ansible_ssh_common_args = "-o ProxyCommand=\"ssh -i ssh_private_key -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -W %h:%p -q root@${hcloud_server.adminhost.ipv4_address}\" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o ControlMaster=auto -o ControlPersist=60s -o BatchMode=yes -o ConnectionAttempts=10 -o ServerAliveInterval=60 -o ServerAliveCountMax=3" + } + } + adminhost_local = { + hosts = { + "adminhost_local" = { + ansible_host = tolist(hcloud_server.adminhost.network)[0].ip + } + } + } + assethost = { + hosts = { + "assethost" = { + ansible_host = tolist(hcloud_server.assethost.network)[0].ip + } + } + } + kube-node = { + hosts = { + for index, server in hcloud_server.kubenode : server.name => { + ansible_host = tolist(hcloud_server.kubenode[index].network)[0].ip + ip = tolist(hcloud_server.kubenode[index].network)[0].ip + } + } + # NOTE: Necessary for the Hetzner Cloud until Calico v3.17 arrives in Kubespray + # Hetzner private networks have an MTU of 1450 instead of 1500 + vars = { + calico_mtu = 1450 + calico_veth_mtu = 1430 + # NOTE: relax handling a list with more than 3 items; required on Hetzner + docker_dns_servers_strict = false + upstream_dns_servers = [tolist(hcloud_server.adminhost.network)[0].ip] + } + } + datanode = { + hosts = { + for index, server in hcloud_server.datanode : server.name => { + ansible_host = tolist(hcloud_server.datanode[index].network)[0].ip + } + } + vars = { + datanode_network_interface = "enp7s0" + } + } + } +} diff --git a/terraform/examples/wiab-staging-hetzner/setup_nodes.yml b/terraform/examples/wiab-staging-hetzner/setup_nodes.yml new file mode 100644 index 000000000..2fec757d7 --- /dev/null +++ b/terraform/examples/wiab-staging-hetzner/setup_nodes.yml @@ -0,0 +1,206 @@ +--- 
+- name: Setup adminhost with dnsmasq and Docker + hosts: adminhost_local + become: yes + tasks: + + - name: Check if private interface exists + shell: ip addr show {{ private_interface }} + register: interface_check + retries: 60 + delay: 2 + until: interface_check.rc == 0 + + - name: Get private IP address + shell: ip -o -4 addr show {{ private_interface }} | awk '{print $4}' | cut -d'/' -f1 + register: private_ip_result + + - name: Set private IP fact + set_fact: + private_ip: "{{ private_ip_result.stdout }}" + + - name: Disable systemd-resolved + systemd: + name: systemd-resolved + enabled: no + state: stopped + + - name: Remove existing resolv.conf symlink + file: + path: /etc/resolv.conf + state: absent + + - name: Create new resolv.conf with Google DNS + copy: + content: "nameserver 8.8.8.8\n" + dest: /etc/resolv.conf + mode: '0644' + + - name: Update package cache + apt: + update_cache: yes + + - name: Install dnsmasq + apt: + name: dnsmasq + state: present + + - name: Configure dnsmasq + copy: + content: | + port=53 + domain-needed + bogus-priv + bind-interfaces + listen-address={{ private_ip }} + listen-address=127.0.0.1 + dest: /etc/dnsmasq.conf + mode: '0644' + + - name: Restart dnsmasq + systemd: + name: dnsmasq + state: restarted + + - name: Add Docker GPG key + apt_key: + url: https://download.docker.com/linux/ubuntu/gpg + keyring: /usr/share/keyrings/docker-archive-keyring.gpg + state: present + + - name: Add Docker repository + apt_repository: + repo: "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + state: present + + - name: Install Docker packages + apt: + name: + - docker-ce + - docker-ce-cli + - containerd.io + state: present + update_cache: yes + + - name: Configure iptables for DNS + iptables: + chain: OUTPUT + protocol: udp + destination_port: "53" + jump: ACCEPT + state: present + + - name: Configure iptables for DNS input from 
private network + iptables: + chain: INPUT + protocol: udp + source: 10.0.0.0/8 + destination_port: "53" + jump: ACCEPT + state: present + action: insert + rule_num: 1 + + - name: Start and enable Docker + systemd: + name: docker + state: started + enabled: yes + +- name: Setup default routing for all k8s and datastore nodes + hosts: datanode:kube-node + become: yes + + tasks: + - name: Check if private interface exists + shell: ip addr show {{ private_interface }} + register: interface_check + retries: 60 + delay: 2 + until: interface_check.rc == 0 + + - name: Get existing gateway for private interface + shell: ip route show | grep "{{ private_interface }}" | grep "via" | head -1 | awk '{print $3}' + register: gateway_check + failed_when: false + changed_when: false + + - name: Check if default route already exists + shell: ip route show default + register: default_route_check + failed_when: false + changed_when: false + + - name: Add default route via existing gateway + shell: ip route add default via {{ gateway_check.stdout }} + register: route_result + failed_when: > + route_result.rc != 0 and + "File exists" not in route_result.stderr and + "RTNETLINK answers: File exists" not in route_result.stderr + changed_when: route_result.rc == 0 + when: + - default_route_check.stdout == "" + - gateway_check.stdout != "" + +- name: Configure DNS for all k8s and datastore nodes + hosts: datanode:kube-node + become: yes + + tasks: + - name: Disable systemd-resolved + systemd: + name: systemd-resolved + enabled: no + state: stopped + + - name: Remove existing resolv.conf symlink + file: + path: /etc/resolv.conf + state: absent + + - name: Create new resolv.conf with Adminhost dnsmasq service + copy: + content: "nameserver {{ adminhost_ip }}\n" + dest: /etc/resolv.conf + mode: '0644' + +- name: Add demo user on all nodes + hosts: all + tasks: + - name: Create demo user + ansible.builtin.user: + name: demo + shell: /bin/bash + create_home: yes + state: present + + - name: Add 
demo user to sudo group + ansible.builtin.user: + name: demo + groups: sudo + append: yes + + - name: Ensure .ssh directory exists for demo user + ansible.builtin.file: + path: /home/demo/.ssh + state: directory + owner: demo + group: demo + mode: '0700' + + - name: Copy root's authorized_keys to demo user + ansible.builtin.copy: + remote_src: yes + src: /root/.ssh/authorized_keys + dest: /home/demo/.ssh/authorized_keys + owner: demo + group: demo + mode: '0600' + + - name: Allow demo user to run sudo without password + ansible.builtin.lineinfile: + path: /etc/sudoers.d/demo + line: 'demo ALL=(ALL) NOPASSWD:ALL' + create: yes + validate: 'visudo -cf %s' diff --git a/terraform/examples/wiab-staging-hetzner/versions.tf b/terraform/examples/wiab-staging-hetzner/versions.tf new file mode 100644 index 000000000..b047ba54e --- /dev/null +++ b/terraform/examples/wiab-staging-hetzner/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + hcloud = { + source = "hetznercloud/hcloud" + } + } + required_version = "~> 1.1" +} From 08c036dd51b80d4e52e9b079b62ab72f28561286 Mon Sep 17 00:00:00 2001 From: mohitrajain Date: Sun, 28 Dec 2025 19:01:33 +0100 Subject: [PATCH 2/2] build: wpb-21356 add changelog for terraform wiab-staging resources --- changelog.d/3-deploy-builds/wiab-staging | 1 + 1 file changed, 1 insertion(+) diff --git a/changelog.d/3-deploy-builds/wiab-staging b/changelog.d/3-deploy-builds/wiab-staging index 50424dd72..b7e9bd468 100644 --- a/changelog.d/3-deploy-builds/wiab-staging +++ b/changelog.d/3-deploy-builds/wiab-staging @@ -1,3 +1,4 @@ Fixed: offline-vm-setup script to use ubuntu cloud image, local seed iso and VM verification process Changed: Add ansible playbook for wiab-staging VM provisioning Fixed: offline-deploy.sh for SSH_AUTH_SOCK handling and remove defunct passwords for postgresql +Added: terraform resources for wiab-staging