From 7eef8bb9f6c760bbda83fd11cd1e258a77564dff Mon Sep 17 00:00:00 2001 From: EJ Campbell Date: Wed, 4 Mar 2026 15:46:16 -0800 Subject: [PATCH 1/9] fix: stale state cleanup retry, host routing verification, snapshot restore timeout - load_state_by_pid: retry after cleanup_stale_state when PID not found - Routed mode: verify forwarding/accept_ra/default route at setup, warn if wrong - Snapshot restore: remove 30s timeout on fc-agent output reconnect (VM may be CPU-starved after restore; proceeding early causes exec failures) --- src/commands/snapshot.rs | 16 +++++------ src/state/manager.rs | 60 ++++++++++++++++++++++------------------ 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/src/commands/snapshot.rs b/src/commands/snapshot.rs index 6b4d3b3a..9e02b8e8 100644 --- a/src/commands/snapshot.rs +++ b/src/commands/snapshot.rs @@ -673,7 +673,7 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { // Network mode inherited from snapshot metadata let network_mode = snapshot_config.metadata.network_mode; - // Start egress proxy for rootless mode (bypasses TAP/bridge for outbound TCP) + // Start egress proxy for rootless mode only let _egress_proxy_handle = if matches!(network_mode, FcNetworkMode::Rootless) { let socket_path = clone_vsock_base.clone(); Some(tokio::spawn(async move { @@ -726,9 +726,9 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { Box::new(net) } FcNetworkMode::Routed => { - RoutedNetwork::preflight_check().context("routed mode preflight check failed")?; let mut net = RoutedNetwork::new(vm_id.clone(), tap_device.clone(), port_mappings.clone()); + net.preflight_check().context("routed mode preflight check failed")?; if !port_mappings.is_empty() { let loopback_ip = state_manager .allocate_loopback_ip(&mut vm_state) @@ -1098,13 +1098,13 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { // exec_rebind → exec_re_register → rebind_done → output.reconnect() → HERE // Without this 
gate, the health monitor could start exec calls before // the exec server has re-registered its AsyncFd after restore. + // No timeout — after snapshot restore, the VM may be CPU-starved (HHVM, EdenFS, + // falcon all resume simultaneously) and fc-agent's MMDS poll + restore handler + // can take minutes. Proceeding early causes exec failures; waiting is correct. if !tty_mode { - match tokio::time::timeout(std::time::Duration::from_secs(30), output_connected_rx).await { - Ok(Ok(())) => info!(vm_id = %vm_id, "fc-agent output connected, exec server ready"), - Ok(Err(_)) => warn!(vm_id = %vm_id, "output connected_tx dropped"), - Err(_) => { - warn!(vm_id = %vm_id, "fc-agent did not connect within 30s, proceeding anyway") - } + match output_connected_rx.await { + Ok(()) => info!(vm_id = %vm_id, "fc-agent output connected, exec server ready"), + Err(_) => warn!(vm_id = %vm_id, "output connected_tx dropped"), } } diff --git a/src/state/manager.rs b/src/state/manager.rs index dd4ccf14..cc886dd0 100644 --- a/src/state/manager.rs +++ b/src/state/manager.rs @@ -317,35 +317,41 @@ impl StateManager { ); } - match vms.into_iter().find(|vm| vm.pid == Some(pid)) { - Some(vm) => { - tracing::debug!( - pid = pid, - vm_id = %vm.vm_id, - vm_name = ?vm.name, - "load_state_by_pid: found matching VM" - ); - Ok(vm) - } - None => { - // Log all available PIDs to help debug - let available_pids: Vec = self - .list_vms() - .await - .unwrap_or_default() - .iter() - .filter_map(|v| v.pid) - .collect(); + if let Some(vm) = vms.into_iter().find(|vm| vm.pid == Some(pid)) { + tracing::debug!( + pid = pid, + vm_id = %vm.vm_id, + vm_name = ?vm.name, + "load_state_by_pid: found matching VM" + ); + return Ok(vm); + } - tracing::error!( - search_pid = pid, - available_pids = ?available_pids, - state_dir = %self.state_dir.display(), - "load_state_by_pid: VM not found - no state file has this PID" - ); - Err(anyhow::anyhow!("No VM found with PID: {}", pid)) - } + // PID not found. 
Clean stale state files (dead PIDs) and retry once. + // Stale files from killed VMs can shadow the target if the stale PID + // was reused by the OS — save_state deletes the collision, but + // cleanup_stale_state handles the general case. + self.cleanup_stale_state().await; + let vms = self.list_vms().await?; + let available_pids: Vec = vms.iter().filter_map(|v| v.pid).collect(); + if let Some(vm) = vms.into_iter().find(|vm| vm.pid == Some(pid)) { + tracing::debug!( + pid = pid, + vm_id = %vm.vm_id, + "load_state_by_pid: found VM after stale cleanup" + ); + return Ok(vm); } + + // Still not found after cleanup + + tracing::error!( + search_pid = pid, + available_pids = ?available_pids, + state_dir = %self.state_dir.display(), + "load_state_by_pid: VM not found - no state file has this PID" + ); + Err(anyhow::anyhow!("No VM found with PID: {}", pid)) } /// List all VMs From ca2ef8c2386f8b4136eb895754aa8eb2c8f3f358 Mon Sep 17 00:00:00 2001 From: EJ Campbell Date: Wed, 4 Mar 2026 15:46:29 -0800 Subject: [PATCH 2/9] fix: add --ipv6-prefix for routed mode, skip MASQUERADE when routable Add --ipv6-prefix flag for routed mode VM addressing. When set, VMs get addresses in the specified /64 prefix via NDP proxy, and MASQUERADE is skipped (the prefix is directly routable and covered by the machine cert's IP SANs, so VM source IPs pass IP binding checks). Without --ipv6-prefix, detect_host_ipv6() auto-detects from interfaces, skipping deprecated addresses. For hosts where all /64s are deprecated, --ipv6-prefix is required. Also: preflight_check is now an instance method (&self) so the ipv6_prefix configuration cannot be mismatched between preflight and setup. 
--- src/cli/args.rs | 8 ++ src/commands/podman/mod.rs | 9 +- src/commands/serve.rs | 1 + src/network/routed.rs | 190 +++++++++++++++++++++++++------------ 4 files changed, 146 insertions(+), 62 deletions(-) diff --git a/src/cli/args.rs b/src/cli/args.rs index d436eb42..2496d2de 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -198,6 +198,14 @@ pub struct RunArgs { #[arg(long, value_enum, default_value_t = NetworkMode::Rootless)] pub network: NetworkMode, + /// Routable IPv6 /64 prefix for routed mode VM addressing. + /// Each VM gets a unique address in this prefix via NDP proxy. + /// When set, MASQUERADE is skipped (the prefix is directly routable). + /// When not set, auto-detected from host interfaces. + /// Example: --ipv6-prefix 2803:6084:7058:46f6 + #[arg(long)] + pub ipv6_prefix: Option, + /// HTTP health check URL. If not specified, health is based on container running status. /// The URL hostname is sent as the Host header; the connection goes to the guest IP. /// Example: --health-check http://myapp.example.com/status diff --git a/src/commands/podman/mod.rs b/src/commands/podman/mod.rs index 7ffa1a78..6a55f8e6 100644 --- a/src/commands/podman/mod.rs +++ b/src/commands/podman/mod.rs @@ -650,9 +650,12 @@ pub async fn prepare_vm(mut args: RunArgs) -> Result> { port_mappings.clone(), )), NetworkMode::Routed => { - RoutedNetwork::preflight_check().context("routed mode preflight check failed")?; let mut net = RoutedNetwork::new(vm_id.clone(), tap_device.clone(), port_mappings.clone()); + if let Some(ref prefix) = args.ipv6_prefix { + net = net.with_ipv6_prefix(prefix.clone()); + } + net.preflight_check().context("routed mode preflight check failed")?; if !port_mappings.is_empty() { let loopback_ip = state_manager .allocate_loopback_ip(&mut vm_state) @@ -813,7 +816,9 @@ pub async fn prepare_vm(mut args: RunArgs) -> Result> { None }; - // Start egress proxy for rootless mode (bypasses TAP/bridge for outbound TCP) + // Start egress proxy for rootless mode 
only. + // Routed mode uses native IPv6 kernel routing — no proxy needed. + // Services use mutual TLS with client certs, not source IP matching. let egress_proxy_handle = if matches!(args.network, NetworkMode::Rootless) { let socket_path = vsock_socket_path.clone(); Some(tokio::spawn(async move { diff --git a/src/commands/serve.rs b/src/commands/serve.rs index 6aca1dea..6e54c02e 100644 --- a/src/commands/serve.rs +++ b/src/commands/serve.rs @@ -309,6 +309,7 @@ async fn create_sandbox( publish: vec![], balloon: None, network: crate::cli::NetworkMode::Rootless, + ipv6_prefix: None, health_check: None, health_check_timeout: 5, privileged: false, diff --git a/src/network/routed.rs b/src/network/routed.rs index 6b5d0033..f7d873a1 100644 --- a/src/network/routed.rs +++ b/src/network/routed.rs @@ -40,6 +40,8 @@ pub struct RoutedNetwork { tap_device: String, port_mappings: Vec, loopback_ip: Option, + /// Explicit routable /64 prefix. Skips auto-detect and MASQUERADE. + ipv6_prefix: Option, // Network state (populated during setup) namespace_id: Option, @@ -56,6 +58,7 @@ impl RoutedNetwork { tap_device, port_mappings, loopback_ip: None, + ipv6_prefix: None, namespace_id: None, host_veth: None, vm_ipv6: None, @@ -64,6 +67,11 @@ impl RoutedNetwork { } } + pub fn with_ipv6_prefix(mut self, prefix: String) -> Self { + self.ipv6_prefix = Some(prefix); + self + } + /// Get the network namespace ID (for setting Firecracker's namespace). pub fn namespace_id(&self) -> Option<&str> { self.namespace_id.as_deref() @@ -80,13 +88,10 @@ impl RoutedNetwork { /// Validate that the host meets requirements for routed networking. /// - /// Checks: - /// - Running as root (required for network namespaces and veth pairs) - /// - Host has a global IPv6 address with a /64 subnet - /// - ip6tables is available (for MASQUERADE) - /// /// Call this early (before VM setup) to give clear error messages. 
- pub fn preflight_check() -> Result<()> { + /// When `--ipv6-prefix` was set (via `with_ipv6_prefix`), auto-detect and + /// ip6tables checks are skipped. + pub fn preflight_check(&self) -> Result<()> { // Must be root if !nix::unistd::getuid().is_root() { anyhow::bail!( @@ -95,23 +100,28 @@ impl RoutedNetwork { ); } + if self.ipv6_prefix.is_some() { + return Ok(()); // Explicit prefix — no auto-detect or ip6tables needed + } + // Must have global IPv6 if Self::detect_host_ipv6().is_none() { anyhow::bail!( "routed networking requires a host with a global IPv6 address.\n\ - The host needs a /64 subnet (or a /128 with a /64 on-link route, e.g. AWS VPC).\n\ - Check with: ip -6 addr show scope global\n\ - If using AWS, ensure the instance has an IPv6 address assigned." + The host needs a non-deprecated /64 (or a /128 with a /64 on-link route).\n\ + Use --ipv6-prefix to specify a routable /64 prefix explicitly.\n\ + Check with: ip -6 addr show scope global" ); } - // ip6tables must be available + // ip6tables must be available (for MASQUERADE) let ip6tables = std::process::Command::new("ip6tables") .args(["--version"]) .output(); if ip6tables.is_err() || !ip6tables.unwrap().status.success() { anyhow::bail!( "routed networking requires ip6tables for IPv6 MASQUERADE.\n\ + Use --ipv6-prefix to specify a routable prefix (skips MASQUERADE).\n\ Install with: apt-get install iptables" ); } @@ -124,13 +134,15 @@ impl RoutedNetwork { self.vm_ipv6.as_deref() } - /// Detect host's global IPv6 address and /64 subnet. + /// Detect host's global IPv6 address and /64 subnet for VM addressing. /// Returns (host_ip, subnet_prefix) e.g. ("2600:1f1c:494:201::1", "2600:1f1c:494:201") /// - /// Supports two common configurations: - /// - Direct /64: host has an address with /64 prefix length (e.g. 
home/colo servers) - /// - AWS-style /128: host has a /128 address but the kernel has a /64 on-link route - /// from Router Advertisements (standard AWS VPC behavior) + /// Skips deprecated addresses (preferred_lft 0). Supports: + /// - Direct /64: host has an active address with /64 prefix length + /// - /128 with on-link /64 route: AWS VPC, service networks + /// + /// For hosts where auto-detect fails (e.g. only deprecated /64s), use + /// --ipv6-prefix to specify the routable prefix explicitly. fn detect_host_ipv6() -> Option<(String, String)> { let output = std::process::Command::new("ip") .args(["-6", "addr", "show", "scope", "global"]) @@ -141,6 +153,9 @@ impl RoutedNetwork { for line in stdout.lines() { let line = line.trim(); if let Some(addr) = line.strip_prefix("inet6 ") { + if line.contains("deprecated") { + continue; + } if let Some(addr_cidr) = addr.split_whitespace().next() { if let Some((addr, prefix_len)) = addr_cidr.split_once('/') { if addr.starts_with("fe80") || addr.starts_with("fd") { @@ -154,11 +169,9 @@ impl RoutedNetwork { ); if prefix_len == "64" { - // Direct /64 — use as-is return Some((addr.to_string(), prefix)); } if prefix_len == "128" { - // AWS-style: /128 address, check for /64 on-link route if Self::has_onlink_64_route(&prefix) { info!( addr = %addr, @@ -258,9 +271,16 @@ impl NetworkManager for RoutedNetwork { "setting up routed networking" ); - // Detect host IPv6 subnet - let (host_ipv6, ipv6_prefix) = Self::detect_host_ipv6() - .context("routed mode requires a host with a global IPv6 /64 subnet")?; + // Resolve IPv6 /64 prefix: explicit --ipv6-prefix or auto-detect from interfaces + let (host_ipv6, ipv6_prefix) = if let Some(ref prefix) = self.ipv6_prefix { + let host_addr = format!("{}::1", prefix); + info!(prefix = %prefix, "using explicit --ipv6-prefix (routable, no MASQUERADE)"); + (host_addr, prefix.clone()) + } else { + Self::detect_host_ipv6() + .context("routed mode requires a global IPv6 /64 subnet. 
\ + Use --ipv6-prefix to specify one explicitly.")? + }; // Generate a unique IPv6 for this VM. Check for route collisions // (astronomically unlikely with 64-bit hash, but defend against it). @@ -356,6 +376,50 @@ impl NetworkManager for RoutedNetwork { .output() .await; + // Detect default interface early — used for sysctl checks AND proxy NDP below. + let default_iface = detect_default_ipv6_interface() + .await + .unwrap_or_else(|| "eth0".to_string()); + + // Verify host routing is set up correctly. These sysctls are the user's + // responsibility (host sysctl configuration), not fcvm's — but warn + // loudly if they're wrong because IPv6 egress silently fails without them. + if self.ipv6_prefix.is_none() { + if let Ok(val) = + tokio::fs::read_to_string("/proc/sys/net/ipv6/conf/all/forwarding").await + { + if val.trim() != "1" { + warn!( + "net.ipv6.conf.all.forwarding={} (need 1) — fix host sysctls", + val.trim() + ); + } + } + if let Ok(val) = tokio::fs::read_to_string(format!( + "/proc/sys/net/ipv6/conf/{}/accept_ra", + default_iface + )) + .await + { + if val.trim() != "2" { + warn!( + "net.ipv6.conf.{}.accept_ra={} (need 2) — IPv6 routing may fail after reboot", + default_iface, + val.trim() + ); + } + } + let route_check = tokio::process::Command::new("ip") + .args(["-6", "route", "show", "default"]) + .output() + .await; + if let Ok(out) = route_check { + if !String::from_utf8_lossy(&out.stdout).contains("default via") { + warn!("no default IPv6 route — fix host sysctls to fix accept_ra"); + } + } + } + // 8. Assign link-local to host veth manually (auto-assignment fails when // all.forwarding=1 from a previous run). Use EUI-64 from MAC + nodad. let host_ll = generate_link_local_from_mac(&host_veth) @@ -417,9 +481,7 @@ impl NetworkManager for RoutedNetwork { .await; // 12. 
Add proxy NDP so the network fabric routes VM's IPv6 to this host - let default_iface = detect_default_ipv6_interface() - .await - .unwrap_or_else(|| "eth0".to_string()); + // (default_iface already detected above) // Enable proxy NDP on the interface so the kernel actually responds // to neighbor solicitations for our proxy entries. let _ = tokio::process::Command::new("sysctl") @@ -447,22 +509,28 @@ impl NetworkManager for RoutedNetwork { // On AWS, source/dest check drops packets with unassigned source IPs. // MASQUERADE rewrites the source to the host's IP so the VPC fabric // accepts the traffic. IPv4 is not routed externally — only IPv6. - let _ = tokio::process::Command::new("ip6tables") - .args([ - "-t", - "nat", - "-A", - "POSTROUTING", - "-o", - &default_iface, - "-s", - &format!("{}/128", vm_ipv6), - "-j", - "MASQUERADE", - ]) - .output() - .await; - info!(iface = %default_iface, "added IPv6 MASQUERADE for outbound traffic"); + // Skipped when --ipv6-prefix is set: the prefix is directly routable + // and the VM's source IP matches the cert's IP SANs. + if self.ipv6_prefix.is_some() { + info!(iface = %default_iface, "skipping MASQUERADE (--ipv6-prefix is routable)"); + } else { + let _ = tokio::process::Command::new("ip6tables") + .args([ + "-t", + "nat", + "-A", + "POSTROUTING", + "-o", + &default_iface, + "-s", + &format!("{}/128", vm_ipv6), + "-j", + "MASQUERADE", + ]) + .output() + .await; + info!(iface = %default_iface, "added IPv6 MASQUERADE for outbound traffic"); + } // 14. Port forwarding: TCP proxy listens on host loopback, connects to VM // inside the namespace via setns(2). 
The veth is a bridge member so @@ -568,28 +636,30 @@ impl NetworkManager for RoutedNetwork { if let Some(ref vm_ipv6) = self.vm_ipv6 { let default_iface = self.default_iface.as_deref().unwrap_or("eth0"); - // Remove IPv6 MASQUERADE rule - match tokio::process::Command::new("ip6tables") - .args([ - "-t", - "nat", - "-D", - "POSTROUTING", - "-o", - default_iface, - "-s", - &format!("{}/128", vm_ipv6), - "-j", - "MASQUERADE", - ]) - .output() - .await - { - Ok(o) if !o.status.success() => { - warn!(stderr = %String::from_utf8_lossy(&o.stderr).trim(), "ip6tables MASQUERADE cleanup failed"); + // Remove IPv6 MASQUERADE rule (only if we set one — skipped with --ipv6-prefix) + if self.ipv6_prefix.is_none() { + match tokio::process::Command::new("ip6tables") + .args([ + "-t", + "nat", + "-D", + "POSTROUTING", + "-o", + default_iface, + "-s", + &format!("{}/128", vm_ipv6), + "-j", + "MASQUERADE", + ]) + .output() + .await + { + Ok(o) if !o.status.success() => { + warn!(stderr = %String::from_utf8_lossy(&o.stderr).trim(), "ip6tables MASQUERADE cleanup failed"); + } + Err(e) => warn!(error = %e, "ip6tables command failed"), + _ => {} } - Err(e) => warn!(error = %e, "ip6tables command failed"), - _ => {} } // Remove proxy NDP From 23b113162270e1b72861fae879790375e6750bca Mon Sep 17 00:00:00 2001 From: ejc3 Date: Thu, 5 Mar 2026 00:52:27 +0000 Subject: [PATCH 3/9] fix: formatting, clippy, and missing ipv6_prefix field in tests - cargo fmt: line wrapping for .context() chains - clippy: collapse nested if in detect_host_ipv6() - Add missing ipv6_prefix field to RunArgs in test helpers Tested: make lint (fmt, clippy, audit, deny all pass) --- src/commands/podman/mod.rs | 4 +++- src/commands/snapshot.rs | 3 ++- src/network/routed.rs | 23 +++++++++++------------ tests/test_library_api.rs | 1 + 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/commands/podman/mod.rs b/src/commands/podman/mod.rs index 6a55f8e6..24aae361 100644 --- a/src/commands/podman/mod.rs +++ 
b/src/commands/podman/mod.rs @@ -655,7 +655,8 @@ pub async fn prepare_vm(mut args: RunArgs) -> Result> { if let Some(ref prefix) = args.ipv6_prefix { net = net.with_ipv6_prefix(prefix.clone()); } - net.preflight_check().context("routed mode preflight check failed")?; + net.preflight_check() + .context("routed mode preflight check failed")?; if !port_mappings.is_empty() { let loopback_ip = state_manager .allocate_loopback_ip(&mut vm_state) @@ -1166,6 +1167,7 @@ mod tests { rootfs_type: None, non_blocking_output: false, label: vec![], + ipv6_prefix: None, image: "alpine:latest".to_string(), command_args: vec![], } diff --git a/src/commands/snapshot.rs b/src/commands/snapshot.rs index 9e02b8e8..39168181 100644 --- a/src/commands/snapshot.rs +++ b/src/commands/snapshot.rs @@ -728,7 +728,8 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { FcNetworkMode::Routed => { let mut net = RoutedNetwork::new(vm_id.clone(), tap_device.clone(), port_mappings.clone()); - net.preflight_check().context("routed mode preflight check failed")?; + net.preflight_check() + .context("routed mode preflight check failed")?; if !port_mappings.is_empty() { let loopback_ip = state_manager .allocate_loopback_ip(&mut vm_state) diff --git a/src/network/routed.rs b/src/network/routed.rs index f7d873a1..feb728ae 100644 --- a/src/network/routed.rs +++ b/src/network/routed.rs @@ -171,15 +171,13 @@ impl RoutedNetwork { if prefix_len == "64" { return Some((addr.to_string(), prefix)); } - if prefix_len == "128" { - if Self::has_onlink_64_route(&prefix) { - info!( - addr = %addr, - prefix = %prefix, - "using /128 address with /64 on-link route" - ); - return Some((addr.to_string(), prefix)); - } + if prefix_len == "128" && Self::has_onlink_64_route(&prefix) { + info!( + addr = %addr, + prefix = %prefix, + "using /128 address with /64 on-link route" + ); + return Some((addr.to_string(), prefix)); } } } @@ -277,9 +275,10 @@ impl NetworkManager for RoutedNetwork { info!(prefix = %prefix, 
"using explicit --ipv6-prefix (routable, no MASQUERADE)"); (host_addr, prefix.clone()) } else { - Self::detect_host_ipv6() - .context("routed mode requires a global IPv6 /64 subnet. \ - Use --ipv6-prefix to specify one explicitly.")? + Self::detect_host_ipv6().context( + "routed mode requires a global IPv6 /64 subnet. \ + Use --ipv6-prefix to specify one explicitly.", + )? }; // Generate a unique IPv6 for this VM. Check for route collisions diff --git a/tests/test_library_api.rs b/tests/test_library_api.rs index e69e9b9a..286fc415 100644 --- a/tests/test_library_api.rs +++ b/tests/test_library_api.rs @@ -50,6 +50,7 @@ fn test_run_args(name: &str) -> RunArgs { label: vec![], non_blocking_output: false, health_check_timeout: 5, + ipv6_prefix: None, image: common::TEST_IMAGE.to_string(), command_args: vec![], } From 06b9b1a4bb768c99db850cfb385be84e1ab0755c Mon Sep 17 00:00:00 2001 From: ejc3 Date: Thu, 5 Mar 2026 01:06:31 +0000 Subject: [PATCH 4/9] fix: propagate ipv6_prefix through snapshot, validate input, fix hang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three issues found by code review: 1. ipv6_prefix not propagated through snapshot metadata — clones of routed-mode VMs with --ipv6-prefix would fail preflight (no auto-detect) or incorrectly add MASQUERADE. Added ipv6_prefix to VmConfig, SnapshotMetadata, and the snapshot restore path. 2. No validation of --ipv6-prefix input — invalid values like "foobar" produced nonsense addresses. Added validate_ipv6_prefix() that checks for 4 colon-separated hex groups. 3. Bare .await on output_connected_rx could hang forever if Firecracker crashes before fc-agent connects. Replaced with tokio::select! loop that polls vm_manager.try_wait() every 5s as a liveness check. 
Tested: make lint (fmt, clippy, audit, deny all pass) --- src/commands/common.rs | 1 + src/commands/podman/mod.rs | 1 + src/commands/snapshot.rs | 34 ++++++++++++++++++++++++++++++---- src/network/routed.rs | 33 ++++++++++++++++++++++++++++++++- src/state/types.rs | 5 +++++ src/storage/snapshot.rs | 8 ++++++++ tests/test_health_monitor.rs | 1 + tests/test_state_manager.rs | 4 ++++ 8 files changed, 82 insertions(+), 5 deletions(-) diff --git a/src/commands/common.rs b/src/commands/common.rs index a89fb7dc..ea205128 100644 --- a/src/commands/common.rs +++ b/src/commands/common.rs @@ -1282,6 +1282,7 @@ pub fn build_snapshot_config( user: vm_state.config.user.clone(), port_mappings: vm_state.config.port_mappings.clone(), network_mode: vm_state.config.network_mode, + ipv6_prefix: vm_state.config.ipv6_prefix.clone(), tty: vm_state.config.tty, interactive: vm_state.config.interactive, }, diff --git a/src/commands/podman/mod.rs b/src/commands/podman/mod.rs index 24aae361..0d06fcee 100644 --- a/src/commands/podman/mod.rs +++ b/src/commands/podman/mod.rs @@ -600,6 +600,7 @@ pub async fn prepare_vm(mut args: RunArgs) -> Result> { vm_state.config.portable_volumes = args.portable_volumes; vm_state.config.port_mappings = port_mappings.clone(); vm_state.config.network_mode = args.network.into(); + vm_state.config.ipv6_prefix = args.ipv6_prefix.clone(); vm_state.config.tty = args.tty; vm_state.config.interactive = args.interactive; vm_state.config.user = args.user.clone(); diff --git a/src/commands/snapshot.rs b/src/commands/snapshot.rs index 39168181..c43d4690 100644 --- a/src/commands/snapshot.rs +++ b/src/commands/snapshot.rs @@ -643,7 +643,7 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { // stays stuck reading from the old (dead) connection after VM resume resets vsock. 
let output_reconnect = Arc::new(tokio::sync::Notify::new()); // Channel to know when fc-agent's output connection arrives (gates health monitor) - let (output_connected_tx, output_connected_rx) = tokio::sync::oneshot::channel(); + let (output_connected_tx, mut output_connected_rx) = tokio::sync::oneshot::channel(); let output_handle = if !tty_mode { let socket_path = output_socket_path.clone(); let vm_id_clone = vm_id.clone(); @@ -728,6 +728,9 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { FcNetworkMode::Routed => { let mut net = RoutedNetwork::new(vm_id.clone(), tap_device.clone(), port_mappings.clone()); + if let Some(ref prefix) = snapshot_config.metadata.ipv6_prefix { + net = net.with_ipv6_prefix(prefix.clone()); + } net.preflight_check() .context("routed mode preflight check failed")?; if !port_mappings.is_empty() { @@ -1102,10 +1105,33 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { // No timeout — after snapshot restore, the VM may be CPU-starved (HHVM, EdenFS, // falcon all resume simultaneously) and fc-agent's MMDS poll + restore handler // can take minutes. Proceeding early causes exec failures; waiting is correct. + // But poll VM liveness to avoid hanging forever if Firecracker crashes. if !tty_mode { - match output_connected_rx.await { - Ok(()) => info!(vm_id = %vm_id, "fc-agent output connected, exec server ready"), - Err(_) => warn!(vm_id = %vm_id, "output connected_tx dropped"), + let mut liveness_interval = tokio::time::interval(std::time::Duration::from_secs(5)); + liveness_interval.tick().await; // consume immediate first tick + loop { + tokio::select! 
{ + result = &mut output_connected_rx => { + match result { + Ok(()) => info!(vm_id = %vm_id, "fc-agent output connected, exec server ready"), + Err(_) => warn!(vm_id = %vm_id, "output connected_tx dropped"), + } + break; + } + _ = liveness_interval.tick() => { + match vm_manager.try_wait() { + Ok(Some(status)) => { + warn!(vm_id = %vm_id, ?status, "VM exited before fc-agent connected"); + break; + } + Ok(None) => {} // still running + Err(e) => { + warn!(vm_id = %vm_id, error = %e, "VM liveness check failed"); + break; + } + } + } + } } } diff --git a/src/network/routed.rs b/src/network/routed.rs index feb728ae..71a63a55 100644 --- a/src/network/routed.rs +++ b/src/network/routed.rs @@ -67,11 +67,41 @@ impl RoutedNetwork { } } + /// Set an explicit routable /64 prefix. Validates format (4 colon-separated hex groups). pub fn with_ipv6_prefix(mut self, prefix: String) -> Self { self.ipv6_prefix = Some(prefix); self } + /// Validate that a prefix string looks like a valid IPv6 /64 prefix + /// (4 colon-separated groups of 1-4 hex digits, e.g. "2600:1f1c:494:201"). + fn validate_ipv6_prefix(prefix: &str) -> Result<()> { + let groups: Vec<&str> = prefix.split(':').collect(); + if groups.len() != 4 { + anyhow::bail!( + "invalid --ipv6-prefix '{}': expected 4 colon-separated hex groups \ + (e.g. 2600:1f1c:494:201)", + prefix + ); + } + for group in &groups { + if group.is_empty() || group.len() > 4 { + anyhow::bail!( + "invalid --ipv6-prefix '{}': each group must be 1-4 hex digits", + prefix + ); + } + if u16::from_str_radix(group, 16).is_err() { + anyhow::bail!( + "invalid --ipv6-prefix '{}': '{}' is not valid hex", + prefix, + group + ); + } + } + Ok(()) + } + /// Get the network namespace ID (for setting Firecracker's namespace). 
pub fn namespace_id(&self) -> Option<&str> { self.namespace_id.as_deref() @@ -100,7 +130,8 @@ impl RoutedNetwork { ); } - if self.ipv6_prefix.is_some() { + if let Some(ref prefix) = self.ipv6_prefix { + Self::validate_ipv6_prefix(prefix)?; return Ok(()); // Explicit prefix — no auto-detect or ip6tables needed } diff --git a/src/state/types.rs b/src/state/types.rs index 9670e16f..01106b37 100644 --- a/src/state/types.rs +++ b/src/state/types.rs @@ -150,6 +150,10 @@ pub struct VmConfig { /// Stored so clones inherit the same networking mode from snapshots. #[serde(default)] pub network_mode: crate::firecracker::FcNetworkMode, + /// Explicit routable IPv6 /64 prefix for routed mode. + /// When set, MASQUERADE is skipped and auto-detect is bypassed. + #[serde(default)] + pub ipv6_prefix: Option, /// Whether a PTY is allocated for the container. #[serde(default)] pub tty: bool, @@ -199,6 +203,7 @@ impl VmState { portable_volumes: false, user: None, username: None, + ipv6_prefix: None, }, } } diff --git a/src/storage/snapshot.rs b/src/storage/snapshot.rs index 2f612432..613b4d39 100644 --- a/src/storage/snapshot.rs +++ b/src/storage/snapshot.rs @@ -96,6 +96,9 @@ pub struct SnapshotMetadata { /// Network mode (bridged, rootless, routed) inherited by clones #[serde(default)] pub network_mode: crate::firecracker::FcNetworkMode, + /// Explicit routable IPv6 /64 prefix for routed mode (skips auto-detect and MASQUERADE) + #[serde(default)] + pub ipv6_prefix: Option, /// Whether PTY is allocated for the container #[serde(default)] pub tty: bool, @@ -283,6 +286,7 @@ mod tests { user: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, }, @@ -407,6 +411,7 @@ mod tests { user: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, }, @@ -475,6 +480,7 @@ mod tests { user: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: 
false, interactive: false, }, @@ -530,6 +536,7 @@ mod tests { user: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, }, @@ -636,6 +643,7 @@ mod tests { user: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, }, diff --git a/tests/test_health_monitor.rs b/tests/test_health_monitor.rs index ed3ab0c2..cf857840 100644 --- a/tests/test_health_monitor.rs +++ b/tests/test_health_monitor.rs @@ -80,6 +80,7 @@ async fn test_health_monitor_behaviors() { original_vsock_vm_id: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, labels: std::collections::HashMap::new(), diff --git a/tests/test_state_manager.rs b/tests/test_state_manager.rs index 96675a4f..185253d5 100644 --- a/tests/test_state_manager.rs +++ b/tests/test_state_manager.rs @@ -53,6 +53,7 @@ async fn test_state_persistence() { original_vsock_vm_id: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, labels: std::collections::HashMap::new(), @@ -129,6 +130,7 @@ async fn test_list_vms() { original_vsock_vm_id: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, labels: std::collections::HashMap::new(), @@ -189,6 +191,7 @@ async fn test_load_state_by_name_duplicate_detection() { original_vsock_vm_id: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, labels: std::collections::HashMap::new(), @@ -243,6 +246,7 @@ async fn test_load_state_by_name_duplicate_detection() { original_vsock_vm_id: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, labels: std::collections::HashMap::new(), From f074323a68af1635deec505e52fad112e294beaa Mon Sep 17 00:00:00 2001 From: ejc3 Date: Thu, 
5 Mar 2026 01:08:54 +0000 Subject: [PATCH 5/9] refactor: restore symmetric builder pattern for RoutedNetwork MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep ipv6_prefix as a builder method (with_ipv6_prefix), consistent with loopback_ip (with_loopback_ip). Both are optional config set after construction — loopback_ip because it's allocated async after preflight_check, ipv6_prefix because it's genuinely optional. The real compile-time safety comes from storing ipv6_prefix in SnapshotMetadata, so both call sites (podman run + snapshot run) have it available and use symmetric code. Tested: cargo fmt --check + clippy pass --- src/network/routed.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/network/routed.rs b/src/network/routed.rs index 71a63a55..369a3e79 100644 --- a/src/network/routed.rs +++ b/src/network/routed.rs @@ -67,7 +67,6 @@ impl RoutedNetwork { } } - /// Set an explicit routable /64 prefix. Validates format (4 colon-separated hex groups). 
pub fn with_ipv6_prefix(mut self, prefix: String) -> Self { self.ipv6_prefix = Some(prefix); self From bac740f8ebfc4efbdc25a7b4a57b003bfda81925 Mon Sep 17 00:00:00 2001 From: ejc3 Date: Thu, 5 Mar 2026 01:13:37 +0000 Subject: [PATCH 6/9] docs: update README, DESIGN, CLAUDE.md for --ipv6-prefix - README: ip6tables not required with --ipv6-prefix, add to CLI Reference - DESIGN: add ipv6_prefix field to RoutedNetwork, note conditional checks - CLAUDE.md: note MASQUERADE skipped with --ipv6-prefix --- .claude/CLAUDE.md | 2 +- DESIGN.md | 11 ++++++----- README.md | 3 ++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index eb02c32d..cdc8513c 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -1229,7 +1229,7 @@ fuse-pipe/benches/ - Each VM gets a unique IPv6 derived from host's /64 subnet via hash of vm_id - Network namespace with bridge (br0) connecting TAP and veth for L2 forwarding - Proxy NDP on default interface makes VM IPv6 routable from network fabric -- ip6tables MASQUERADE for AWS VPC source/dest checks +- ip6tables MASQUERADE for AWS VPC source/dest checks (skipped when `--ipv6-prefix` is set) - Port forwarding via built-in TCP proxy (setns + tokio relay) on unique loopback IP (same allocation as rootless) - IPv4 stays internal to namespace (health checks only); all external traffic uses IPv6 - Egress proxy is NOT used — IPv6 goes natively through the kernel stack diff --git a/DESIGN.md b/DESIGN.md index e4b2815d..0f7cd7de 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -290,7 +290,7 @@ iptables -t nat -A PREROUTING -d 172.30.x.1 -p tcp --dport 8080 -j DNAT --to-des Uses veth pairs + IPv6 routing for kernel line-rate networking without userspace proxies. 
**Features**: -- Requires root and a host with a global IPv6 /64 subnet +- Requires root and a host with a global IPv6 /64 subnet (or `--ipv6-prefix` to specify one explicitly) - Native IPv6 routing through the kernel stack (no userspace L4 translation) - Each VM gets a unique IPv6 derived from the host's /64 prefix - Port forwarding via built-in TCP proxy (`setns` + tokio relay) on loopback IP (same as rootless) @@ -308,11 +308,12 @@ struct RoutedNetwork { vm_ipv6: Option, default_iface: Option, proxy_handles: Vec>, + ipv6_prefix: Option, // explicit /64 prefix (skips auto-detect + MASQUERADE) } async fn setup() -> Result { - preflight_check() // root, IPv6, ip6tables - detect_host_ipv6() // find /64 subnet (or /128 with on-link /64) + self.preflight_check() // root, IPv6, ip6tables (ip6tables skipped if --ipv6-prefix) + detect_host_ipv6() // find /64 subnet (or /128 with on-link /64); skipped if --ipv6-prefix generate_vm_ipv6(prefix, vm_id) // deterministic IPv6 from hash create_namespace(ns_name) create_veth_pair(host_veth, guest_veth) @@ -323,7 +324,7 @@ async fn setup() -> Result { // Namespace: default IPv6 route via host veth link-local // Host: /128 route to VM IPv6 via host veth // Proxy NDP on default interface - // ip6tables MASQUERADE for outbound + // ip6tables MASQUERADE for outbound (skipped if --ipv6-prefix is set) // TCP proxy port forwarding on loopback IP (setns + tokio relay) } ``` @@ -1366,7 +1367,7 @@ fcvm snapshot run --pid [OPTIONS] --exec Execute command in container after clone is healthy ``` -Network mode, port mappings, TTY, and interactive flags are inherited from the snapshot +Network mode, port mappings, TTY, interactive flags, and `--ipv6-prefix` are inherited from the snapshot metadata automatically — no need to re-specify them on clone. 
**Examples**: diff --git a/README.md b/README.md index 70788475..74dcde2a 100644 --- a/README.md +++ b/README.md @@ -270,7 +270,7 @@ fcvm auto-forwards `http_proxy`/`https_proxy` from host to VM via MMDS. - Firecracker binary in PATH - For rootless: `passt` package (provides `pasta`) - For bridged: sudo, iptables, iproute2 -- For routed: sudo, ip6tables, iproute2, host with global IPv6 /64 +- For routed: sudo, iproute2, host with global IPv6 /64 (ip6tables also needed unless `--ipv6-prefix` is set) - For rootfs build: qemu-utils, e2fsprogs **Storage:** btrfs at `/mnt/fcvm-btrfs` (auto-created as loopback on non-btrfs hosts) @@ -336,6 +336,7 @@ See [`Containerfile`](Containerfile) for the complete dependency list used in CI --portable-volumes Path-hash inodes for cross-machine snapshot/restore --rootfs-size Minimum free space on rootfs (default: 10G) --no-snapshot Disable automatic snapshot creation +--ipv6-prefix Use explicit /64 prefix for routed mode (skips auto-detect and MASQUERADE) ``` Run `fcvm --help` or `fcvm --help` for full options. From c1f6fab8a860c28b0e5c00cb994dd061b839700b Mon Sep 17 00:00:00 2001 From: ejc3 Date: Thu, 5 Mar 2026 01:28:05 +0000 Subject: [PATCH 7/9] test: add unit tests for ipv6_prefix validation, generation, and serde - 10 tests in routed.rs: prefix validation (valid/invalid/hex/full-addr), VM IPv6 generation (deterministic/format), parse_host_ipv6 (deprecated address filtering, link-local/ULA skipping, prefix extraction) - 2 tests in snapshot.rs: ipv6_prefix roundtrip through SnapshotMetadata serde, backward compatibility with old snapshots missing the field - 3 tests in test_state_manager.rs: load_state_by_pid found/not-found, stale state cleanup on retry (verifies dead PID files are removed) Extracted parse_host_ipv6(output, check_onlink) from detect_host_ipv6() for testability without shelling out to `ip addr`. 
--- src/network/routed.rs | 185 +++++++++++++++++++++++++++++++++++- src/storage/snapshot.rs | 54 +++++++++++ tests/test_state_manager.rs | 108 +++++++++++++++++++++ 3 files changed, 344 insertions(+), 3 deletions(-) diff --git a/src/network/routed.rs b/src/network/routed.rs index 369a3e79..52b3eea8 100644 --- a/src/network/routed.rs +++ b/src/network/routed.rs @@ -180,7 +180,20 @@ impl RoutedNetwork { .ok()?; let stdout = String::from_utf8_lossy(&output.stdout); - for line in stdout.lines() { + // First pass: look for /64 addresses (preferred over /128) + if let Some(result) = Self::parse_host_ipv6(&stdout, false) { + return Some(result); + } + // Second pass: check /128 addresses with on-link /64 routes + Self::parse_host_ipv6(&stdout, true) + } + + /// Parse `ip -6 addr show` output to find a usable global IPv6 address. + /// When `check_onlink` is false, only returns /64 addresses. + /// When `check_onlink` is true, returns /128 addresses that have on-link /64 routes. + /// Skips deprecated, link-local, and ULA addresses. 
+ fn parse_host_ipv6(output: &str, check_onlink: bool) -> Option<(String, String)> { + for line in output.lines() { let line = line.trim(); if let Some(addr) = line.strip_prefix("inet6 ") { if line.contains("deprecated") { @@ -198,10 +211,13 @@ impl RoutedNetwork { segments[0], segments[1], segments[2], segments[3] ); - if prefix_len == "64" { + if !check_onlink && prefix_len == "64" { return Some((addr.to_string(), prefix)); } - if prefix_len == "128" && Self::has_onlink_64_route(&prefix) { + if check_onlink + && prefix_len == "128" + && Self::has_onlink_64_route(&prefix) + { info!( addr = %addr, prefix = %prefix, @@ -845,3 +861,166 @@ async fn detect_default_ipv6_interface() -> Option { } None } + +#[cfg(test)] +mod tests { + use super::*; + + // --- validate_ipv6_prefix tests --- + + #[test] + fn test_validate_ipv6_prefix_valid() { + assert!(RoutedNetwork::validate_ipv6_prefix("2600:1f1c:494:201").is_ok()); + assert!(RoutedNetwork::validate_ipv6_prefix("2803:6084:7058:46f6").is_ok()); + assert!(RoutedNetwork::validate_ipv6_prefix("0:0:0:0").is_ok()); + assert!(RoutedNetwork::validate_ipv6_prefix("ffff:ffff:ffff:ffff").is_ok()); + assert!(RoutedNetwork::validate_ipv6_prefix("a:b:c:d").is_ok()); + } + + #[test] + fn test_validate_ipv6_prefix_wrong_group_count() { + let err = RoutedNetwork::validate_ipv6_prefix("2600:1f1c:494").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + + let err = RoutedNetwork::validate_ipv6_prefix("2600:1f1c:494:201:abcd").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + + let err = RoutedNetwork::validate_ipv6_prefix("2600").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + + let err = RoutedNetwork::validate_ipv6_prefix("").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + } + + #[test] + fn test_validate_ipv6_prefix_invalid_hex() { + // 
Non-hex characters + let err = RoutedNetwork::validate_ipv6_prefix("zzzz:1f1c:494:201").unwrap_err(); + assert!(err.to_string().contains("not valid hex")); + + // Empty group (consecutive colons) — splits to 4 groups but one is empty + let err = RoutedNetwork::validate_ipv6_prefix("2600::494:201").unwrap_err(); + assert!(err + .to_string() + .contains("each group must be 1-4 hex digits")); + + // Group too long (5 digits) + let err = RoutedNetwork::validate_ipv6_prefix("26000:1f1c:494:201").unwrap_err(); + assert!(err + .to_string() + .contains("each group must be 1-4 hex digits")); + } + + #[test] + fn test_validate_ipv6_prefix_full_address_rejected() { + // Full IPv6 address (8 groups) should be rejected + let err = RoutedNetwork::validate_ipv6_prefix("2600:1f1c:494:201:1:2:3:4").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + + // Compressed full address + let err = RoutedNetwork::validate_ipv6_prefix("2600:1f1c:494:201::1").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + } + + // --- generate_vm_ipv6 tests --- + + #[test] + fn test_generate_vm_ipv6_deterministic() { + let a1 = RoutedNetwork::generate_vm_ipv6("2600:1f1c:494:201", "vm-abc"); + let a2 = RoutedNetwork::generate_vm_ipv6("2600:1f1c:494:201", "vm-abc"); + assert_eq!(a1, a2, "same inputs must produce same output"); + + let b = RoutedNetwork::generate_vm_ipv6("2600:1f1c:494:201", "vm-xyz"); + assert_ne!(a1, b, "different vm_ids must produce different addresses"); + + let c = RoutedNetwork::generate_vm_ipv6("2803:6084:7058:46f6", "vm-abc"); + assert_ne!(a1, c, "different prefixes must produce different addresses"); + } + + #[test] + fn test_generate_vm_ipv6_format() { + let addr = RoutedNetwork::generate_vm_ipv6("2600:1f1c:494:201", "vm-test"); + assert!( + addr.starts_with("2600:1f1c:494:201:"), + "address must start with prefix: {}", + addr + ); + // Should have 8 colon-separated groups total (4 
prefix + 4 interface ID) + let groups: Vec<&str> = addr.split(':').collect(); + assert_eq!(groups.len(), 8, "IPv6 must have 8 groups: {}", addr); + // Each interface ID group should be valid hex + for group in &groups[4..] { + assert!( + u16::from_str_radix(group, 16).is_ok(), + "group '{}' is not valid hex in: {}", + group, + addr + ); + } + } + + // --- parse_host_ipv6 tests (deprecated address filtering) --- + + #[test] + fn test_parse_host_ipv6_skips_deprecated() { + let output = "\ +2: eth0: mtu 9001 state UP + inet6 2600:1f1c:494:201::1/64 scope global deprecated dynamic noprefixroute + valid_lft 3552sec preferred_lft 0sec + inet6 2803:6084:7058:46f6::1/64 scope global dynamic noprefixroute + valid_lft 3552sec preferred_lft 3552sec"; + + let result = RoutedNetwork::parse_host_ipv6(output, false); + assert!(result.is_some(), "should find non-deprecated address"); + let (addr, prefix) = result.unwrap(); + assert_eq!(addr, "2803:6084:7058:46f6::1"); + assert_eq!(prefix, "2803:6084:7058:46f6"); + } + + #[test] + fn test_parse_host_ipv6_skips_link_local_and_ula() { + let output = "\ + inet6 fe80::1/64 scope global + inet6 fd00::1/64 scope global + inet6 2600:1f1c:494:201::5/64 scope global dynamic"; + + let result = RoutedNetwork::parse_host_ipv6(output, false); + assert!(result.is_some()); + let (addr, _) = result.unwrap(); + assert_eq!(addr, "2600:1f1c:494:201::5"); + } + + #[test] + fn test_parse_host_ipv6_all_deprecated_returns_none() { + let output = "\ + inet6 2600:1f1c:494:201::1/64 scope global deprecated dynamic + inet6 2803:6084:7058:46f6::1/64 scope global deprecated dynamic"; + + let result = RoutedNetwork::parse_host_ipv6(output, false); + assert!(result.is_none(), "all deprecated should return None"); + } + + #[test] + fn test_parse_host_ipv6_extracts_prefix() { + let output = " inet6 2600:1f1c:0494:0201::abcd/64 scope global dynamic"; + + let result = RoutedNetwork::parse_host_ipv6(output, false); + assert!(result.is_some()); + let (addr, prefix) = 
result.unwrap(); + assert_eq!(addr, "2600:1f1c:0494:0201::abcd"); + // Prefix is normalized through Ipv6Addr parsing (leading zeros stripped) + assert_eq!(prefix, "2600:1f1c:494:201"); + } +} diff --git a/src/storage/snapshot.rs b/src/storage/snapshot.rs index 613b4d39..d76d72bf 100644 --- a/src/storage/snapshot.rs +++ b/src/storage/snapshot.rs @@ -663,4 +663,58 @@ mod tests { let parsed: SnapshotConfig = serde_json::from_str(&json).unwrap(); assert_eq!(parsed.snapshot_type, SnapshotType::System); } + + #[test] + fn test_snapshot_metadata_ipv6_prefix_roundtrip() { + let metadata = SnapshotMetadata { + image: "nginx:alpine".to_string(), + vcpu: 2, + memory_mib: 512, + network_config: NetworkConfig::default(), + volumes: vec![], + health_check_url: None, + health_check_timeout: 5, + hugepages: false, + extra_disks: vec![], + username: None, + user: None, + port_mappings: vec![], + network_mode: Default::default(), + ipv6_prefix: Some("2600:1f1c:494:201".to_string()), + tty: false, + interactive: false, + }; + + let json = serde_json::to_string(&metadata).unwrap(); + assert!(json.contains("2600:1f1c:494:201")); + + let parsed: SnapshotMetadata = serde_json::from_str(&json).unwrap(); + assert_eq!( + parsed.ipv6_prefix, + Some("2600:1f1c:494:201".to_string()), + "ipv6_prefix must survive serialization roundtrip" + ); + } + + #[test] + fn test_snapshot_metadata_ipv6_prefix_backward_compat() { + // Old snapshots won't have ipv6_prefix — must deserialize to None + let json = r#"{ + "image": "nginx:alpine", + "vcpu": 2, + "memory_mib": 512, + "network_config": { + "tap_device": "tap-old", + "guest_mac": "AA:BB:CC:DD:EE:FF" + } + }"#; + + let metadata: SnapshotMetadata = serde_json::from_str(json).unwrap(); + assert_eq!( + metadata.ipv6_prefix, None, + "missing ipv6_prefix must default to None for backward compat" + ); + assert_eq!(metadata.image, "nginx:alpine"); + assert_eq!(metadata.vcpu, 2); + } } diff --git a/tests/test_state_manager.rs b/tests/test_state_manager.rs 
index 185253d5..4fdac9cd 100644 --- a/tests/test_state_manager.rs +++ b/tests/test_state_manager.rs @@ -261,3 +261,111 @@ async fn test_load_state_by_name_duplicate_detection() { let found = manager.load_state_by_name("unique-name").await.unwrap(); assert_eq!(found.pid, Some(6000)); } + +/// Helper to create a minimal VmState with given vm_id and pid +fn make_vm_state(vm_id: &str, name: &str, pid: u32) -> VmState { + VmState { + schema_version: 1, + vm_id: vm_id.to_string(), + name: Some(name.to_string()), + status: VmStatus::Running, + health_status: HealthStatus::Healthy, + exit_code: None, + pid: Some(pid), + holder_pid: None, + created_at: Utc::now(), + last_updated: Utc::now(), + config: VmConfig { + image: "test:latest".to_string(), + vcpu: 1, + memory_mib: 256, + network: NetworkConfig::default(), + volumes: vec![], + extra_disks: vec![], + nfs_shares: vec![], + health_check_url: None, + snapshot_name: None, + process_type: Some(ProcessType::Vm), + serve_pid: None, + original_vsock_vm_id: None, + port_mappings: vec![], + network_mode: Default::default(), + ipv6_prefix: None, + tty: false, + interactive: false, + labels: std::collections::HashMap::new(), + hugepages: false, + portable_volumes: false, + user: None, + username: None, + health_check_timeout: 5, + }, + } +} + +#[tokio::test] +async fn test_load_state_by_pid_found() { + let temp_dir = TempDir::new().unwrap(); + let manager = StateManager::new(temp_dir.path().to_path_buf()); + manager.init().await.unwrap(); + + // Use our own PID (guaranteed to exist in /proc) + let my_pid = std::process::id(); + let state = make_vm_state("vm-pid-test", "pid-test", my_pid); + manager.save_state(&state).await.unwrap(); + + let found = manager.load_state_by_pid(my_pid).await.unwrap(); + assert_eq!(found.vm_id, "vm-pid-test"); + assert_eq!(found.pid, Some(my_pid)); +} + +#[tokio::test] +async fn test_load_state_by_pid_not_found() { + let temp_dir = TempDir::new().unwrap(); + let manager = 
StateManager::new(temp_dir.path().to_path_buf()); + manager.init().await.unwrap(); + + let my_pid = std::process::id(); + let state = make_vm_state("vm-other", "other", my_pid); + manager.save_state(&state).await.unwrap(); + + // Search for a PID that no VM has + let err = manager + .load_state_by_pid(99999999) + .await + .expect_err("should fail for unknown PID"); + assert!( + err.to_string().contains("No VM found with PID"), + "error should mention PID: {}", + err + ); +} + +#[tokio::test] +async fn test_load_state_by_pid_cleans_stale_on_retry() { + let temp_dir = TempDir::new().unwrap(); + let manager = StateManager::new(temp_dir.path().to_path_buf()); + manager.init().await.unwrap(); + + // Create a stale state file with a PID that doesn't exist. + // Use a very high PID that's virtually guaranteed to not exist. + let stale_pid = 4_000_000_000u32; + let stale_state = make_vm_state("vm-stale", "stale", stale_pid); + manager.save_state(&stale_state).await.unwrap(); + + // Verify the stale file exists + let vms = manager.list_vms().await.unwrap(); + assert_eq!(vms.len(), 1, "stale VM state should exist before cleanup"); + + // load_state_by_pid for a non-existent PID triggers cleanup_stale_state + let _ = manager.load_state_by_pid(99999998).await; + + // After the failed lookup, the stale state should have been cleaned up + // (PID 4000000000 doesn't exist in /proc) + let vms_after = manager.list_vms().await.unwrap(); + assert_eq!( + vms_after.len(), + 0, + "stale state file should be removed after cleanup" + ); +} From 0953b6ef2a0038dc2bfe72cced36057a3df7ef1c Mon Sep 17 00:00:00 2001 From: "claude[bot]" Date: Thu, 5 Mar 2026 04:33:25 +0000 Subject: [PATCH 8/9] fix: restore ipv6_prefix to vm_state.config during snapshot run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing `vm_state.config.ipv6_prefix` restoration alongside other config fields in the snapshot run path. 
Without this, cascaded snapshots (creating a snapshot from a clone) lose the prefix because `build_snapshot_config()` reads from `vm_state.config`. Also fix README CLI reference: `` → `` to match the actual input format (4 colon-separated hex groups, not CIDR notation). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- README.md | 2 +- src/commands/snapshot.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 74dcde2a..8457cf5b 100644 --- a/README.md +++ b/README.md @@ -336,7 +336,7 @@ See [`Containerfile`](Containerfile) for the complete dependency list used in CI --portable-volumes Path-hash inodes for cross-machine snapshot/restore --rootfs-size Minimum free space on rootfs (default: 10G) --no-snapshot Disable automatic snapshot creation ---ipv6-prefix Use explicit /64 prefix for routed mode (skips auto-detect and MASQUERADE) +--ipv6-prefix Use explicit /64 prefix for routed mode (skips auto-detect and MASQUERADE) ``` Run `fcvm --help` or `fcvm --help` for full options. 
diff --git a/src/commands/snapshot.rs b/src/commands/snapshot.rs index c43d4690..d7ec0006 100644 --- a/src/commands/snapshot.rs +++ b/src/commands/snapshot.rs @@ -794,6 +794,7 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { vm_state.config.user = snapshot_config.metadata.user.clone(); vm_state.config.port_mappings = port_mappings; vm_state.config.network_mode = network_mode; + vm_state.config.ipv6_prefix = snapshot_config.metadata.ipv6_prefix.clone(); vm_state.config.tty = tty_mode; vm_state.config.interactive = interactive; From 9d705f909c16163e19933599920e4c7ab6455c22 Mon Sep 17 00:00:00 2001 From: ejc3 Date: Thu, 5 Mar 2026 05:38:40 +0000 Subject: [PATCH 9/9] diag: add extensive diagnostics for clone port forward stress test On first failure (or when FCVM_FORCE_DIAG=1), dump 16 diagnostic checks: - Verbose curl via pasta (exact failure point) - ss -tlnp and ss -tanp (listening + all TCP states) - nsenter curl 10.0.2.100:80 (bypass pasta, test guest directly) - nsenter ip neigh, bridge link, ss -tanp (namespace state) - nsenter ping 10.0.2.100 (L3 reachability) - exec in VM: nginx pids, localhost curl, connection count - exec in VM: ss -tan, ARP, somaxconn, tcp_max_syn_backlog - exec in VM: dmesg for TCP/conntrack errors - Raw TCP via nc from namespace (bypass HTTP) - /proc/net/sockstat in namespace (socket counts) - Pasta fd count, VmRSS, thread count - tcpdump on br0 port 80 while doing curl (captures actual RST) - conntrack entries for port 80 in namespace - Per-request timing logged every 5th request The decisive test is nsenter curl: if it works but pasta curl doesn't, the bug is in pasta's splice relay. If both fail, the bug is in the guest's TCP stack after snapshot restore. 
--- tests/common/mod.rs | 4 + tests/test_clone_port_forward_stress.rs | 396 +++++++++++++++++++++--- 2 files changed, 355 insertions(+), 45 deletions(-) diff --git a/tests/common/mod.rs b/tests/common/mod.rs index da6ed882..89c0e5a7 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1676,6 +1676,10 @@ pub async fn dump_clone_network_diagnostics(pid: u32) { .await; } +pub async fn get_holder_pid_for_diag(fcvm_path: &std::path::PathBuf, pid: u32) -> Option { + get_holder_pid(fcvm_path, pid).await +} + async fn get_holder_pid(fcvm_path: &std::path::PathBuf, pid: u32) -> Option { let out = tokio::process::Command::new(fcvm_path) .args(["ls", "--json"]) diff --git a/tests/test_clone_port_forward_stress.rs b/tests/test_clone_port_forward_stress.rs index 383bb4a5..47dd0f09 100644 --- a/tests/test_clone_port_forward_stress.rs +++ b/tests/test_clone_port_forward_stress.rs @@ -219,6 +219,7 @@ async fn test_clone_port_forward_stress_rootless() -> Result<()> { for clone in &clones { let ip = clone.loopback_ip.clone(); let name = clone.name.clone(); + let clone_pid = clone.pid; let success = Arc::clone(&total_success); let zero = Arc::clone(&total_zero_bytes); let errors = Arc::clone(&total_errors); @@ -227,13 +228,24 @@ async fn test_clone_port_forward_stress_rootless() -> Result<()> { let mut clone_success = 0u32; let mut clone_zero = 0u32; let mut clone_error = 0u32; + let force_diag = std::env::var("FCVM_FORCE_DIAG") + .map(|v| !v.is_empty()) + .unwrap_or(false); for req in 0..REQUESTS_PER_CLONE { + let req_start = Instant::now(); let result = common::curl_check(&ip, host_port, 5).await; + let req_ms = req_start.elapsed().as_millis(); if result.success && result.body_len > 0 { clone_success += 1; success.fetch_add(1, Ordering::Relaxed); + if req % 5 == 0 || force_diag { + println!( + " Clone {} req {}: OK {}b {}ms", + name, req, result.body_len, req_ms + ); + } } else if result.success && result.body_len == 0 { clone_zero += 1; zero.fetch_add(1, 
Ordering::Relaxed); @@ -247,29 +259,18 @@ async fn test_clone_port_forward_stress_rootless() -> Result<()> { name, req, ip, host_port, result.error ); } - // On first error, dump diagnostics - if clone_error == 1 { + // On first error (or forced via FCVM_FORCE_DIAG), dump extensive diagnostics + if clone_error == 1 || (force_diag && req == 3 && clone_error == 0) { let port_str = host_port.to_string(); + let pid_str = clone_pid.to_string(); - // 1. Check listening sockets for our port - let ss = tokio::process::Command::new("ss") - .args(["-tlnp"]) - .output() - .await; - if let Ok(out) = ss { - let stdout = String::from_utf8_lossy(&out.stdout); - let matching: Vec<&str> = stdout - .lines() - .filter(|l| l.contains(&port_str) || l.starts_with("State")) - .collect(); - println!( - " DIAG clone {} port {} ss output: {:?}", - name, host_port, matching - ); - } + println!( + " DIAG === clone {} (pid={}) first failure diagnostics ===", + name, clone_pid + ); - // 2. Verbose curl to see connection details - let verbose = tokio::process::Command::new("curl") + // 1. Verbose curl to see exact failure point + if let Ok(out) = tokio::process::Command::new("curl") .args([ "-v", "--max-time", @@ -277,52 +278,357 @@ async fn test_clone_port_forward_stress_rootless() -> Result<()> { &format!("http://{}:{}", ip, host_port), ]) .output() - .await; - if let Ok(out) = verbose { + .await + { let stderr = String::from_utf8_lossy(&out.stderr); println!( - " DIAG clone {} verbose curl stderr: {}", - name, - stderr.chars().take(500).collect::() + " DIAG [verbose curl via pasta]:\n {}", + stderr.lines().collect::>().join("\n ") ); } - // 3. Check if pasta is still alive (look for pasta process) - let pgrep = tokio::process::Command::new("pgrep") + // 2. 
Listening sockets for our port + if let Ok(out) = tokio::process::Command::new("ss") + .args(["-tlnp"]) + .output() + .await + { + let stdout = String::from_utf8_lossy(&out.stdout); + let matching: Vec<&str> = stdout + .lines() + .filter(|l| l.contains(&port_str) || l.starts_with("State")) + .collect(); + println!(" DIAG [ss -tlnp]: {:?}", matching); + } + + // 3. ALL TCP sockets (ESTABLISHED, TIME_WAIT, etc.) + if let Ok(out) = tokio::process::Command::new("ss") + .args(["-tanp"]) + .output() + .await + { + let stdout = String::from_utf8_lossy(&out.stdout); + let matching: Vec<&str> = stdout + .lines() + .filter(|l| { + l.contains(&port_str) + || l.contains(&ip) + || l.starts_with("State") + || l.contains("TIME-WAIT") + }) + .collect(); + println!(" DIAG [ss -tanp relevant]: {:?}", matching); + } + + // 4. pasta process check + if let Ok(out) = tokio::process::Command::new("pgrep") .args(["-a", "pasta"]) .output() - .await; - if let Ok(out) = pgrep { + .await + { let stdout = String::from_utf8_lossy(&out.stdout); let matching: Vec<&str> = stdout .lines() .filter(|l| l.contains(&ip) || l.contains(&port_str)) .collect(); + println!(" DIAG [pasta for this clone]: {:?}", matching); + } + + // 5. Get holder_pid for nsenter diagnostics + let fcvm_path = common::find_fcvm_binary().unwrap(); + let holder_pid = + common::get_holder_pid_for_diag(&fcvm_path, clone_pid).await; + if let Some(hpid) = holder_pid { + let hpid_str = hpid.to_string(); + + // 6. nsenter curl — bypass pasta, curl guest directly through namespace + if let Ok(out) = tokio::process::Command::new("nsenter") + .args([ + "-t", + &hpid_str, + "--net", + "curl", + "-sS", + "--max-time", + "2", + "http://10.0.2.100:80", + ]) + .output() + .await + { + let body_len = out.stdout.len(); + let stderr = String::from_utf8_lossy(&out.stderr); + let status = out.status; + println!(" DIAG [nsenter curl 10.0.2.100:80]: status={} body={} bytes stderr={}", status, body_len, stderr.trim()); + } + + // 7. 
ARP table in namespace + if let Ok(out) = tokio::process::Command::new("nsenter") + .args(["-t", &hpid_str, "--net", "ip", "neigh"]) + .output() + .await + { + let stdout = String::from_utf8_lossy(&out.stdout); + println!( + " DIAG [nsenter ip neigh]: {}", + stdout.lines().collect::>().join(" | ") + ); + } + + // 8. Bridge state in namespace + if let Ok(out) = tokio::process::Command::new("nsenter") + .args(["-t", &hpid_str, "--net", "bridge", "link"]) + .output() + .await + { + let stdout = String::from_utf8_lossy(&out.stdout); + println!( + " DIAG [nsenter bridge link]: {}", + stdout.lines().collect::>().join(" | ") + ); + } + + // 9. All TCP sockets inside the namespace (TIME_WAIT from pasta splice) + if let Ok(out) = tokio::process::Command::new("nsenter") + .args(["-t", &hpid_str, "--net", "ss", "-tanp"]) + .output() + .await + { + let stdout = String::from_utf8_lossy(&out.stdout); + println!(" DIAG [nsenter ss -tanp (namespace)]:"); + for line in stdout.lines() { + println!(" {}", line); + } + } + + // 10. Ping guest from namespace + if let Ok(out) = tokio::process::Command::new("nsenter") + .args([ + "-t", + &hpid_str, + "--net", + "ping", + "-c", + "1", + "-W", + "1", + "10.0.2.100", + ]) + .output() + .await + { + let stdout = String::from_utf8_lossy(&out.stdout); + let status = out.status; + println!( + " DIAG [nsenter ping 10.0.2.100]: status={} {}", + status, + stdout.lines().last().unwrap_or("") + ); + } + } else { println!( - " DIAG clone {} pasta processes matching {}:{}: {:?}", - name, ip, host_port, matching + " DIAG [holder_pid]: could not determine holder PID for nsenter" ); - // Also show all pasta processes for context - let all: Vec<&str> = stdout.lines().collect(); - println!(" DIAG clone {} all pasta processes: {:?}", name, all); } - // 4. Check connections (not just listening) - let ss_all = tokio::process::Command::new("ss") - .args(["-tnp"]) + // 11. 
Check nginx inside the VM via exec + if let Ok(out) = tokio::process::Command::new(&fcvm_path) + .args(["exec", "--pid", &pid_str, "--vm", "--", "sh", "-c", + "echo nginx_pids=$(pgrep nginx | tr '\\n' ','); curl -sS --max-time 2 http://localhost:80 | wc -c; echo nginx_conns=$(ss -tn | grep ':80 ' | wc -l)"]) + .output().await + { + let stdout = String::from_utf8_lossy(&out.stdout); + let stderr = String::from_utf8_lossy(&out.stderr); + println!(" DIAG [exec in VM — nginx check]: stdout={} stderr={}", stdout.trim(), stderr.trim()); + } + + // 12. Guest TCP state: sockets, ARP table, SYN backlog and somaxconn limits + if let Ok(out) = tokio::process::Command::new(&fcvm_path) + .args(["exec", "--pid", &pid_str, "--vm", "--", "sh", "-c", + "ss -tan | head -20; echo '---'; ip neigh; echo '---'; cat /proc/sys/net/ipv4/tcp_max_syn_backlog 2>/dev/null; echo '---'; cat /proc/sys/net/core/somaxconn 2>/dev/null"]) + .output().await + { + let stdout = String::from_utf8_lossy(&out.stdout); + println!(" DIAG [exec in VM — tcp state]:"); + for line in stdout.lines() { + println!(" {}", line); + } + } + + // 13. dmesg inside VM for TCP errors + if let Ok(out) = tokio::process::Command::new(&fcvm_path) + .args(["exec", "--pid", &pid_str, "--vm", "--", "sh", "-c", + "dmesg 2>/dev/null | grep -iE 'tcp|conntrack|drop|reset|syn|nf_' | tail -10 || echo 'no dmesg access'"]) + .output().await + { + let stdout = String::from_utf8_lossy(&out.stdout); + if !stdout.trim().is_empty() { + println!(" DIAG [exec in VM — dmesg tcp]: {}", stdout.trim()); + } + } + + // 14. 
Raw TCP test from namespace via nc (bypasses HTTP) + if let Some(hpid) = holder_pid { + let hpid_str = hpid.to_string(); + if let Ok(out) = tokio::process::Command::new("nsenter") + .args([ + "-t", + &hpid_str, + "--net", + "sh", + "-c", + "echo -e 'GET / HTTP/1.0\\r\\nHost: test\\r\\n\\r\\n' | nc -w 2 10.0.2.100 80 | head -1", + ]) + .output() + .await + { + let stdout = String::from_utf8_lossy(&out.stdout); + let stderr = String::from_utf8_lossy(&out.stderr); + println!( + " DIAG [nsenter nc 10.0.2.100:80]: stdout={} stderr={}", + stdout.trim(), + stderr.trim() + ); + } + + // 14b. /proc/net/sockstat inside namespace (socket counts) + if let Ok(out) = tokio::process::Command::new("nsenter") + .args(["-t", &hpid_str, "--net", "cat", "/proc/net/sockstat"]) + .output() + .await + { + let stdout = String::from_utf8_lossy(&out.stdout); + println!( + " DIAG [nsenter /proc/net/sockstat]: {}", + stdout.trim() + ); + } + } + + // 15. Pasta fd count, /proc status, and memory (are fds/memory leaking?) 
+ if let Ok(out) = tokio::process::Command::new("pgrep") + .args(["-a", "pasta"]) .output() - .await; - if let Ok(out) = ss_all { + .await + { let stdout = String::from_utf8_lossy(&out.stdout); - let matching: Vec<&str> = stdout - .lines() - .filter(|l| l.contains(&ip) || l.starts_with("State")) - .collect(); + for line in stdout.lines() { + if line.contains(&ip) { + if let Some(pasta_pid) = line.split_whitespace().next() { + // fd count + if let Ok(fds) = tokio::process::Command::new("ls") + .args([&format!("/proc/{}/fd", pasta_pid)]) + .output() + .await + { + let fd_count = String::from_utf8_lossy(&fds.stdout) + .lines() + .count(); + println!( + " DIAG [pasta pid={} fd count]: {}", + pasta_pid, fd_count + ); + } + // VmRSS and threads + if let Ok(status) = tokio::process::Command::new("sh") + .args([ + "-c", + &format!( + "grep -E 'VmRSS|Threads|FDSize' /proc/{}/status", + pasta_pid + ), + ]) + .output() + .await + { + let stdout = String::from_utf8_lossy(&status.stdout); + println!( + " DIAG [pasta pid={} status]: {}", + pasta_pid, + stdout.lines().collect::>().join(" | ") + ); + } + } + } + } + } + + // 15b. 
tcpdump in namespace while doing a curl — capture the actual RST + if let Some(hpid) = holder_pid { + let hpid_str = hpid.to_string(); + // Start tcpdump in background, do a curl via pasta, then collect + let tcpdump_hpid = hpid_str.clone(); + let tcpdump_handle = tokio::spawn(async move { + tokio::process::Command::new("nsenter") + .args([ + "-t", + &tcpdump_hpid, + "--net", + "timeout", + "3", + "tcpdump", + "-i", + "br0", + "-c", + "20", + "-nn", + "port", + "80", + ]) + .output() + .await + }); + // Brief pause to let tcpdump start + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + // Now do a curl via pasta (this should fail and tcpdump captures it) + let pasta_retry = common::curl_check(&ip, host_port, 2).await; println!( - " DIAG clone {} connections to {}: {:?}", - name, ip, matching + " DIAG [pasta curl during tcpdump]: success={} body={} err={}", + pasta_retry.success, + pasta_retry.body_len, + pasta_retry.error.trim() ); + // Also try nsenter curl during tcpdump + if let Ok(out) = tokio::process::Command::new("nsenter") + .args([ + "-t", + &hpid_str, + "--net", + "curl", + "-sS", + "--max-time", + "2", + "http://10.0.2.100:80", + ]) + .output() + .await + { + println!(" DIAG [nsenter curl during tcpdump]: status={} body={} bytes", + out.status, out.stdout.len()); + } + // Collect tcpdump output + if let Ok(Ok(out)) = tcpdump_handle.await { + let stderr = String::from_utf8_lossy(&out.stderr); + println!(" DIAG [tcpdump br0 port 80]:"); + for line in stderr.lines() { + println!(" {}", line); + } + } + + // 16. 
conntrack entries in namespace + if let Ok(out) = tokio::process::Command::new("nsenter") + .args(["-t", &hpid_str, "--net", "sh", "-c", + "cat /proc/net/nf_conntrack 2>/dev/null | grep ':0050 ' | head -10 || echo 'no conntrack'"]) + .output().await + { + let stdout = String::from_utf8_lossy(&out.stdout); + if !stdout.trim().is_empty() { + println!(" DIAG [nsenter conntrack port 80]: {}", stdout.trim()); + } + } } + + println!(" DIAG === end diagnostics for clone {} ===", name); } } }