From ca6973d65249137cc7f8f0b7123f34e770fd2f23 Mon Sep 17 00:00:00 2001
From: "claude[bot]"
Date: Wed, 4 Mar 2026 00:49:21 +0000
Subject: [PATCH] fix: skip pasta port probe during snapshot restore to prevent 0-byte responses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During snapshot restore, post_start() runs BEFORE the VM snapshot is loaded
into Firecracker. The port forwarding probe in post_start() forces pasta to
accept a TCP connection and attempt L2 forwarding to a non-existent guest.
This poisoned connection attempt corrupts pasta's internal connection
tracking state, causing all subsequent data-bearing connections through
pasta to return 0 bytes (TCP connect succeeds but HTTP responses are empty).

The fix adds a restore_mode flag to PastaNetwork that skips the premature
port probe in post_start(). Port forwarding is still properly verified later
via verify_port_forwarding(), which runs after the VM is resumed and
fc-agent has sent its gratuitous ARP.

Root cause: common.rs calls network.post_start() at line 997, then loads the
snapshot at line 1033+. The wait_for_port_forwarding() inside post_start()
probes pasta before any guest exists, poisoning pasta's L4 translation state.

🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/commands/snapshot.rs | 3 ++- src/network/pasta.rs | 24 +++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/commands/snapshot.rs b/src/commands/snapshot.rs index bc1b18fd..0d4c682a 100644 --- a/src/commands/snapshot.rs +++ b/src/commands/snapshot.rs @@ -749,7 +749,8 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { // With bridge mode, guest IP is always 10.0.2.100 on pasta network // Each clone runs in its own namespace, so no IP conflict let net = PastaNetwork::new(vm_id.clone(), tap_device.clone(), port_mappings.clone()) - .with_loopback_ip(loopback_ip); + .with_loopback_ip(loopback_ip) + .with_restore_mode(); Box::new(net) } }; diff --git a/src/network/pasta.rs b/src/network/pasta.rs index b246e141..da1cf408 100644 --- a/src/network/pasta.rs +++ b/src/network/pasta.rs @@ -75,6 +75,7 @@ pub struct PastaNetwork { pid_file: Option, loopback_ip: Option, // Unique loopback IP for port forwarding (127.x.y.z) holder_pid: Option, // Namespace PID (set in post_start) + restore_mode: bool, // Skip port probe in post_start (VM not loaded yet) } impl PastaNetwork { @@ -90,6 +91,7 @@ impl PastaNetwork { pid_file: None, loopback_ip: None, holder_pid: None, + restore_mode: false, } } @@ -106,6 +108,20 @@ impl PastaNetwork { self } + /// Skip port forwarding probe in post_start() for snapshot restore. + /// + /// During snapshot restore, post_start() runs BEFORE the VM snapshot is loaded + /// into Firecracker. Probing ports at that point forces pasta to attempt L2 + /// forwarding to a non-existent guest, which can poison pasta's internal + /// connection tracking and cause subsequent data-bearing connections to fail + /// (TCP connect succeeds but 0 bytes returned). The proper verification happens + /// later via verify_port_forwarding() after the VM is resumed and fc-agent has + /// sent its gratuitous ARP. 
+ pub fn with_restore_mode(mut self) -> Self { + self.restore_mode = true; + self + } + /// Get the loopback IP assigned to this VM for port forwarding pub fn loopback_ip(&self) -> Option<&str> { self.loopback_ip.as_deref() @@ -604,7 +620,13 @@ impl NetworkManager for PastaNetwork { // The PID file only means pasta spawned, not that ports are bound. // Health checks use nsenter (bridge path), so without this check // "healthy" doesn't mean port forwarding works. - if !self.port_mappings.is_empty() { + // + // Skip in restore mode: during snapshot restore, post_start() runs BEFORE + // the VM snapshot is loaded. Probing ports now forces pasta to attempt L2 + // forwarding to a non-existent guest, poisoning its connection state and + // causing subsequent connections to return 0 bytes. The port check happens + // later via verify_port_forwarding() after the VM is actually running. + if !self.restore_mode && !self.port_mappings.is_empty() { self.wait_for_port_forwarding().await?; }