From 3fb3707bb9e750ba5e66883ba1138b9e3f650062 Mon Sep 17 00:00:00 2001 From: Callan Barrett Date: Sat, 7 Feb 2026 15:28:25 +0800 Subject: [PATCH] fix: prevent stale WebSocket handlers from corrupting reconnection state Three bugs caused the app to get stuck in permanent "reconnecting" state after resuming from background: 1. createWebSocket() replaced this.ws without cleaning up old handlers. Stale onclose callbacks would fire on the transport, scheduling duplicate reconnection timers and leaking WebSocket instances. 2. pauseHeartbeat() didn't clear reconnectTimer, allowing background reconnection loops when the WebSocket died while the app was backgrounded. 3. Pong timeout called ws.close() then handleDisconnection(), but the subsequent onclose event triggered handleDisconnection() again, creating parallel reconnection paths. Extract closeWebSocket() helper to null handlers before closing, reused in createWebSocket(), cleanup(), and pong timeout. --- .../WebSocketTransport.lifecycle.test.ts | 203 ++++++++++++++++++ src/lib/transport/WebSocketTransport.ts | 53 +++-- 2 files changed, 237 insertions(+), 19 deletions(-) diff --git a/src/__tests__/unit/lib/transport/WebSocketTransport.lifecycle.test.ts b/src/__tests__/unit/lib/transport/WebSocketTransport.lifecycle.test.ts index 78f1afc..6317dcd 100644 --- a/src/__tests__/unit/lib/transport/WebSocketTransport.lifecycle.test.ts +++ b/src/__tests__/unit/lib/transport/WebSocketTransport.lifecycle.test.ts @@ -799,4 +799,207 @@ describe("WebSocketTransport lifecycle", () => { transport.destroy(); }); }); + + describe("stale WebSocket cleanup", () => { + it("should null old WebSocket handlers when createWebSocket creates a new instance", () => { + const transport = new WebSocketTransport({ + deviceId: "test-device", + url: "ws://localhost:7497", + reconnectInterval: 100, + }); + + transport.connect(); + const ws1 = MockWebSocket.getLatest()!; + ws1.simulateOpen(); + + // Disconnect to trigger reconnect + ws1.simulateClose(); + + // Wait for reconnect to create a new WebSocket + vi.advanceTimersByTime(100); + + const ws2 = MockWebSocket.getLatest()!; + expect(ws2).not.toBe(ws1); + + // Old WebSocket handlers should be nulled + expect(ws1.onopen).toBeNull(); + expect(ws1.onclose).toBeNull(); + expect(ws1.onerror).toBeNull(); + expect(ws1.onmessage).toBeNull(); + + transport.destroy(); + }); + + it("should not create parallel WebSockets from repeated reconnection failures", () => { + const transport = new WebSocketTransport({ + deviceId: "test-device", + url: "ws://localhost:7497", + reconnectInterval: 100, + }); + + transport.connect(); + const ws1 = MockWebSocket.getLatest()!; + ws1.simulateOpen(); + + // Simulate connection drop + ws1.simulateClose(); + + // Each reconnect attempt should only create one new WebSocket + for (let i = 0; i < 5; i++) { + vi.advanceTimersByTime(100); + const ws = MockWebSocket.getLatest()!; + // Fail the connection + ws.simulateClose(); + } + + // initial(1) + 5 created in loop iterations = 6 total + // The key assertion: no parallel WebSockets from duplicate timers + // Each iteration should create exactly one new WebSocket + expect(MockWebSocket.instances.length).toBe(6); + + // All old instances should have nulled handlers + for (let i = 0; i < MockWebSocket.instances.length - 1; i++) { + expect(MockWebSocket.instances[i]!.onopen).toBeNull(); + expect(MockWebSocket.instances[i]!.onclose).toBeNull(); + } + + transport.destroy(); + }); + }); + + describe("pauseHeartbeat and reconnection", () => { + it("should clear reconnectTimer when pauseHeartbeat is called", () => { + const transport = new WebSocketTransport({ + deviceId: "test-device", + url: "ws://localhost:7497", + reconnectInterval: 2000, + }); + + transport.connect(); + const ws = MockWebSocket.getLatest()!; + ws.simulateOpen(); + + // Disconnect - this schedules a reconnect timer + ws.simulateClose(); + expect(transport.state).toBe("reconnecting"); + + const instancesAfterClose = MockWebSocket.instances.length; + + // Pause heartbeat (app goes to background) - should clear reconnect timer + transport.pauseHeartbeat(); + + // Advance past the reconnect interval + vi.advanceTimersByTime(3000); + + // No new WebSocket should have been created + expect(MockWebSocket.instances.length).toBe(instancesAfterClose); + + transport.destroy(); + }); + + it("should block scheduleReconnect when heartbeatPaused is true", () => { + const transport = new WebSocketTransport({ + deviceId: "test-device", + url: "ws://localhost:7497", + reconnectInterval: 100, + }); + + transport.connect(); + const ws = MockWebSocket.getLatest()!; + ws.simulateOpen(); + + // Pause heartbeat first (simulating app going to background) + transport.pauseHeartbeat(); + + const instancesBeforeClose = MockWebSocket.instances.length; + + // Now the WebSocket dies while paused + // Manually trigger onclose (simulating network drop in background) + ws.simulateClose(); + + // Advance time well past reconnect interval + vi.advanceTimersByTime(1000); + + // No new WebSocket should have been created because heartbeat is paused + expect(MockWebSocket.instances.length).toBe(instancesBeforeClose); + + transport.destroy(); + }); + + it("should reconnect successfully after pause, WS death, and resume", () => { + const transport = new WebSocketTransport({ + deviceId: "test-device", + url: "ws://localhost:7497", + reconnectInterval: 100, + }); + + transport.connect(); + const ws1 = MockWebSocket.getLatest()!; + ws1.simulateOpen(); + expect(transport.state).toBe("connected"); + + // App goes to background + transport.pauseHeartbeat(); + + // WebSocket dies while backgrounded + ws1.simulateClose(); + + // Advance time - no reconnection should happen + vi.advanceTimersByTime(500); + const instancesWhilePaused = MockWebSocket.instances.length; + expect(instancesWhilePaused).toBe(1); // Still just the original + + // App comes to foreground - immediateReconnect triggers + transport.immediateReconnect(); + + // Advance past the 500ms delay + vi.advanceTimersByTime(500); + + // Should have created a new WebSocket + expect(MockWebSocket.instances.length).toBe(2); + + // New connection succeeds + const ws2 = MockWebSocket.getLatest()!; + ws2.simulateOpen(); + expect(transport.state).toBe("connected"); + + transport.destroy(); + }); + }); + + describe("pong timeout", () => { + it("should not create duplicate reconnections from pong timeout", () => { + const transport = new WebSocketTransport({ + deviceId: "test-device", + url: "ws://localhost:7497", + pingInterval: 1000, + pongTimeout: 500, + reconnectInterval: 100, + }); + + transport.connect(); + const ws1 = MockWebSocket.getLatest()!; + ws1.simulateOpen(); + + // Trigger ping + vi.advanceTimersByTime(1000); + expect(ws1.sentMessages).toContain("ping"); + + // Let pong timeout fire (closes WS and triggers handleDisconnection) + vi.advanceTimersByTime(500); + + // Old WS handlers should be nulled (closeWebSocket was called) + expect(ws1.onclose).toBeNull(); + expect(ws1.onopen).toBeNull(); + + // Wait for reconnect + vi.advanceTimersByTime(100); + + // Should have exactly 2 WebSocket instances (original + 1 reconnect) + // Not 3 (which would happen if onclose also triggered a separate reconnection) + expect(MockWebSocket.instances.length).toBe(2); + + transport.destroy(); + }); + }); }); diff --git a/src/lib/transport/WebSocketTransport.ts b/src/lib/transport/WebSocketTransport.ts index d268d57..ba577ec 100644 --- a/src/lib/transport/WebSocketTransport.ts +++ b/src/lib/transport/WebSocketTransport.ts @@ -188,7 +188,12 @@ export class WebSocketTransport implements Transport { this.heartbeatPaused = true; this.heartReset(); - // Cancel any pending immediate reconnect to prevent connections opening in background + // Cancel any pending reconnect timers to prevent connections opening in background + if (this.reconnectTimer) { + clearTimeout(this.reconnectTimer); + this.reconnectTimer = undefined; + } + if (this.immediateReconnectTimer) { clearTimeout(this.immediateReconnectTimer); this.immediateReconnectTimer = undefined; @@ -215,7 +220,26 @@ export class WebSocketTransport implements Transport { } } + private closeWebSocket(): void { + if (this.ws) { + this.ws.onopen = null; + this.ws.onclose = null; + this.ws.onerror = null; + this.ws.onmessage = null; + if ( + this.ws.readyState === WebSocket.OPEN || + this.ws.readyState === WebSocket.CONNECTING + ) { + this.ws.close(); + } + this.ws = null; + } + } + private createWebSocket(): void { + // Close any existing WebSocket to prevent stale handlers from corrupting state + this.closeWebSocket(); + try { this.ws = new WebSocket(this.config.url); this.setupEventHandlers(); @@ -311,6 +335,11 @@ export class WebSocketTransport implements Transport { } private scheduleReconnect(): void { + // Don't schedule reconnects while heartbeat is paused (app backgrounded) + if (this.heartbeatPaused) { + return; + } + if ( this.isDestroyed || this.reconnectAttempts >= this.config.maxReconnectAttempts @@ -388,10 +417,9 @@ export class WebSocketTransport implements Transport { logger.warn( `[Transport:${this.deviceId}] Pong timeout - forcing disconnect`, ); - // Close the WebSocket - this.ws?.close(); - // Force disconnect handling immediately, don't wait for onclose event - // This ensures we detect dead connections even if browser doesn't fire onclose + // Close and null handlers before triggering disconnect to prevent + // onclose from firing a duplicate handleDisconnection() + this.closeWebSocket(); this.stopHeartbeat(); this.handleDisconnection(); }, this.config.pongTimeout); @@ -451,19 +479,6 @@ export class WebSocketTransport implements Transport { this.immediateReconnectTimer = undefined; } - if (this.ws) { - this.ws.onopen = null; - this.ws.onclose = null; - this.ws.onerror = null; - this.ws.onmessage = null; - - if ( - this.ws.readyState === WebSocket.OPEN || - this.ws.readyState === WebSocket.CONNECTING - ) { - this.ws.close(); - } - this.ws = null; - } + this.closeWebSocket(); } }