From f4cd2ad6908c74df921b26769bdd481bbdeab584 Mon Sep 17 00:00:00 2001 From: Valentino Hudhra Date: Sat, 21 Feb 2026 22:39:44 +0000 Subject: [PATCH 1/2] Fix excessive Redis log spam when Redis is unavailable Replace per-event error logging with state-based transition logging. A redisAvailable flag tracks connectivity so get/set short-circuit silently when Redis is down, and only one log line is emitted per state change instead of per retry and per request. --- services/cache/kv.ts | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/services/cache/kv.ts b/services/cache/kv.ts index 7967e1cad..9929c0565 100644 --- a/services/cache/kv.ts +++ b/services/cache/kv.ts @@ -10,6 +10,9 @@ const REDIS_URL = process.env.REDIS_URL; // Create Redis client only if REDIS_URL is provided let redis: ReturnType | null = null; +/** Tracks whether Redis is currently reachable — used to suppress per-request error spam. */ +let redisAvailable = false; + if (REDIS_URL) { redis = createClient({ url: REDIS_URL, @@ -29,13 +32,29 @@ if (REDIS_URL) { }, }); - redis.on('error', (err) => { - logger.error('Redis Client Error', err); + // Log state transitions once instead of every error event + redis.on('error', () => { + if (redisAvailable) { + redisAvailable = false; + logger.warn( + 'Redis connection lost — cache operations will fall through to DB', + ); + } + }); + + redis.on('ready', () => { + if (!redisAvailable) { + redisAvailable = true; + logger.info('Redis connected'); + } }); // Connect to Redis if (!redis.isOpen) { - redis.connect().catch(console.error); + redis.connect().catch(() => { + // Intentionally swallowed. NOTE: the 'error' handler above only logs an + // available -> unavailable transition, so a failed initial connect is silent.
+ }); } } @@ -205,7 +224,7 @@ export const invalidateMultiple = async ({ }; export const get = async (key: string) => { - if (!redis) { + if (!redis || !redisAvailable) { return null; } @@ -218,7 +237,6 @@ export const get = async (key: string) => { return null; } catch (e) { - logger.error('CACHE: error getting from Redis', { error: e }); cacheMetrics.recordError('get'); return null; @@ -228,7 +246,7 @@ export const get = async (key: string) => { // const DEFAULT_TTL = 3600 * 24 * 30; // 3600 * 24 = 1 day const DEFAULT_TTL = 3600; // short TTL for testing export const set = async (key: string, data: unknown, ttl?: number) => { - if (!redis) { + if (!redis || !redisAvailable) { return; } @@ -240,7 +258,6 @@ export const set = async (key: string, data: unknown, ttl?: number) => { await redis.setEx(key, ttl || DEFAULT_TTL, serializedData); } } catch (e) { - logger.error('CACHE: error setting to Redis', { error: e }); cacheMetrics.recordError('set'); } }; From 36c1471e634a95da6dfd815f4c0fd0a56ce35cc1 Mon Sep 17 00:00:00 2001 From: Valentino Hudhra Date: Sat, 21 Feb 2026 23:23:52 +0000 Subject: [PATCH 2/2] Remove reconnect retry cap so Redis can recover after outage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous cap of 3 retries meant Redis was permanently disabled for the process lifetime after any outage. With the redisAvailable guard already short-circuiting get/set, retries cost nothing — just backoff up to 30s so the ready event can fire when Redis comes back. 
--- services/cache/kv.ts | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/services/cache/kv.ts b/services/cache/kv.ts index 9929c0565..0856e0e36 100644 --- a/services/cache/kv.ts +++ b/services/cache/kv.ts @@ -21,13 +21,10 @@ if (REDIS_URL) { connectTimeout: 10_000, keepAlive: false, // TCP keepalive reconnectStrategy: (retries) => { - if (retries > 3) { - return false; - } - - const jitter = Math.floor(Math.random() * 100); - - return Math.min(retries * 500, 5_000) + jitter; + // Linear backoff (500 ms per retry, capped at 30s) so we can recover when Redis comes back. + // The redisAvailable guard ensures zero per-request overhead while disconnected. + const jitter = Math.floor(Math.random() * 200); + return Math.min(retries * 500, 30_000) + jitter; }, }, });