From dd5b5e9cfab4cc41f5afe6a1c23eb895e805c125 Mon Sep 17 00:00:00 2001 From: kian woon Date: Thu, 2 Apr 2026 23:18:57 +0800 Subject: [PATCH] fix(metrics): resolve 3 GUI stats accuracy bugs (#133) - Cache hit rate no longer excludes zero-cache requests from denominator (was inflating 40% real rate to ~78%) - avgTokensPerSec now filters out timed-out/aborted entries (0 tok/s), matching per-model stats behavior - avgCacheHitRate now uses same 1000-entry window as token totals, eliminating scope mismatch with the 50-entry recent requests window --- src/metrics.ts | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/src/metrics.ts b/src/metrics.ts index 2ea1a2b..23e341f 100644 --- a/src/metrics.ts +++ b/src/metrics.ts @@ -26,6 +26,7 @@ export class MetricsStore { private _totalInputTokens = 0; private _totalOutputTokens = 0; private _totalTokensPerSec = 0; + private _nonZeroTpsCount = 0; // count of entries with tokensPerSec > 0 private _totalCacheReadTokens = 0; private _totalCacheCreationTokens = 0; private _modelMap = new Map(); @@ -92,6 +93,7 @@ export class MetricsStore { this._totalInputTokens -= evicted.inputTokens ?? 0; this._totalOutputTokens -= evicted.outputTokens ?? 0; this._totalTokensPerSec -= evicted.tokensPerSec ?? 0; + if ((evicted.tokensPerSec ?? 0) > 0) this._nonZeroTpsCount--; this._totalCacheReadTokens -= evicted.cacheReadTokens ?? 0; this._totalCacheCreationTokens -= evicted.cacheCreationTokens ?? 0; @@ -153,6 +155,7 @@ export class MetricsStore { this._totalInputTokens += metrics.inputTokens ?? 0; this._totalOutputTokens += metrics.outputTokens ?? 0; this._totalTokensPerSec += metrics.tokensPerSec ?? 0; + if ((metrics.tokensPerSec ?? 0) > 0) this._nonZeroTpsCount++; this._totalCacheReadTokens += metrics.cacheReadTokens ?? 0; this._totalCacheCreationTokens += metrics.cacheCreationTokens ?? 0; @@ -240,26 +243,21 @@ export class MetricsStore { .map(([provider, count]) => ({ provider, count })) .sort((a, b) => b.count - a.count); - // Compute average cache hit rate across all requests with cache data - let cacheHitRateSum = 0; - let cacheHitRateCount = 0; - for (const r of requests) { - const totalInput = (r.inputTokens ?? 0) + (r.cacheReadTokens ?? 0) + (r.cacheCreationTokens ?? 0); - if (totalInput > 0 && (r.cacheReadTokens ?? 0) > 0) { - cacheHitRateSum += (r.cacheReadTokens! / totalInput) * 100; - cacheHitRateCount++; - } - } + // Compute average cache hit rate using running counters (same 1000-entry window as token totals) + const totalInputAll = this._totalInputTokens + this._totalCacheReadTokens + this._totalCacheCreationTokens; + const avgCacheHitRate = totalInputAll > 0 + ? Math.round((this._totalCacheReadTokens / totalInputAll) * 1000) / 10 + : 0; // getRecentRequests() already caps at WS_RECENT_REQUESTS_CAP return { totalRequests: this._lifetimeRequests, totalInputTokens: this._totalInputTokens, totalOutputTokens: this._totalOutputTokens, - avgTokensPerSec: this.count > 0 ? Math.round((this._totalTokensPerSec / this.count) * 10) / 10 : 0, + avgTokensPerSec: this._nonZeroTpsCount > 0 ? Math.round((this._totalTokensPerSec / this._nonZeroTpsCount) * 10) / 10 : 0, totalCacheReadTokens: this._totalCacheReadTokens, totalCacheCreationTokens: this._totalCacheCreationTokens, - avgCacheHitRate: cacheHitRateCount > 0 ? Math.round((cacheHitRateSum / cacheHitRateCount) * 10) / 10 : 0, + avgCacheHitRate, activeModels, providerDistribution, recentRequests: requests, @@ -314,17 +312,13 @@ export class MetricsStore { ? Math.round((tokSecEntries.reduce((s, e) => s + e.tokensPerSec, 0) / tokSecEntries.length) * 10) / 10 : 0; - // Cache hit rate - let cacheHitSum = 0; - let cacheHitCount = 0; - for (const e of entries) { - const totalInput = (e.inputTokens ?? 0) + (e.cacheReadTokens ?? 0) + (e.cacheCreationTokens ?? 0); - if (totalInput > 0 && (e.cacheReadTokens ?? 0) > 0) { - cacheHitSum += (e.cacheReadTokens! / totalInput) * 100; - cacheHitCount++; - } - } - const avgCacheHitRate = cacheHitCount > 0 ? Math.round((cacheHitSum / cacheHitCount) * 10) / 10 : 0; + // Cache hit rate — sum-based (includes zero-cache requests in denominator) + const totalInputForModel = entries.reduce((s, e) => + s + (e.inputTokens ?? 0) + (e.cacheReadTokens ?? 0) + (e.cacheCreationTokens ?? 0), 0); + const totalCacheReadForModel = entries.reduce((s, e) => s + (e.cacheReadTokens ?? 0), 0); + const avgCacheHitRate = totalInputForModel > 0 + ? Math.round((totalCacheReadForModel / totalInputForModel) * 1000) / 10 + : 0; // Provider breakdown const providerGroups = new Map();