From 6f8df61d04d91cd49432f878e44777688c26fba3 Mon Sep 17 00:00:00 2001 From: rlaope Date: Sun, 12 Apr 2026 21:52:42 +0900 Subject: [PATCH] feat: real-time GC rate & leak monitoring in server (#114) Add live allocation rate, promotion rate, and memory leak detection to the server-side GCAnalyzer: - GcMetricsComputer: computes allocation/promotion rates and leak detection from recent GC summaries using simplified linear regression - GCAnalyzer: integrates periodic rate/leak computation, exposes via GCAnalysisResult - REST API: new fields in GC analysis JSON response Signed-off-by: rlaope --- .../io/argus/server/analysis/GCAnalyzer.java | 27 ++- .../server/analysis/GcMetricsComputer.java | 175 ++++++++++++++++++ .../server/handler/ArgusChannelHandler.java | 4 + .../analysis/GcMetricsComputerTest.java | 163 ++++++++++++++++ 4 files changed, 367 insertions(+), 2 deletions(-) create mode 100644 argus-server/src/main/java/io/argus/server/analysis/GcMetricsComputer.java create mode 100644 argus-server/src/test/java/io/argus/server/analysis/GcMetricsComputerTest.java diff --git a/argus-server/src/main/java/io/argus/server/analysis/GCAnalyzer.java b/argus-server/src/main/java/io/argus/server/analysis/GCAnalyzer.java index e1292b8..0790b07 100644 --- a/argus-server/src/main/java/io/argus/server/analysis/GCAnalyzer.java +++ b/argus-server/src/main/java/io/argus/server/analysis/GCAnalyzer.java @@ -127,6 +127,11 @@ public GCAnalysisResult getAnalysis() { // GC overhead warning if > 10% boolean overheadWarning = currentGcOverheadPercent > 10.0; + // Compute real-time rate and leak metrics from all recorded events + List allSummaries = new ArrayList<>(recentGCs); + GcMetricsComputer.RateMetrics rates = GcMetricsComputer.computeRates(allSummaries); + GcMetricsComputer.LeakMetrics leak = GcMetricsComputer.detectLeak(allSummaries); + return new GCAnalysisResult( total, totalPause / 1_000_000, // Convert to ms @@ -138,7 +143,11 @@ public GCAnalysisResult getAnalysis() { 
lastHeapCommitted, lastGCTime, currentGcOverheadPercent, - overheadWarning + overheadWarning, + rates.allocationRateKBPerSec(), + rates.promotionRateKBPerSec(), + leak.leakSuspected(), + leak.confidencePercent() ); } @@ -256,12 +265,18 @@ public static final class GCAnalysisResult { private final Instant lastGCTime; private final double gcOverheadPercent; private final boolean isOverheadWarning; + private final double allocationRateKBPerSec; + private final double promotionRateKBPerSec; + private final boolean leakSuspected; + private final double leakConfidencePercent; public GCAnalysisResult(long totalGCEvents, long totalPauseTimeMs, double avgPauseTimeMs, long maxPauseTimeMs, List recentGCs, Map causeDistribution, long currentHeapUsed, long currentHeapCommitted, Instant lastGCTime, - double gcOverheadPercent, boolean isOverheadWarning) { + double gcOverheadPercent, boolean isOverheadWarning, + double allocationRateKBPerSec, double promotionRateKBPerSec, + boolean leakSuspected, double leakConfidencePercent) { this.totalGCEvents = totalGCEvents; this.totalPauseTimeMs = totalPauseTimeMs; this.avgPauseTimeMs = avgPauseTimeMs; @@ -273,6 +288,10 @@ public GCAnalysisResult(long totalGCEvents, long totalPauseTimeMs, double avgPau this.lastGCTime = lastGCTime; this.gcOverheadPercent = gcOverheadPercent; this.isOverheadWarning = isOverheadWarning; + this.allocationRateKBPerSec = allocationRateKBPerSec; + this.promotionRateKBPerSec = promotionRateKBPerSec; + this.leakSuspected = leakSuspected; + this.leakConfidencePercent = leakConfidencePercent; } public long totalGCEvents() { return totalGCEvents; } @@ -286,5 +305,9 @@ public GCAnalysisResult(long totalGCEvents, long totalPauseTimeMs, double avgPau public Instant lastGCTime() { return lastGCTime; } public double gcOverheadPercent() { return gcOverheadPercent; } public boolean isOverheadWarning() { return isOverheadWarning; } + public double allocationRateKBPerSec() { return allocationRateKBPerSec; } + public double 
promotionRateKBPerSec() { return promotionRateKBPerSec; } + public boolean leakSuspected() { return leakSuspected; } + public double leakConfidencePercent() { return leakConfidencePercent; } } } diff --git a/argus-server/src/main/java/io/argus/server/analysis/GcMetricsComputer.java b/argus-server/src/main/java/io/argus/server/analysis/GcMetricsComputer.java new file mode 100644 index 0000000..456e494 --- /dev/null +++ b/argus-server/src/main/java/io/argus/server/analysis/GcMetricsComputer.java @@ -0,0 +1,175 @@ +package io.argus.server.analysis; + +import java.util.List; + +/** + * Computes allocation rate, promotion rate, and leak detection metrics + * from recent GC summaries, using simplified versions of the CLI gclog algorithms. + * + *

Designed for real-time monitoring with small event windows (last 100 events). + */ +public final class GcMetricsComputer { + + private static final int RATE_WINDOW = 20; + private static final int LEAK_WINDOW = 50; + private static final double LEAK_R2_THRESHOLD = 0.7; + + private GcMetricsComputer() {} + + /** + * Computes GC rate metrics from recent GC summaries. + * + *

Allocation rate: average of positive (heapUsedBefore[i] - heapUsedAfter[i-1]) / timeDelta + * over consecutive event pairs within the last {@value #RATE_WINDOW} events. + * + *

Promotion rate: average of positive (heapUsedAfter[i] - heapUsedAfter[i-1]) / timeDelta + * over the same window. + * + * @param summaries ordered list of GC summaries (oldest first) + * @return computed rate metrics + */ + public static RateMetrics computeRates(List summaries) { + if (summaries.size() < 2) { + return new RateMetrics(0.0, 0.0); + } + + // Use last RATE_WINDOW events + int start = Math.max(0, summaries.size() - RATE_WINDOW); + List window = summaries.subList(start, summaries.size()); + + double allocSum = 0; + int allocCount = 0; + double promoSum = 0; + int promoCount = 0; + + for (int i = 1; i < window.size(); i++) { + GCAnalyzer.GCSummary prev = window.get(i - 1); + GCAnalyzer.GCSummary cur = window.get(i); + + double timeDeltaSec = (cur.timestamp().toEpochMilli() - prev.timestamp().toEpochMilli()) / 1000.0; + if (timeDeltaSec <= 0) { + continue; + } + + // Allocation rate: bytes allocated between GCs, converted to KB/s + long heapBeforeBytes = cur.heapUsedBefore(); + long prevHeapAfterBytes = prev.heapUsedAfter(); + long allocatedBytes = heapBeforeBytes - prevHeapAfterBytes; + if (allocatedBytes > 0) { + double allocKBPerSec = (allocatedBytes / 1024.0) / timeDeltaSec; + allocSum += allocKBPerSec; + allocCount++; + } + + // Promotion rate: positive growth in heap-after-GC between consecutive events + long promotedBytes = cur.heapUsedAfter() - prev.heapUsedAfter(); + if (promotedBytes > 0) { + double promoKBPerSec = (promotedBytes / 1024.0) / timeDeltaSec; + promoSum += promoKBPerSec; + promoCount++; + } + } + + double allocationRate = allocCount > 0 ? allocSum / allocCount : 0.0; + double promotionRate = promoCount > 0 ? promoSum / promoCount : 0.0; + + return new RateMetrics(allocationRate, promotionRate); + } + + /** + * Detects a memory leak by performing linear regression on the last {@value #LEAK_WINDOW} + * heap-after-GC values. R² > {@value #LEAK_R2_THRESHOLD} with a positive slope is + * considered a leak. 
+ * + * @param summaries ordered list of GC summaries (oldest first) + * @return computed leak metrics + */ + public static LeakMetrics detectLeak(List summaries) { + if (summaries.size() < 3) { + return new LeakMetrics(false, 0.0); + } + + // Use last LEAK_WINDOW events + int start = Math.max(0, summaries.size() - LEAK_WINDOW); + List window = summaries.subList(start, summaries.size()); + + // Filter to events with valid heap-after data + List valid = window.stream() + .filter(s -> s.heapUsedAfter() > 0) + .toList(); + + if (valid.size() < 3) { + return new LeakMetrics(false, 0.0); + } + + int n = valid.size(); + // x = index, y = heapUsedAfter in KB + double sumX = 0, sumY = 0, sumXY = 0, sumX2 = 0; + for (int i = 0; i < n; i++) { + double x = i; + double y = valid.get(i).heapUsedAfter() / 1024.0; + sumX += x; + sumY += y; + sumXY += x * y; + sumX2 += x * x; + } + + double denom = n * sumX2 - sumX * sumX; + if (denom == 0) { + return new LeakMetrics(false, 0.0); + } + + double slope = (n * sumXY - sumX * sumY) / denom; + double intercept = (sumY - slope * sumX) / n; + + // R² calculation + double meanY = sumY / n; + double ssTot = 0, ssRes = 0; + for (int i = 0; i < n; i++) { + double x = i; + double y = valid.get(i).heapUsedAfter() / 1024.0; + double predicted = slope * x + intercept; + ssRes += (y - predicted) * (y - predicted); + ssTot += (y - meanY) * (y - meanY); + } + + double r2 = ssTot == 0 ? 0.0 : Math.max(0.0, Math.min(1.0, 1.0 - ssRes / ssTot)); + + boolean leakSuspected = slope > 0 && r2 > LEAK_R2_THRESHOLD; + double confidencePercent = r2 * 100.0; + + return new LeakMetrics(leakSuspected, confidencePercent); + } + + /** + * Allocation and promotion rate metrics. 
+ */ + public static final class RateMetrics { + private final double allocationRateKBPerSec; + private final double promotionRateKBPerSec; + + public RateMetrics(double allocationRateKBPerSec, double promotionRateKBPerSec) { + this.allocationRateKBPerSec = allocationRateKBPerSec; + this.promotionRateKBPerSec = promotionRateKBPerSec; + } + + public double allocationRateKBPerSec() { return allocationRateKBPerSec; } + public double promotionRateKBPerSec() { return promotionRateKBPerSec; } + } + + /** + * Leak detection metrics. + */ + public static final class LeakMetrics { + private final boolean leakSuspected; + private final double confidencePercent; + + public LeakMetrics(boolean leakSuspected, double confidencePercent) { + this.leakSuspected = leakSuspected; + this.confidencePercent = confidencePercent; + } + + public boolean leakSuspected() { return leakSuspected; } + public double confidencePercent() { return confidencePercent; } + } +} diff --git a/argus-server/src/main/java/io/argus/server/handler/ArgusChannelHandler.java b/argus-server/src/main/java/io/argus/server/handler/ArgusChannelHandler.java index 56282e6..864c2ce 100644 --- a/argus-server/src/main/java/io/argus/server/handler/ArgusChannelHandler.java +++ b/argus-server/src/main/java/io/argus/server/handler/ArgusChannelHandler.java @@ -570,6 +570,10 @@ private void handleGCAnalysis(ChannelHandlerContext ctx, FullHttpRequest request sb.append("\"currentHeapCommitted\":").append(analysis.currentHeapCommitted()).append(","); sb.append("\"gcOverheadPercent\":").append(String.format("%.2f", analysis.gcOverheadPercent())).append(","); sb.append("\"isOverheadWarning\":").append(analysis.isOverheadWarning()).append(","); + sb.append("\"allocationRateKBPerSec\":").append(String.format("%.2f", analysis.allocationRateKBPerSec())).append(","); + sb.append("\"promotionRateKBPerSec\":").append(String.format("%.2f", analysis.promotionRateKBPerSec())).append(","); + 
sb.append("\"leakSuspected\":").append(analysis.leakSuspected()).append(","); + sb.append("\"leakConfidencePercent\":").append(String.format("%.2f", analysis.leakConfidencePercent())).append(","); if (analysis.lastGCTime() != null) { sb.append("\"lastGCTime\":\"").append(analysis.lastGCTime()).append("\","); diff --git a/argus-server/src/test/java/io/argus/server/analysis/GcMetricsComputerTest.java b/argus-server/src/test/java/io/argus/server/analysis/GcMetricsComputerTest.java new file mode 100644 index 0000000..a0cfaff --- /dev/null +++ b/argus-server/src/test/java/io/argus/server/analysis/GcMetricsComputerTest.java @@ -0,0 +1,163 @@ +package io.argus.server.analysis; + +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +class GcMetricsComputerTest { + + /** Build a GCSummary with epoch-second timestamps for predictable math. */ + private static GCAnalyzer.GCSummary summary(long timestampEpochSec, + long heapUsedBeforeBytes, + long heapUsedAfterBytes) { + return new GCAnalyzer.GCSummary( + Instant.ofEpochSecond(timestampEpochSec), + "G1 Young Generation", + "G1 Evacuation Pause", + 10.0, + heapUsedBeforeBytes, + heapUsedAfterBytes, + heapUsedBeforeBytes - heapUsedAfterBytes + ); + } + + // ── computeRates ───────────────────────────────────────────────────────── + + @Test + void emptyList_returnsZeroRates() { + GcMetricsComputer.RateMetrics m = GcMetricsComputer.computeRates(List.of()); + assertEquals(0.0, m.allocationRateKBPerSec(), 0.001); + assertEquals(0.0, m.promotionRateKBPerSec(), 0.001); + } + + @Test + void singleEvent_returnsZeroRates() { + GcMetricsComputer.RateMetrics m = GcMetricsComputer.computeRates( + List.of(summary(0, 2 * 1024 * 1024, 1 * 1024 * 1024)) + ); + assertEquals(0.0, m.allocationRateKBPerSec(), 0.001); + assertEquals(0.0, m.promotionRateKBPerSec(), 0.001); + } + + @Test + void steadyAllocation_computesCorrectRate() 
{ + // Each second: heapBefore = 2 MB, heapAfter = 1 MB (prev). + // Allocated per interval = 2 MB - 1 MB = 1 MB = 1024 KB. + // Rate = 1024 KB / 1 s = 1024 KB/s. + long oneMB = 1024 * 1024L; + long twoMB = 2 * oneMB; + List summaries = List.of( + summary(0, twoMB, oneMB), + summary(1, twoMB, oneMB), + summary(2, twoMB, oneMB), + summary(3, twoMB, oneMB) + ); + GcMetricsComputer.RateMetrics m = GcMetricsComputer.computeRates(summaries); + // allocated = (2MB - 1MB) / 1s = 1024 KB/s + assertEquals(1024.0, m.allocationRateKBPerSec(), 5.0); + } + + @Test + void promotionDetected_heapAfterIncreasing() { + // heapAfter grows by 512 KB each second = 512 KB/s promotion rate. + long base = 1024 * 1024L; // 1 MB + long step = 512 * 1024L; // 512 KB + List summaries = List.of( + summary(0, base * 2, base), + summary(1, base * 2 + step, base + step), + summary(2, base * 2 + step * 2, base + step * 2), + summary(3, base * 2 + step * 3, base + step * 3) + ); + GcMetricsComputer.RateMetrics m = GcMetricsComputer.computeRates(summaries); + assertTrue(m.promotionRateKBPerSec() > 0, + "Expected positive promotion rate, got " + m.promotionRateKBPerSec()); + assertEquals(512.0, m.promotionRateKBPerSec(), 10.0); + } + + @Test + void stableHeapAfter_zeroPromotionRate() { + // heapAfter stays constant — no promotion. 
+ long oneMB = 1024 * 1024L; + List summaries = List.of( + summary(0, 2 * oneMB, oneMB), + summary(1, 2 * oneMB, oneMB), + summary(2, 2 * oneMB, oneMB), + summary(3, 2 * oneMB, oneMB) + ); + GcMetricsComputer.RateMetrics m = GcMetricsComputer.computeRates(summaries); + assertEquals(0.0, m.promotionRateKBPerSec(), 0.001); + } + + // ── detectLeak ─────────────────────────────────────────────────────────── + + @Test + void insufficientEvents_noLeakSuspected() { + GcMetricsComputer.LeakMetrics m = GcMetricsComputer.detectLeak(List.of( + summary(0, 2 * 1024 * 1024, 1 * 1024 * 1024), + summary(1, 2 * 1024 * 1024, 1 * 1024 * 1024) + )); + assertFalse(m.leakSuspected()); + assertEquals(0.0, m.confidencePercent(), 0.001); + } + + @Test + void risingHeapTrend_leakSuspected() { + // heapAfter grows linearly: 1 MB, 2 MB, 3 MB, ... → clear upward trend. + List summaries = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + long heapAfter = (i + 1) * 1024 * 1024L; + summaries.add(summary(i, heapAfter + 512 * 1024L, heapAfter)); + } + GcMetricsComputer.LeakMetrics m = GcMetricsComputer.detectLeak(summaries); + assertTrue(m.leakSuspected(), "Expected leak to be suspected for linearly rising heap"); + assertTrue(m.confidencePercent() > 70.0, + "Expected high confidence, got " + m.confidencePercent()); + } + + @Test + void stableHeap_noLeakSuspected() { + // heapAfter oscillates slightly around 1 MB — not a leak. + List summaries = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + long heapAfter = 1024 * 1024L + (i % 3) * 10 * 1024L; // ±30 KB noise + summaries.add(summary(i, heapAfter + 512 * 1024L, heapAfter)); + } + GcMetricsComputer.LeakMetrics m = GcMetricsComputer.detectLeak(summaries); + assertFalse(m.leakSuspected(), "Expected no leak for stable heap"); + } + + @Test + void perfectLinearGrowth_highConfidence() { + // R² should be very close to 1.0 for perfect linear growth. 
+ List summaries = new ArrayList<>(); + for (int i = 0; i < 30; i++) { + long heapAfter = (500 + i * 50) * 1024L; + summaries.add(summary(i, heapAfter + 200 * 1024L, heapAfter)); + } + GcMetricsComputer.LeakMetrics m = GcMetricsComputer.detectLeak(summaries); + assertTrue(m.leakSuspected()); + assertTrue(m.confidencePercent() > 95.0, + "Expected near-100% confidence for perfect linear growth, got " + m.confidencePercent()); + } + + @Test + void eventsWithZeroHeapAfter_ignoredInLeakDetection() { + // Some events have heapUsedAfter == 0 (heap-summary not yet merged). + // detectLeak should skip them and still work with valid events. + List summaries = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + summaries.add(summary(i * 2, 0, 0)); // invalid — no heap data + } + for (int i = 0; i < 15; i++) { + long heapAfter = (i + 1) * 1024 * 1024L; + summaries.add(summary(20 + i, heapAfter + 512 * 1024L, heapAfter)); + } + GcMetricsComputer.LeakMetrics m = GcMetricsComputer.detectLeak(summaries); + // Should still detect the leak from the valid events + assertTrue(m.leakSuspected(), "Expected leak detection despite zero-heap events"); + } +}