Skip to content

Commit 6cd830a

Browse files
committed
feat: dashboard doctor/leak/timeline/gclog pages + argus explain command (#135, #137, #138, #141, #144)
CLI: - ExplainCommand: 25+ JVM term/flag/concept explanations with fuzzy matching. argus explain "G1 Evacuation Pause" - explain.properties knowledge base Dashboard: - Doctor page: HealthScoreComputer with GET /api/doctor endpoint, health gauge (0-100), finding cards with severity colors - Leak detection panel: R² gauge, status badge, heap trend mini-chart, OOM countdown (always visible on main page) - GC timeline: Chart.js scatter plot with hover tooltips, color-coded by GC type (Young/Mixed/Full) - GC log analysis: upgraded from modal to rich result with summary cards, pause histogram, cause breakdown charts, recommendation cards with copy-to-clipboard flags All completions updated (bash/zsh/fish). i18n added (en/ko/ja/zh). Signed-off-by: rlaope <piyrw9754@gmail.com>
1 parent a2cfb18 commit 6cd830a

19 files changed

Lines changed: 1144 additions & 44 deletions

File tree

argus-cli/src/main/java/io/argus/cli/ArgusCli.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import io.argus.cli.command.DoctorCommand;
1515
import io.argus.cli.command.DynLibsCommand;
1616
import io.argus.cli.command.EnvCommand;
17+
import io.argus.cli.command.ExplainCommand;
1718
import io.argus.cli.command.FinalizerCommand;
1819
import io.argus.cli.command.FlameCommand;
1920
import io.argus.cli.command.WatchCommand;
@@ -206,6 +207,7 @@ public static void main(String[] args) {
206207
register(commands, new MBeanCommand());
207208
register(commands, new TopCommand());
208209
register(commands, new WatchCommand());
210+
register(commands, new ExplainCommand());
209211
register(commands, new TuiCommand(commands));
210212

211213
if (version) {
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
package io.argus.cli.command;
2+
3+
import io.argus.cli.config.CliConfig;
import io.argus.cli.config.Messages;
import io.argus.cli.provider.ProviderRegistry;
import io.argus.cli.render.AnsiStyle;
import io.argus.cli.render.RichRenderer;
import io.argus.core.command.CommandGroup;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
15+
16+
/**
17+
* Explains JVM metrics, GC causes, and flags in plain English.
18+
*/
19+
public final class ExplainCommand implements Command {
20+
21+
private static final int WIDTH = RichRenderer.DEFAULT_WIDTH;
22+
23+
private static final Properties KB = loadKnowledgeBase();
24+
25+
private static Properties loadKnowledgeBase() {
26+
Properties p = new Properties();
27+
try (InputStream in = ExplainCommand.class.getResourceAsStream("/explain.properties")) {
28+
if (in != null) {
29+
p.load(in);
30+
}
31+
} catch (IOException e) {
32+
// empty knowledge base — explain will show "no match"
33+
}
34+
return p;
35+
}
36+
37+
@Override
38+
public String name() { return "explain"; }
39+
40+
@Override
41+
public CommandGroup group() { return CommandGroup.PROFILING; }
42+
43+
@Override
44+
public CommandMode mode() { return CommandMode.READ; }
45+
46+
@Override
47+
public String description(Messages messages) {
48+
return messages.get("cmd.explain.desc");
49+
}
50+
51+
@Override
52+
public void execute(String[] args, CliConfig config, ProviderRegistry registry, Messages messages) {
53+
if (args.length == 0) {
54+
System.err.println("Usage: argus explain <term>");
55+
System.err.println("Examples:");
56+
System.err.println(" argus explain \"G1 Evacuation Pause\"");
57+
System.err.println(" argus explain -XX:MaxGCPauseMillis");
58+
System.err.println(" argus explain throughput");
59+
System.err.println(" argus explain gc-overhead");
60+
return;
61+
}
62+
63+
boolean json = "json".equals(config.format());
64+
boolean useColor = config.color();
65+
66+
// Collect the query (join remaining non-option args)
67+
StringBuilder queryBuilder = new StringBuilder();
68+
for (String arg : args) {
69+
if (arg.equals("--format=json")) {
70+
json = true;
71+
} else if (!arg.startsWith("--")) {
72+
if (queryBuilder.length() > 0) queryBuilder.append(' ');
73+
queryBuilder.append(arg);
74+
}
75+
}
76+
77+
String query = queryBuilder.toString().trim();
78+
if (query.isEmpty()) {
79+
System.err.println("Usage: argus explain <term>");
80+
return;
81+
}
82+
83+
// Look up: exact match first, then fuzzy
84+
String exactKey = "explain." + query;
85+
String explanation = KB.getProperty(exactKey);
86+
String matchedTerm = query;
87+
88+
if (explanation == null) {
89+
// Fuzzy: find all keys whose suffix contains the query (case-insensitive)
90+
String lowerQuery = query.toLowerCase();
91+
List<String> fuzzyMatches = new ArrayList<>();
92+
for (String key : KB.stringPropertyNames()) {
93+
if (!key.startsWith("explain.")) continue;
94+
String term = key.substring("explain.".length());
95+
if (term.toLowerCase().contains(lowerQuery)) {
96+
fuzzyMatches.add(term);
97+
}
98+
}
99+
100+
if (fuzzyMatches.size() == 1) {
101+
matchedTerm = fuzzyMatches.get(0);
102+
explanation = KB.getProperty("explain." + matchedTerm);
103+
} else if (fuzzyMatches.size() > 1) {
104+
if (json) {
105+
printJsonSuggestions(query, fuzzyMatches);
106+
} else {
107+
printSuggestions(useColor, query, fuzzyMatches);
108+
}
109+
return;
110+
}
111+
}
112+
113+
if (explanation == null) {
114+
if (json) {
115+
System.out.println("{\"query\":\"" + RichRenderer.escapeJson(query)
116+
+ "\",\"found\":false,\"explanation\":null}");
117+
} else {
118+
System.out.println(RichRenderer.boxHeader(useColor, "explain", WIDTH, "\"" + query + "\""));
119+
System.out.println(RichRenderer.emptyLine(WIDTH));
120+
System.out.println(RichRenderer.boxLine(
121+
AnsiStyle.style(useColor, AnsiStyle.YELLOW) + "No explanation found for: " + query
122+
+ AnsiStyle.style(useColor, AnsiStyle.RESET), WIDTH));
123+
System.out.println(RichRenderer.emptyLine(WIDTH));
124+
System.out.println(RichRenderer.boxLine("Try: argus explain gc-overhead", WIDTH));
125+
System.out.println(RichRenderer.boxLine(" argus explain throughput", WIDTH));
126+
System.out.println(RichRenderer.boxLine(" argus explain \"G1 Evacuation Pause\"", WIDTH));
127+
System.out.println(RichRenderer.emptyLine(WIDTH));
128+
System.out.println(RichRenderer.boxFooter(useColor, null, WIDTH));
129+
}
130+
return;
131+
}
132+
133+
if (json) {
134+
printJson(query, matchedTerm, explanation);
135+
return;
136+
}
137+
138+
printExplanation(useColor, matchedTerm, explanation);
139+
}
140+
141+
private static void printExplanation(boolean useColor, String term, String explanation) {
142+
System.out.println(RichRenderer.boxHeader(useColor, "explain", WIDTH, "\"" + term + "\""));
143+
System.out.println(RichRenderer.emptyLine(WIDTH));
144+
145+
// Term name in bold
146+
String bold = AnsiStyle.style(useColor, AnsiStyle.BOLD);
147+
String reset = AnsiStyle.style(useColor, AnsiStyle.RESET);
148+
System.out.println(RichRenderer.boxLine(bold + term + reset, WIDTH));
149+
System.out.println(RichRenderer.emptyLine(WIDTH));
150+
151+
// Word-wrap explanation at (WIDTH - 4) characters
152+
int wrapAt = WIDTH - 4;
153+
for (String line : wordWrap(explanation, wrapAt)) {
154+
System.out.println(RichRenderer.boxLine(line, WIDTH));
155+
}
156+
157+
System.out.println(RichRenderer.emptyLine(WIDTH));
158+
System.out.println(RichRenderer.boxFooter(useColor, null, WIDTH));
159+
}
160+
161+
private static void printSuggestions(boolean useColor, String query, List<String> matches) {
162+
System.out.println(RichRenderer.boxHeader(useColor, "explain", WIDTH, "\"" + query + "\""));
163+
System.out.println(RichRenderer.emptyLine(WIDTH));
164+
System.out.println(RichRenderer.boxLine(
165+
AnsiStyle.style(useColor, AnsiStyle.YELLOW) + "Multiple matches found:"
166+
+ AnsiStyle.style(useColor, AnsiStyle.RESET), WIDTH));
167+
System.out.println(RichRenderer.emptyLine(WIDTH));
168+
for (String match : matches) {
169+
System.out.println(RichRenderer.boxLine(
170+
" " + AnsiStyle.style(useColor, AnsiStyle.CYAN) + match
171+
+ AnsiStyle.style(useColor, AnsiStyle.RESET), WIDTH));
172+
}
173+
System.out.println(RichRenderer.emptyLine(WIDTH));
174+
System.out.println(RichRenderer.boxLine("Use a more specific term to get a full explanation.", WIDTH));
175+
System.out.println(RichRenderer.emptyLine(WIDTH));
176+
System.out.println(RichRenderer.boxFooter(useColor, null, WIDTH));
177+
}
178+
179+
private static void printJson(String query, String term, String explanation) {
180+
System.out.println("{\"query\":\"" + RichRenderer.escapeJson(query)
181+
+ "\",\"found\":true"
182+
+ ",\"term\":\"" + RichRenderer.escapeJson(term) + "\""
183+
+ ",\"explanation\":\"" + RichRenderer.escapeJson(explanation) + "\"}");
184+
}
185+
186+
private static void printJsonSuggestions(String query, List<String> matches) {
187+
StringBuilder sb = new StringBuilder();
188+
sb.append("{\"query\":\"").append(RichRenderer.escapeJson(query))
189+
.append("\",\"found\":false,\"suggestions\":[");
190+
for (int i = 0; i < matches.size(); i++) {
191+
if (i > 0) sb.append(',');
192+
sb.append('"').append(RichRenderer.escapeJson(matches.get(i))).append('"');
193+
}
194+
sb.append("]}");
195+
System.out.println(sb);
196+
}
197+
198+
/** Splits text into lines no longer than maxWidth, breaking on spaces. */
199+
private static List<String> wordWrap(String text, int maxWidth) {
200+
List<String> lines = new ArrayList<>();
201+
String[] words = text.split(" ");
202+
StringBuilder current = new StringBuilder();
203+
for (String word : words) {
204+
if (current.length() == 0) {
205+
current.append(word);
206+
} else if (current.length() + 1 + word.length() <= maxWidth) {
207+
current.append(' ').append(word);
208+
} else {
209+
lines.add(current.toString());
210+
current = new StringBuilder(word);
211+
}
212+
}
213+
if (current.length() > 0) {
214+
lines.add(current.toString());
215+
}
216+
return lines;
217+
}
218+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# GC Causes
2+
explain.G1\ Evacuation\ Pause=Stop-the-world event where G1 copies live objects between regions. High pause times suggest too many objects surviving to old gen. Try: argus gcnew <pid> --age-histogram
3+
explain.Allocation\ Failure=Young gen ran out of space for new objects. The JVM triggered a Young GC to free memory. Frequent failures indicate the young gen is too small (-Xmn) or allocation rate is too high.
4+
explain.Metadata\ GC\ Threshold=Metaspace reached its threshold, triggering GC to unload classes. Check for ClassLoader leaks if this recurs. Set -XX:MaxMetaspaceSize explicitly.
5+
explain.Humongous\ Allocation=Object larger than half a G1 region was allocated directly to old gen, bypassing young gen. This fragments the heap. Increase region size: -XX:G1HeapRegionSize=16m
6+
explain.Full\ GC=Complete garbage collection scanning the entire heap. Stops all application threads. This is the most expensive GC event. Indicates heap pressure or promotion failure.
7+
explain.System.gc()=Application code called System.gc() explicitly. Consider -XX:+DisableExplicitGC to prevent this, or find and remove the call.
8+
explain.Ergonomics=JVM adaptive sizing policy triggered a GC. The JVM is adjusting heap regions to meet pause-time or throughput goals.
9+
explain.G1\ Humongous\ Allocation=Variant of Humongous Allocation specific to G1GC. Object bypassed young gen and went directly to old gen. Increase -XX:G1HeapRegionSize.
10+
explain.Promotion\ Failed=Survivor space or old gen could not accommodate promoted objects during young GC, causing a fallback Full GC. Increase old gen size or reduce promotion rate.
11+
explain.Concurrent\ Mode\ Failure=CMS or G1 concurrent collection could not finish before old gen filled up, triggering a stop-the-world Full GC. Increase heap or start concurrent GC earlier.
12+
13+
# JVM Flags
14+
explain.-XX\:MaxGCPauseMillis=Target maximum GC pause time (ms). G1GC tries to keep pauses below this. Default: 200ms. For latency-sensitive apps try 100ms. For throughput apps try 500ms.
15+
explain.-XX\:G1HeapRegionSize=G1 heap region size (1MB-32MB, must be power of 2). Larger regions reduce humongous allocations but may increase pause times. Default: auto-sized based on heap.
16+
explain.-XX\:+UseZGC=Enable ZGC garbage collector. Sub-millisecond pauses regardless of heap size. Best for latency-sensitive workloads with large heaps (>4GB). Requires JDK 15+.
17+
explain.-XX\:+UseG1GC=Enable G1 garbage collector (default since JDK 9). Balances throughput and latency. Best general-purpose collector for heaps 4GB-32GB.
18+
explain.-XX\:MaxMetaspaceSize=Maximum metaspace size. Without this, metaspace can grow unbounded. Recommended: set to 256m-512m to catch ClassLoader leaks early.
19+
explain.-XX\:NewRatio=Ratio of old gen to young gen. NewRatio=2 means old gen is 2x young gen. Lower values give more young gen space, reducing young GC frequency.
20+
explain.-XX\:MaxTenuringThreshold=Number of GC cycles an object must survive before promotion to old gen. Default: 15. Lower values promote faster (less survivor copying). Use argus gcnew --age-histogram to find optimal value.
21+
explain.-Xmx=Maximum heap size. The most important JVM flag. Too small = frequent GC + OOM risk. Too large = wasted memory + longer Full GC pauses. Start with 2x-4x live data set size.
22+
explain.-Xms=Initial heap size. Set equal to -Xmx for predictable performance (avoids heap resizing pauses at startup).
23+
explain.-XX\:+HeapDumpOnOutOfMemoryError=Automatically create a heap dump when OutOfMemoryError occurs. Essential for production — lets you analyze memory leaks post-mortem with argus heapanalyze.
24+
explain.-XX\:+DisableExplicitGC=Disables System.gc() calls from application code. Prevents unintended full GC pauses caused by libraries or frameworks calling System.gc() directly.
25+
explain.-XX\:ParallelGCThreads=Number of threads used during stop-the-world GC phases. Default: number of CPUs (up to 8, then scaled). Increase for machines with many cores and large heaps.
26+
explain.-XX\:ConcGCThreads=Number of threads for concurrent GC work (G1/ZGC/Shenandoah). Default: ParallelGCThreads/4. Increase if concurrent marking can't keep up with allocation rate.
27+
explain.-XX\:+UseShenandoahGC=Enable Shenandoah GC. Near-constant pause times independent of heap size. Good alternative to ZGC for large heaps. Available in OpenJDK 12+.
28+
explain.-XX\:SurvivorRatio=Ratio of Eden to each Survivor space. SurvivorRatio=8 means Eden is 8x each Survivor space. Decrease if objects are being promoted too early.
29+
30+
# Concepts
31+
explain.throughput=The percentage of time NOT spent in GC. Throughput 95% means 5% of time is GC overhead. Target: >95% for batch, >99% for latency-sensitive. Check with: argus gc <pid>
32+
explain.gc-overhead=Percentage of wall-clock time spent in GC pauses. >5% is concerning, >10% is critical. The JVM throws OutOfMemoryError if overhead exceeds 98% for sustained periods.
33+
explain.allocation-rate=How fast the application creates objects (MB/s). High allocation rate causes frequent GC. Measure with: argus gclog <file> --rates
34+
explain.promotion-rate=Rate at which objects are promoted from young gen to old gen (MB/s). High promotion rate fills old gen, leading to Full GC. Check tenuring threshold: argus gcnew <pid>
35+
explain.memory-leak=Heap-after-GC steadily increasing over time. Objects are being retained that should be garbage collected. Diagnose with: argus gclog <file> --leak-detect, then argus heapdump <pid>
36+
explain.safepoint=A point where all application threads are paused so the JVM can perform internal operations (GC, deoptimization, etc). Long time-to-safepoint delays GC start.
37+
explain.pinning=Virtual thread is pinned to its carrier thread, preventing other virtual threads from using it. Caused by synchronized blocks or native calls. Check with: argus watch <pid>
38+
explain.carrier-thread=Platform thread that carries (executes) virtual threads. ForkJoinPool manages carriers. Virtual threads are multiplexed onto carriers.
39+
explain.young-gen=The heap region where new objects are allocated. Collected frequently by Young GC (minor GC). Objects that survive enough collections are promoted to old gen.
40+
explain.old-gen=The heap region for long-lived objects promoted from young gen. Collected less frequently but pauses are longer. Full GC collects both young and old gen.
41+
explain.metaspace=Native memory area storing class metadata (replaced PermGen in Java 8+). Grows dynamically by default. Frequent Metadata GC Threshold events suggest ClassLoader leaks.
42+
explain.survivor-space=Two regions (S0, S1) between Eden and old gen. Surviving objects bounce between survivors each GC until MaxTenuringThreshold is reached. Check usage with: argus gcutil <pid>
43+
explain.live-set=Amount of heap occupied after a full GC — the minimum heap your application needs. Size -Xmx to 2x-3x your live set for healthy GC behavior.
44+
explain.stop-the-world=JVM pauses all application threads to perform a GC or internal operation. During this time, no application code runs. Minimize with low-pause collectors like ZGC.

argus-cli/src/main/resources/messages_en.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ cmd.tui.desc=Interactive terminal UI — browse and execute all commands (k9s-st
223223

224224
# Suggest
225225
cmd.suggest.desc=JVM flag optimization based on workload analysis
226+
cmd.explain.desc=Explain JVM metrics, GC causes, and flags in plain English
226227

227228
# GC Cause
228229
cmd.gccause.desc=Show GC cause with utilization stats

argus-cli/src/main/resources/messages_ja.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ cmd.watch.desc=\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u30bf\u30fc\u30df\u30ca\u30e
212212
cmd.tui.desc=\u30a4\u30f3\u30bf\u30e9\u30af\u30c6\u30a3\u30d6\u30bf\u30fc\u30df\u30ca\u30ebUI \u2014 \u5168\u30b3\u30de\u30f3\u30c9\u3092\u95b2\u89a7\u30fb\u5b9f\u884c (k9s\u30b9\u30bf\u30a4\u30eb)
213213
# Suggest
214214
cmd.suggest.desc=\u30ef\u30fc\u30af\u30ed\u30fc\u30c9\u5206\u6790\u306b\u57fa\u3065\u304fJVM\u30d5\u30e9\u30b0\u6700\u9069\u5316
215+
cmd.explain.desc=JVM\u30e1\u30c8\u30ea\u30af\u30b9\u3001GC\u539f\u56e0\u3001\u30d5\u30e9\u30b0\u3092\u308f\u304b\u308a\u3084\u3059\u304f\u8aac\u660e
215216

216217
# GC Cause
217218
cmd.gccause.desc=GC\u539F\u56E0\u3068\u4F7F\u7528\u7387\u7D71\u8A08\u3092\u8868\u793A

argus-cli/src/main/resources/messages_ko.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ cmd.watch.desc=\uc2e4\uc2dc\uac04 \ud130\ubbf8\ub110 \ub300\uc2dc\ubcf4\ub4dc (J
212212
cmd.tui.desc=\uc778\ud130\ub799\ud2f0\ube0c \ud130\ubbf8\ub110 UI \u2014 \ubaa8\ub4e0 \uba85\ub839\uc5b4 \ud0d0\uc0c9 \ubc0f \uc2e4\ud589 (k9s \uc2a4\ud0c0\uc77c)
213213
# Suggest
214214
cmd.suggest.desc=\uc6cc\ud06c\ub85c\ub4dc \ubd84\uc11d \uae30\ubc18 JVM \ud50c\ub798\uadf8 \ucd5c\uc801\ud654
215+
cmd.explain.desc=JVM \uba54\ud2b8\ub9ad, GC \uc6d0\uc778, \ud50c\ub798\uadf8\ub97c \uc27d\uac8c \uc124\uba85
215216

216217
# GC Cause
217218
cmd.gccause.desc=GC \uC6D0\uC778\uACFC \uC0AC\uC6A9\uB960 \uD1B5\uACC4 \uD45C\uC2DC

argus-cli/src/main/resources/messages_zh.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ cmd.watch.desc=\u5b9e\u65f6\u7ec8\u7aef\u4eea\u8868\u76d8 (JVM\u7248htop)
212212
cmd.tui.desc=\u4ea4\u4e92\u5f0f\u7ec8\u7aefUI \u2014 \u6d4f\u89c8\u5e76\u6267\u884c\u6240\u6709\u547d\u4ee4 (k9s\u98ce\u683c)
213213
# Suggest
214214
cmd.suggest.desc=\u57fa\u4e8e\u5de5\u4f5c\u8d1f\u8f7d\u5206\u6790\u7684JVM\u53c2\u6570\u4f18\u5316
215+
cmd.explain.desc=\u7528\u901a\u4fd7\u8bed\u8a00\u89e3\u91caJVM\u6307\u6807\u3001GC\u539f\u56e0\u548c\u53c2\u6570
215216

216217
# GC Cause
217218
cmd.gccause.desc=\u663E\u793AGC\u539F\u56E0\u548C\u5229\u7528\u7387\u7EDF\u8BA1

0 commit comments

Comments
 (0)