Skip to content

Commit 640698b

Browse files
authored
Merge pull request #146 from rlaope/feat/argus-cluster
feat: argus cluster — multi-JVM aggregated health view (#134)
2 parents 373ac44 + fc15b56 commit 640698b

9 files changed

Lines changed: 671 additions & 0 deletions

File tree

argus-cli/src/main/java/io/argus/cli/ArgusCli.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import io.argus.cli.command.Command;
44
import io.argus.cli.command.CommandExitException;
5+
import io.argus.cli.command.ClusterCommand;
56
import io.argus.cli.command.BuffersCommand;
67
import io.argus.cli.command.CiCommand;
78
import io.argus.cli.command.ClassStatCommand;
@@ -153,6 +154,7 @@ public static void main(String[] args) {
153154

154155
// Register all commands
155156
Map<String, Command> commands = new LinkedHashMap<>();
157+
register(commands, new ClusterCommand());
156158
register(commands, new InitCommand());
157159
register(commands, new PsCommand());
158160
register(commands, new HistoCommand());
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
package io.argus.cli.cluster;
2+
3+
import java.util.List;
4+
5+
/**
6+
* Aggregates health metrics across multiple JVM instances.
7+
*/
8+
public final class ClusterHealthAggregator {
9+
10+
/** Known Prometheus metric name candidates for each dimension. */
11+
private static final String[] HEAP_METRICS = {
12+
"argus_heap_used_percent",
13+
"jvm_memory_used_bytes",
14+
"heap_used_percent"
15+
};
16+
private static final String[] GC_METRICS = {
17+
"argus_gc_overhead_percent",
18+
"jvm_gc_overhead_percent",
19+
"gc_overhead_percent"
20+
};
21+
private static final String[] CPU_METRICS = {
22+
"argus_cpu_process_percent",
23+
"jvm_cpu_usage",
24+
"process_cpu_usage"
25+
};
26+
private static final String[] VT_METRICS = {
27+
"argus_virtual_threads_active",
28+
"jvm_virtual_threads_active",
29+
"virtual_threads_active"
30+
};
31+
private static final String[] LEAK_METRICS = {
32+
"argus_memory_leak_suspected",
33+
"memory_leak_suspected"
34+
};
35+
36+
private ClusterHealthAggregator() {}
37+
38+
public record InstanceMetrics(
39+
String target,
40+
double heapPercent,
41+
double gcOverhead,
42+
double cpuPercent,
43+
boolean leakSuspected,
44+
long activeVThreads,
45+
boolean reachable
46+
) {}
47+
48+
public record AggregateStats(
49+
double heapMin, double heapMax, double heapAvg,
50+
double gcMin, double gcMax, double gcAvg,
51+
double cpuMin, double cpuMax, double cpuAvg,
52+
long vtTotal,
53+
int leakCount,
54+
String worstTarget,
55+
String worstReason
56+
) {}
57+
58+
/**
59+
* Extracts per-instance metrics from the raw Prometheus map.
60+
*/
61+
public static InstanceMetrics extract(String target, java.util.Map<String, Double> raw) {
62+
double heap = pick(raw, HEAP_METRICS, -1.0);
63+
double gc = pick(raw, GC_METRICS, -1.0);
64+
double cpu = pick(raw, CPU_METRICS, -1.0);
65+
double vt = pick(raw, VT_METRICS, 0.0);
66+
double leak = pick(raw, LEAK_METRICS, 0.0);
67+
return new InstanceMetrics(target, heap, gc, cpu, leak > 0.5, (long) vt, true);
68+
}
69+
70+
/**
71+
* Computes aggregate statistics from a list of reachable instance metrics.
72+
*/
73+
public static AggregateStats aggregate(List<InstanceMetrics> instances) {
74+
List<InstanceMetrics> up = instances.stream().filter(InstanceMetrics::reachable).toList();
75+
if (up.isEmpty()) {
76+
return new AggregateStats(-1,-1,-1,-1,-1,-1,-1,-1,-1,0,0,null,"no reachable instances");
77+
}
78+
79+
double heapMin = Double.MAX_VALUE, heapMax = -1, heapSum = 0;
80+
double gcMin = Double.MAX_VALUE, gcMax = -1, gcSum = 0;
81+
double cpuMin = Double.MAX_VALUE, cpuMax = -1, cpuSum = 0;
82+
long vtTotal = 0;
83+
int leakCount = 0;
84+
int heapCnt = 0, gcCnt = 0, cpuCnt = 0;
85+
86+
for (InstanceMetrics m : up) {
87+
if (m.heapPercent() >= 0) {
88+
heapMin = Math.min(heapMin, m.heapPercent());
89+
heapMax = Math.max(heapMax, m.heapPercent());
90+
heapSum += m.heapPercent();
91+
heapCnt++;
92+
}
93+
if (m.gcOverhead() >= 0) {
94+
gcMin = Math.min(gcMin, m.gcOverhead());
95+
gcMax = Math.max(gcMax, m.gcOverhead());
96+
gcSum += m.gcOverhead();
97+
gcCnt++;
98+
}
99+
if (m.cpuPercent() >= 0) {
100+
cpuMin = Math.min(cpuMin, m.cpuPercent());
101+
cpuMax = Math.max(cpuMax, m.cpuPercent());
102+
cpuSum += m.cpuPercent();
103+
cpuCnt++;
104+
}
105+
vtTotal += m.activeVThreads();
106+
if (m.leakSuspected()) leakCount++;
107+
}
108+
109+
// Worst instance: highest GC overhead, then heap
110+
InstanceMetrics worst = up.stream()
111+
.filter(m -> m.gcOverhead() >= 0)
112+
.max(java.util.Comparator.comparingDouble(InstanceMetrics::gcOverhead))
113+
.or(() -> up.stream()
114+
.filter(m -> m.heapPercent() >= 0)
115+
.max(java.util.Comparator.comparingDouble(InstanceMetrics::heapPercent)))
116+
.orElse(up.get(0));
117+
118+
StringBuilder reason = new StringBuilder();
119+
if (worst.gcOverhead() >= 0) {
120+
reason.append(String.format("GC overhead %.1f%%", worst.gcOverhead()));
121+
}
122+
if (worst.leakSuspected()) {
123+
if (!reason.isEmpty()) reason.append(", ");
124+
reason.append("memory leak suspected");
125+
}
126+
127+
return new AggregateStats(
128+
heapCnt > 0 ? heapMin : -1, heapCnt > 0 ? heapMax : -1, heapCnt > 0 ? heapSum / heapCnt : -1,
129+
gcCnt > 0 ? gcMin : -1, gcCnt > 0 ? gcMax : -1, gcCnt > 0 ? gcSum / gcCnt : -1,
130+
cpuCnt > 0 ? cpuMin : -1, cpuCnt > 0 ? cpuMax : -1, cpuCnt > 0 ? cpuSum / cpuCnt : -1,
131+
vtTotal, leakCount, worst.target(), reason.toString()
132+
);
133+
}
134+
135+
private static double pick(java.util.Map<String, Double> map, String[] keys, double def) {
136+
for (String key : keys) {
137+
Double v = map.get(key);
138+
if (v != null) return v;
139+
}
140+
return def;
141+
}
142+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
package io.argus.cli.cluster;
2+
3+
import java.util.HashMap;
4+
import java.util.Map;
5+
6+
/**
7+
* Parses Prometheus exposition text format into a map of metric name to value.
8+
* Skips comment lines (#) and handles optional labels in braces.
9+
*/
10+
public final class PrometheusTextParser {
11+
12+
private PrometheusTextParser() {}
13+
14+
/**
15+
* Parses Prometheus text format.
16+
* For metrics with labels (e.g. {@code metric{label="v"} 1.0}), the base metric name is used as the key.
17+
* When multiple samples share the same base name, the last value wins.
18+
*
19+
* @param text raw Prometheus exposition text
20+
* @return map of metric name to value
21+
*/
22+
public static Map<String, Double> parse(String text) {
23+
Map<String, Double> metrics = new HashMap<>();
24+
if (text == null || text.isEmpty()) {
25+
return metrics;
26+
}
27+
for (String rawLine : text.split("\n")) {
28+
String line = rawLine.trim();
29+
if (line.isEmpty() || line.startsWith("#")) {
30+
continue;
31+
}
32+
// Split off the value (last whitespace-delimited token)
33+
// Prometheus line: metric_name{labels} value [timestamp]
34+
// or: metric_name value [timestamp]
35+
int lastSpace = line.lastIndexOf(' ');
36+
if (lastSpace < 0) continue;
37+
// Check for optional timestamp (third token)
38+
String valueToken = line.substring(lastSpace + 1).trim();
39+
String rest = line.substring(0, lastSpace).trim();
40+
41+
// If rest still has a space the value token might be a timestamp; strip it
42+
int secondSpace = rest.lastIndexOf(' ');
43+
if (secondSpace >= 0) {
44+
// rest = "metric{...} value", valueToken was timestamp — re-parse
45+
String candidate = rest.substring(secondSpace + 1).trim();
46+
if (isNumeric(candidate)) {
47+
valueToken = candidate;
48+
rest = rest.substring(0, secondSpace).trim();
49+
}
50+
}
51+
52+
if (!isNumeric(valueToken)) continue;
53+
double value;
54+
try {
55+
value = Double.parseDouble(valueToken);
56+
} catch (NumberFormatException e) {
57+
continue;
58+
}
59+
60+
// Extract base metric name (strip labels block)
61+
String metricName;
62+
int braceOpen = rest.indexOf('{');
63+
if (braceOpen >= 0) {
64+
metricName = rest.substring(0, braceOpen).trim();
65+
} else {
66+
metricName = rest.trim();
67+
}
68+
if (!metricName.isEmpty()) {
69+
metrics.put(metricName, value);
70+
}
71+
}
72+
return metrics;
73+
}
74+
75+
private static boolean isNumeric(String s) {
76+
if (s == null || s.isEmpty()) return false;
77+
try {
78+
Double.parseDouble(s);
79+
return true;
80+
} catch (NumberFormatException e) {
81+
return false;
82+
}
83+
}
84+
}

0 commit comments

Comments
 (0)