Skip to content

Commit dfffcd9

Browse files
committed
refactoring & benchmarking
1 parent 9dd4a90 commit dfffcd9

16 files changed

Lines changed: 5029 additions & 351 deletions

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ node_modules
33
dist
44

55
coverage
6+
build
67

78
.oldgit
89

@@ -12,4 +13,5 @@ arken.db*
1213

1314
secrets.json
1415

15-
.env
16+
.env
17+
benchmarks.jsonl

benchmark-config.ts

Lines changed: 306 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,306 @@
1+
import path from 'node:path';
2+
3+
/** Names of the timing phases recorded for a benchmarked CLI run. */
export type CliBenchmarkPhaseName = 'boot' | 'initialized' | 'completed' | 'execution';
4+
/** Scenario category; `selectBenchmarkScenarios` returns only 'local' scenarios by default. */
export type CliBenchmarkScenarioKind = 'local' | 'live-remote';
5+
6+
/** One number per phase name; used as the type of `targetsMs`, so presumably milliseconds. */
export type CliBenchmarkTargets = Record<CliBenchmarkPhaseName, number>;
7+
8+
/**
 * Static definition of one benchmark scenario: the CLI arguments to run, the
 * expected exit code and output fragments, and the per-phase timing targets.
 * NOTE(review): how the runner consumes these fields is not visible in this file.
 */
export type CliBenchmarkScenario = {
9+
kind: CliBenchmarkScenarioKind;
10+
// Identifier matched against `requestedNames` in `selectBenchmarkScenarios`.
name: string;
11+
description: string;
12+
args: string[];
13+
envOverrides: NodeJS.ProcessEnv;
14+
expectedExitCode: number;
15+
expectedStdoutIncludes?: string[];
16+
expectedStderrIncludes?: string[];
17+
targetsMs: CliBenchmarkTargets;
18+
// Presumably the `tolerancePercent` handed to `summarizePhase` — TODO confirm in the runner.
tolerancePercent: number;
19+
warmupRuns: number;
20+
measuredRuns: number;
21+
};
22+
23+
/**
 * Aggregate statistics for one phase. The field set matches the object built
 * by `summarizePhase` in this file.
 */
export type CliBenchmarkPhaseSummary = {
24+
avgMs: number;
25+
p50Ms: number;
26+
p95Ms: number;
27+
minMs: number;
28+
maxMs: number;
29+
targetMs: number;
30+
// `summarizePhase` computes this as targetMs * (1 + tolerancePercent / 100).
allowedUpperMs: number;
31+
// `summarizePhase` sets this to avgMs <= allowedUpperMs.
withinTolerance: boolean;
32+
};
33+
34+
/**
 * Result shape of `compareAgainstPreviousThree`. All three fields are null
 * when no earlier history records match the scenario.
 */
export type CliBenchmarkPhaseComparison = {
35+
previousThreeAvgMs: number | null;
36+
// Current average minus `previousThreeAvgMs`.
deltaMs: number | null;
37+
// `deltaMs` expressed as a percentage of `previousThreeAvgMs`.
deltaPercent: number | null;
38+
};
39+
40+
/**
 * Record describing a single CLI invocation (tagged `kind: 'cli-benchmark-run'`),
 * including its outcome and per-phase timings.
 * NOTE(review): the producer of this record is not visible in this file.
 */
export type CliBenchmarkRunRecord = {
41+
version: 1;
42+
kind: 'cli-benchmark-run';
43+
scenario: string;
44+
argv: string[];
45+
pid: number;
46+
nodeVersion: string;
47+
recordedAt: string;
48+
command: string | null;
49+
interactive: boolean;
50+
summaryOnly: boolean;
51+
outcome: 'completed' | 'failed';
52+
exitCode: number;
53+
failureMessage?: string;
54+
// `initialized` and `execution` are nullable; `boot` and `completed` are always present.
phasesMs: {
55+
boot: number;
56+
initialized: number | null;
57+
completed: number;
58+
execution: number | null;
59+
};
60+
};
61+
62+
/**
 * One entry of the benchmark history, as returned by `parseJsonLinesHistory`:
 * the scenario that was run, per-phase summaries and comparisons, and the
 * individual samples.
 */
export type CliBenchmarkHistoryRecord = {
63+
version: 1;
64+
// `compareAgainstPreviousThree` only considers records whose suite is 'cli.startup'.
suite: 'cli.startup';
65+
subject: '@arken/cli';
66+
kind?: CliBenchmarkScenarioKind;
67+
capturedAt: string;
68+
scenario: string;
69+
description: string;
70+
command: string[];
71+
envOverrides: Record<string, string>;
72+
warmupRuns: number;
73+
measuredRuns: number;
74+
nodeVersion: string;
75+
cwd: string;
76+
summary: Record<CliBenchmarkPhaseName, CliBenchmarkPhaseSummary>;
77+
comparisons: Record<CliBenchmarkPhaseName, CliBenchmarkPhaseComparison>;
78+
// Per-run measurements; only `executionMs` is nullable.
samples: Array<{
79+
run: number;
80+
exitCode: number;
81+
bootMs: number;
82+
initializedMs: number;
83+
completedMs: number;
84+
executionMs: number | null;
85+
}>;
86+
};
87+
88+
/**
 * Path of `benchmarks.jsonl`, resolved next to this module via `__dirname`.
 * Presumably the history file whose text is fed to `parseJsonLinesHistory` — TODO confirm in the runner.
 */
export const CLI_BENCHMARK_HISTORY_PATH = path.join(__dirname, 'benchmarks.jsonl');
89+
90+
/**
 * Environment overrides shared by the 'local' scenarios. Sets
 * CEREBRO_SERVICE_URI to ws://127.0.0.1:1; per the constant's name this is
 * intended to be an unreachable endpoint.
 * The same object instance is referenced by every scenario that uses it.
 */
export const DEFAULT_UNREACHABLE_REMOTE_ENV: NodeJS.ProcessEnv = {
91+
CEREBRO_SERVICE_URI: 'ws://127.0.0.1:1',
92+
};
93+
94+
/**
 * All known benchmark scenarios: three 'local' ones followed by three
 * 'live-remote' ones. `selectBenchmarkScenarios` filters this list and keeps
 * its order.
 */
export const CLI_BENCHMARK_SCENARIOS: CliBenchmarkScenario[] = [
95+
// local: top-level `--help`.
{
96+
kind: 'local',
97+
name: 'help',
98+
description: 'Top-level help without initializing remote transport',
99+
args: ['--help'],
100+
envOverrides: DEFAULT_UNREACHABLE_REMOTE_ENV,
101+
expectedExitCode: 0,
102+
expectedStdoutIncludes: ['COMMANDS:', 'config.list', 'cerebro.info'],
103+
targetsMs: {
104+
boot: 2_000,
105+
initialized: 2_400,
106+
completed: 2_400,
107+
execution: 400,
108+
},
109+
tolerancePercent: 35,
110+
warmupRuns: 1,
111+
measuredRuns: 5,
112+
},
113+
// local: `config.list`.
{
114+
kind: 'local',
115+
name: 'config.list',
116+
description: 'Local config command without remote connectivity',
117+
args: ['config.list'],
118+
envOverrides: DEFAULT_UNREACHABLE_REMOTE_ENV,
119+
expectedExitCode: 0,
120+
expectedStdoutIncludes: ['Current Configuration', '"metaverse": "Arken"', '"application": "Cerebro"'],
121+
targetsMs: {
122+
boot: 2_000,
123+
initialized: 2_700,
124+
completed: 2_900,
125+
execution: 1_200,
126+
},
127+
tolerancePercent: 35,
128+
warmupRuns: 1,
129+
measuredRuns: 5,
130+
},
131+
// local: unknown command; the only scenario expecting exit code 1 and stderr output.
{
132+
kind: 'local',
133+
name: 'invalid-command',
134+
description: 'Summary-only invalid command path without remote side effects',
135+
args: ['does.not.exist'],
136+
envOverrides: DEFAULT_UNREACHABLE_REMOTE_ENV,
137+
expectedExitCode: 1,
138+
expectedStdoutIncludes: ['COMMANDS:', 'config.list'],
139+
expectedStderrIncludes: ['Command not found: "does.not.exist".'],
140+
targetsMs: {
141+
boot: 2_000,
142+
initialized: 2_400,
143+
completed: 2_600,
144+
execution: 500,
145+
},
146+
tolerancePercent: 35,
147+
warmupRuns: 1,
148+
measuredRuns: 5,
149+
},
150+
// live-remote: `cerebro.info`; no env overrides, wider tolerance, fewer measured runs.
{
151+
kind: 'live-remote',
152+
name: 'cerebro.info',
153+
description: 'Live Cerebro info query over the local service transport',
154+
args: ['cerebro.info'],
155+
envOverrides: {},
156+
expectedExitCode: 0,
157+
expectedStdoutIncludes: ['"name": "Cerebro"'],
158+
targetsMs: {
159+
boot: 2_000,
160+
initialized: 3_000,
161+
completed: 3_200,
162+
execution: 1_500,
163+
},
164+
tolerancePercent: 50,
165+
warmupRuns: 1,
166+
measuredRuns: 3,
167+
},
168+
// live-remote: HAL status snapshot via `cerebro.exec`.
{
169+
kind: 'live-remote',
170+
name: 'hal.status',
171+
description: 'Live HAL status snapshot over cerebro.exec',
172+
args: ['cerebro.exec', '--agent', 'HAL2000', '--method', 'hal2000.status.snapshot'],
173+
envOverrides: {},
174+
expectedExitCode: 0,
175+
expectedStdoutIncludes: ['"available": true', '"status":', '"captureHealth": {'],
176+
targetsMs: {
177+
boot: 2_000,
178+
initialized: 3_200,
179+
completed: 3_500,
180+
execution: 1_800,
181+
},
182+
tolerancePercent: 55,
183+
warmupRuns: 1,
184+
measuredRuns: 3,
185+
},
186+
// live-remote: HAL Discord command telemetry via `cerebro.exec`.
{
187+
kind: 'live-remote',
188+
name: 'hal.telemetry',
189+
description: 'Live HAL Discord command telemetry over cerebro.exec',
190+
args: ['cerebro.exec', '--agent', 'HAL2000', '--method', 'hal2000.discordCommand.telemetry'],
191+
envOverrides: {},
192+
expectedExitCode: 0,
193+
expectedStdoutIncludes: ['"totalCount":', '"successCount":', '"commands": ['],
194+
targetsMs: {
195+
boot: 2_000,
196+
initialized: 3_200,
197+
completed: 3_500,
198+
execution: 1_800,
199+
},
200+
tolerancePercent: 55,
201+
warmupRuns: 1,
202+
measuredRuns: 3,
203+
},
204+
];
205+
206+
/**
 * Picks the scenarios to run from `CLI_BENCHMARK_SCENARIOS`.
 *
 * - If any non-empty names are requested, only scenarios with those names are
 *   returned (regardless of kind); unknown names are silently ignored.
 * - Otherwise, every scenario is returned when `includeLiveRemote` is truthy,
 *   and only the 'local' ones when it is not.
 *
 * The result is always a new array in the order of `CLI_BENCHMARK_SCENARIOS`.
 *
 * @param params.requestedNames Scenario names to select; empty entries are dropped.
 * @param params.includeLiveRemote Whether to include 'live-remote' scenarios when no names are given.
 * @returns The selected scenarios.
 */
export function selectBenchmarkScenarios(params: {
  requestedNames?: string[] | null;
  includeLiveRemote?: boolean;
}): CliBenchmarkScenario[] {
  const { requestedNames, includeLiveRemote } = params;
  const wanted = new Set((requestedNames ?? []).filter(Boolean));

  const isSelected = (candidate: CliBenchmarkScenario): boolean => {
    // Explicit names win over the kind-based default.
    if (wanted.size > 0) {
      return wanted.has(candidate.name);
    }
    return includeLiveRemote ? true : candidate.kind === 'local';
  };

  return CLI_BENCHMARK_SCENARIOS.filter(isSelected);
}
222+
223+
/**
 * Nearest-rank percentile of a list that is already sorted ascending.
 *
 * The rank is `ceil(percentileValue / 100 * count)`, clamped to the valid index
 * range, so out-of-range percentiles resolve to the first or last element.
 *
 * @param sortedValues Values sorted in ascending order.
 * @param percentileValue Percentile to look up, on a 0-100 scale.
 * @returns The value at the nearest rank, or 0 for an empty list.
 */
function percentile(sortedValues: number[], percentileValue: number): number {
  const count = sortedValues.length;
  if (count === 0) {
    return 0;
  }

  // Multiply before dividing. `(7 / 100) * 100` evaluates to 7.000000000000001,
  // which `Math.ceil` would turn into rank 8 instead of rank 7; `(7 * 100) / 100`
  // is exact.
  const rank = Math.ceil((percentileValue * count) / 100);
  const index = Math.min(count - 1, Math.max(0, rank - 1));
  return sortedValues[index] ?? 0;
}
234+
235+
/**
 * Builds the summary statistics for one phase from its raw timings.
 *
 * The input array is not mutated. With no samples every statistic is 0, which
 * also means `withinTolerance` is true for any non-negative allowed upper bound.
 *
 * @param values Raw timings for the phase.
 * @param targetMs Target duration for the phase.
 * @param tolerancePercent Allowed overshoot of the target, in percent.
 * @returns Average, p50, p95, min, max, the target, the allowed upper bound
 *   (`targetMs * (1 + tolerancePercent / 100)`) and whether the average is within it.
 */
export function summarizePhase(values: number[], targetMs: number, tolerancePercent: number) {
  const ascending = values.slice().sort((a, b) => a - b);
  const sampleCount = ascending.length;

  let totalMs = 0;
  for (const sample of ascending) {
    totalMs += sample;
  }

  const avgMs = sampleCount ? totalMs / sampleCount : 0;
  const allowedUpperMs = targetMs * (1 + tolerancePercent / 100);
  const p50Ms = percentile(ascending, 50);
  const p95Ms = percentile(ascending, 95);
  const minMs = ascending[0] ?? 0;
  const maxMs = ascending[sampleCount - 1] ?? 0;
  const withinTolerance = avgMs <= allowedUpperMs;

  return { avgMs, p50Ms, p95Ms, minMs, maxMs, targetMs, allowedUpperMs, withinTolerance };
}
252+
253+
/**
 * Parses JSON-lines text (one JSON object per line) into history records.
 *
 * Lines are trimmed, so CRLF endings and surrounding whitespace are tolerated,
 * and blank lines are skipped. Record fields are not validated beyond checking
 * that each line holds a JSON object.
 *
 * @param historyText Raw contents of the history file.
 * @returns The parsed records in file order.
 * @throws {SyntaxError} If a non-blank line is not valid JSON or is not a JSON
 *   object; the message names the 1-based line number.
 */
export function parseJsonLinesHistory(historyText: string): CliBenchmarkHistoryRecord[] {
  const records: CliBenchmarkHistoryRecord[] = [];

  historyText.split('\n').forEach((rawLine, index) => {
    const line = rawLine.trim();
    if (!line) {
      return;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch (error: unknown) {
      // Re-throw with the line number so a corrupt or truncated entry can be located.
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`Invalid JSON on benchmark history line ${index + 1}: ${reason}`);
    }

    // A bare number, string, null or array can never be a history record.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      throw new SyntaxError(`Benchmark history line ${index + 1} is not a JSON object`);
    }

    records.push(parsed as CliBenchmarkHistoryRecord);
  });

  return records;
}
260+
261+
/**
 * Compares the current average of one phase against the mean of up to three
 * most recent history records for the same scenario.
 *
 * Only records with suite 'cli.startup' and a matching scenario name are
 * considered. Records that lack a finite `avgMs` for the requested phase
 * (for example entries written before that phase existed) are skipped instead
 * of crashing or turning the result into NaN, so the baseline is built from the
 * latest three records that actually carry the value.
 *
 * @param history History records, oldest first.
 * @param scenarioName Scenario whose records form the baseline.
 * @param phaseName Phase to compare.
 * @param currentAvgMs Average of the current run for that phase.
 * @returns The baseline average and the absolute and percentage deltas, or all
 *   nulls when no usable baseline exists. `deltaPercent` is 0 when the baseline
 *   average is exactly 0.
 */
export function compareAgainstPreviousThree(
  history: CliBenchmarkHistoryRecord[],
  scenarioName: string,
  phaseName: CliBenchmarkPhaseName,
  currentAvgMs: number
): CliBenchmarkPhaseComparison {
  const usableAverages: number[] = [];
  for (const record of history) {
    if (record.suite !== 'cli.startup' || record.scenario !== scenarioName) {
      continue;
    }
    // History comes from disk, so the summary for this phase may be missing or malformed.
    const avgMs = record.summary?.[phaseName]?.avgMs;
    if (typeof avgMs === 'number' && Number.isFinite(avgMs)) {
      usableAverages.push(avgMs);
    }
  }

  const baseline = usableAverages.slice(-3);
  if (!baseline.length) {
    return {
      previousThreeAvgMs: null,
      deltaMs: null,
      deltaPercent: null,
    };
  }

  const previousThreeAvgMs = baseline.reduce((sum, avgMs) => sum + avgMs, 0) / baseline.length;
  const deltaMs = currentAvgMs - previousThreeAvgMs;
  // Guard the division; a zero baseline reports 0% as before.
  const deltaPercent = previousThreeAvgMs === 0 ? 0 : (deltaMs / previousThreeAvgMs) * 100;

  return {
    previousThreeAvgMs,
    deltaMs,
    deltaPercent,
  };
}
290+
291+
/**
 * Formats a percentage delta with two decimals and an explicit '+' for increases.
 *
 * The sign is taken from the rounded value, so deltas that round to zero print
 * as "0.00%" rather than "+0.00%" or "-0.00%".
 *
 * @param deltaPercent Percentage change, or null when no baseline exists.
 * @returns A string such as "+12.50%", "-3.25%" or "0.00%"; 'n/a' for null or
 *   any non-finite value.
 */
export function formatDeltaPercent(deltaPercent: number | null): string {
  if (deltaPercent === null || !Number.isFinite(deltaPercent)) {
    return 'n/a';
  }

  const fixed = deltaPercent.toFixed(2);
  const rounded = Number(fixed);
  if (rounded === 0) {
    // Covers "-0.00" as well, because -0 === 0.
    return '0.00%';
  }

  const sign = rounded > 0 ? '+' : '';
  return `${sign}${fixed}%`;
}
299+
300+
/**
 * Converts environment overrides into a plain string-to-string record.
 *
 * Entries whose value is `undefined` are dropped; every remaining value is
 * passed through `String()`.
 *
 * @param envOverrides Environment overrides, possibly containing undefined values.
 * @returns A new object holding only the defined entries.
 */
export function serializeEnvOverrides(envOverrides: NodeJS.ProcessEnv): Record<string, string> {
  const definedPairs: Array<[string, string]> = [];

  for (const [name, rawValue] of Object.entries(envOverrides)) {
    if (rawValue === undefined) {
      continue;
    }
    definedPairs.push([name, String(rawValue)]);
  }

  return Object.fromEntries(definedPairs);
}

0 commit comments

Comments
 (0)