|
| 1 | +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; |
| 2 | +import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; |
| 3 | +import { EvolutionEngine } from "../engine.ts"; |
| 4 | +import type { SessionSummary } from "../types.ts"; |
| 5 | + |
// Scratch directory for this suite's fixtures; it is removed again after every test.
const TEST_DIR = "/tmp/phantom-test-cost-cap";
// Location of the evolution YAML config that each test hands to the engine.
const CONFIG_PATH = [TEST_DIR, "config", "evolution.yaml"].join("/");

// ANTHROPIC_API_KEY as it was before the current test, kept so it can be restored afterwards.
let savedApiKey: string | undefined;
| 10 | + |
/**
 * Creates the on-disk fixture for a test run underneath TEST_DIR.
 *
 * Writes the evolution YAML config to CONFIG_PATH (judges disabled, with the
 * given daily cost cap) and seeds the phantom-config tree with a constitution,
 * a user profile, empty persona/strategy/log files, and zeroed version and
 * metrics JSON documents.
 *
 * @param costCap Value written as `judges.cost_cap_usd_per_day` in the config.
 */
function setupTestEnv(costCap: number): void {
	const phantomDir = `${TEST_DIR}/phantom-config`;
	const metaDir = `${phantomDir}/meta`;

	// Directory skeleton first; `recursive` makes this safe when parts already exist.
	const directories = [`${TEST_DIR}/config`, metaDir, `${phantomDir}/strategies`, `${phantomDir}/memory`];
	for (const directory of directories) {
		mkdirSync(directory, { recursive: true });
	}

	// Evolution config: one array entry per YAML line.
	const yamlLines = [
		"cadence:",
		" reflection_interval: 1",
		" consolidation_interval: 10",
		"gates:",
		" drift_threshold: 0.7",
		" max_file_lines: 200",
		" auto_rollback_threshold: 0.1",
		" auto_rollback_window: 5",
		"judges:",
		' enabled: "never"',
		` cost_cap_usd_per_day: ${costCap}`,
		" max_golden_suite_size: 50",
		"paths:",
		` config_dir: "${phantomDir}"`,
		` constitution: "${phantomDir}/constitution.md"`,
		` version_file: "${metaDir}/version.json"`,
		` metrics_file: "${metaDir}/metrics.json"`,
		` evolution_log: "${metaDir}/evolution-log.jsonl"`,
		` golden_suite: "${metaDir}/golden-suite.jsonl"`,
		` session_log: "${phantomDir}/memory/session-log.jsonl"`,
	];
	writeFileSync(CONFIG_PATH, yamlLines.join("\n"), "utf-8");

	// Initial version record: version 0 with no parent and no changes.
	const initialVersion = {
		version: 0,
		parent: null,
		timestamp: new Date().toISOString(),
		changes: [],
		metrics_at_change: { session_count: 0, success_rate_7d: 0, correction_rate_7d: 0 },
	};

	// Initial metrics record: every counter and rate starts at zero.
	const initialMetrics = {
		session_count: 0,
		success_count: 0,
		failure_count: 0,
		correction_count: 0,
		evolution_count: 0,
		rollback_count: 0,
		last_session_at: null,
		last_evolution_at: null,
		success_rate_7d: 0,
		correction_rate_7d: 0,
		sessions_since_consolidation: 0,
	};

	// Every seeded file as a [path, content] pair; the writes are independent of each other.
	const seededFiles: Array<[string, string]> = [
		[`${phantomDir}/constitution.md`, "# Constitution\n1. Be honest.\n"],
		[`${phantomDir}/persona.md`, ""],
		[`${phantomDir}/user-profile.md`, "# User Profile\n"],
		[`${phantomDir}/domain-knowledge.md`, ""],
		[`${phantomDir}/strategies/task-patterns.md`, ""],
		[`${phantomDir}/strategies/tool-preferences.md`, ""],
		[`${phantomDir}/strategies/error-recovery.md`, ""],
		[`${phantomDir}/memory/session-log.jsonl`, ""],
		[`${metaDir}/version.json`, JSON.stringify(initialVersion)],
		[`${metaDir}/metrics.json`, JSON.stringify(initialMetrics)],
		[`${metaDir}/evolution-log.jsonl`, ""],
		[`${metaDir}/golden-suite.jsonl`, ""],
	];
	for (const [filePath, content] of seededFiles) {
		writeFileSync(filePath, content, "utf-8");
	}
}
| 83 | + |
/**
 * Builds a SessionSummary for tests.
 *
 * The defaults describe a successful CLI session whose single user message is
 * "No, use TypeScript not JavaScript"; the session id is derived from the
 * current time in milliseconds.
 *
 * @param overrides Fields that replace the corresponding default values.
 * @returns The default session with `overrides` applied on top.
 */
function makeSession(overrides: Partial<SessionSummary> = {}): SessionSummary {
	const defaults: SessionSummary = {
		session_id: `session-${Date.now()}`,
		session_key: "cli:main",
		user_id: "user-1",
		user_messages: ["No, use TypeScript not JavaScript"],
		assistant_messages: ["Got it."],
		tools_used: [],
		files_tracked: [],
		outcome: "success",
		cost_usd: 0.05,
		started_at: "2026-03-25T10:00:00Z",
		ended_at: "2026-03-25T10:05:00Z",
	};

	// Later spread wins, so any field supplied by the caller replaces its default.
	return { ...defaults, ...overrides };
}
| 100 | + |
// Tests for the judges' daily cost cap setting and for the heuristic path used when judges are disabled.
describe("Cost Cap", () => {
	// Remember the caller's API key so each test leaves the process environment as it found it.
	beforeEach(() => {
		savedApiKey = process.env.ANTHROPIC_API_KEY;
	});

	afterEach(() => {
		if (savedApiKey !== undefined) {
			process.env.ANTHROPIC_API_KEY = savedApiKey;
		} else {
			// The key was not set originally, so remove it. Assigning `undefined` would
			// leave the key present (Node stores the string "undefined"), which is not
			// the same as the variable being unset.
			delete process.env.ANTHROPIC_API_KEY;
		}
		// Drop the whole fixture tree; `force` tolerates a test that never created it.
		rmSync(TEST_DIR, { recursive: true, force: true });
	});

	test("cost cap config is parsed from YAML", () => {
		setupTestEnv(10.0);
		const engine = new EvolutionEngine(CONFIG_PATH);
		const config = engine.getEvolutionConfig();
		expect(config.judges.cost_cap_usd_per_day).toBe(10.0);
	});

	test("cost cap defaults to 50 when not configured", () => {
		// This fixture is built by hand rather than via setupTestEnv because the config
		// must contain only `paths:` and no `judges:` section, so the cost cap has to
		// come from the engine's own default.
		mkdirSync(`${TEST_DIR}/config`, { recursive: true });
		mkdirSync(`${TEST_DIR}/phantom-config/meta`, { recursive: true });
		mkdirSync(`${TEST_DIR}/phantom-config/strategies`, { recursive: true });
		mkdirSync(`${TEST_DIR}/phantom-config/memory`, { recursive: true });

		writeFileSync(
			CONFIG_PATH,
			[
				"paths:",
				` config_dir: "${TEST_DIR}/phantom-config"`,
				` constitution: "${TEST_DIR}/phantom-config/constitution.md"`,
				` version_file: "${TEST_DIR}/phantom-config/meta/version.json"`,
				` metrics_file: "${TEST_DIR}/phantom-config/meta/metrics.json"`,
				` evolution_log: "${TEST_DIR}/phantom-config/meta/evolution-log.jsonl"`,
				` golden_suite: "${TEST_DIR}/phantom-config/meta/golden-suite.jsonl"`,
				` session_log: "${TEST_DIR}/phantom-config/memory/session-log.jsonl"`,
			].join("\n"),
			"utf-8",
		);
		writeFileSync(`${TEST_DIR}/phantom-config/constitution.md`, "# Constitution\n", "utf-8");
		writeFileSync(`${TEST_DIR}/phantom-config/persona.md`, "", "utf-8");
		writeFileSync(`${TEST_DIR}/phantom-config/user-profile.md`, "", "utf-8");
		writeFileSync(`${TEST_DIR}/phantom-config/domain-knowledge.md`, "", "utf-8");
		writeFileSync(`${TEST_DIR}/phantom-config/strategies/task-patterns.md`, "", "utf-8");
		writeFileSync(`${TEST_DIR}/phantom-config/strategies/tool-preferences.md`, "", "utf-8");
		writeFileSync(`${TEST_DIR}/phantom-config/strategies/error-recovery.md`, "", "utf-8");
		writeFileSync(`${TEST_DIR}/phantom-config/memory/session-log.jsonl`, "", "utf-8");
		writeFileSync(
			`${TEST_DIR}/phantom-config/meta/version.json`,
			JSON.stringify({
				version: 0,
				parent: null,
				timestamp: new Date().toISOString(),
				changes: [],
				metrics_at_change: { session_count: 0, success_rate_7d: 0, correction_rate_7d: 0 },
			}),
			"utf-8",
		);
		writeFileSync(
			`${TEST_DIR}/phantom-config/meta/metrics.json`,
			JSON.stringify({
				session_count: 0,
				success_count: 0,
				failure_count: 0,
				correction_count: 0,
				evolution_count: 0,
				rollback_count: 0,
				last_session_at: null,
				last_evolution_at: null,
				success_rate_7d: 0,
				correction_rate_7d: 0,
				sessions_since_consolidation: 0,
			}),
			"utf-8",
		);
		writeFileSync(`${TEST_DIR}/phantom-config/meta/evolution-log.jsonl`, "", "utf-8");
		writeFileSync(`${TEST_DIR}/phantom-config/meta/golden-suite.jsonl`, "", "utf-8");

		const engine = new EvolutionEngine(CONFIG_PATH);
		expect(engine.getEvolutionConfig().judges.cost_cap_usd_per_day).toBe(50.0);
	});

	test("engine uses heuristic path when judges are disabled", async () => {
		setupTestEnv(50.0);
		const engine = new EvolutionEngine(CONFIG_PATH);

		// judges.enabled: "never" means heuristics
		expect(engine.usesLLMJudges()).toBe(false);

		const result = await engine.afterSession(makeSession());
		// Should still work with heuristics
		expect(result.changes_applied.length).toBeGreaterThan(0);

		// The default session's user message mentions TypeScript; the test expects
		// that word to end up in the user profile file.
		const userProfile = readFileSync(`${TEST_DIR}/phantom-config/user-profile.md`, "utf-8");
		expect(userProfile).toContain("TypeScript");
	});
});
0 commit comments