diff --git a/cli/selftune/orchestrate.ts b/cli/selftune/orchestrate.ts
index ae2f61d..e2dab3e 100644
--- a/cli/selftune/orchestrate.ts
+++ b/cli/selftune/orchestrate.ts
@@ -75,6 +75,196 @@ export interface OrchestrateResult {
   };
 }
 
+// ---------------------------------------------------------------------------
+// Human-readable decision report
+// ---------------------------------------------------------------------------
+
+/**
+ * Builds the "Phase 1: Sync" section: one line per sync source, followed by a
+ * repair line when the repair step ran and repaired at least one record.
+ */
+function formatSyncPhase(syncResult: SyncResult): string[] {
+  const lines: string[] = ["Phase 1: Sync"];
+  const sources: [string, keyof SyncResult["sources"]][] = [
+    ["Claude", "claude"],
+    ["Codex", "codex"],
+    ["OpenCode", "opencode"],
+    ["OpenClaw", "openclaw"],
+  ];
+
+  for (const [label, key] of sources) {
+    const s = syncResult.sources[key];
+    if (!s.available) {
+      lines.push(` ${label.padEnd(12)}not available`);
+    } else if (s.synced > 0) {
+      lines.push(` ${label.padEnd(12)}scanned ${s.scanned}, synced ${s.synced}`);
+    } else {
+      lines.push(` ${label.padEnd(12)}scanned ${s.scanned}, up to date`);
+    }
+  }
+
+  if (syncResult.repair.ran && syncResult.repair.repaired_records > 0) {
+    lines.push(
+      ` Repair ${syncResult.repair.repaired_records} records across ${syncResult.repair.repaired_sessions} sessions`,
+    );
+  }
+
+  return lines;
+}
+
+/**
+ * Builds the "Phase 2: Status" section: skill count, overall system health, and
+ * a per-status tally listed in a fixed severity order.
+ */
+function formatStatusPhase(statusResult: StatusResult): string[] {
+  const lines: string[] = ["Phase 2: Status"];
+  const byStatus: Record<string, number> = {};
+  for (const skill of statusResult.skills) {
+    byStatus[skill.status] = (byStatus[skill.status] ?? 0) + 1;
+  }
+  const healthLabel = statusResult.system.healthy ? "healthy" : "UNHEALTHY";
+  lines.push(` ${statusResult.skills.length} skills found, system ${healthLabel}`);
+
+  const parts: string[] = [];
+  for (const s of ["CRITICAL", "WARNING", "HEALTHY", "UNGRADED", "UNKNOWN"]) {
+    if (byStatus[s]) parts.push(`${byStatus[s]} ${s}`);
+  }
+  if (parts.length > 0) lines.push(` ${parts.join(", ")}`);
+
+  return lines;
+}
+
+/**
+ * Builds the "Phase 3: Skill Decisions" section: one line per candidate with an
+ * icon, the skill name, the upper-cased action, and the reason.
+ */
+function formatDecisionPhase(candidates: SkillAction[]): string[] {
+  const lines: string[] = ["Phase 3: Skill Decisions"];
+  if (candidates.length === 0) {
+    lines.push(" (no skills to evaluate)");
+    return lines;
+  }
+
+  for (const c of candidates) {
+    // Icons: U+2298 for skip, U+25CB for watch, U+2192 for every other action.
+    const icon = c.action === "skip" ? "\u2298" : c.action === "watch" ? "\u25cb" : "\u2192";
+    const actionLabel = c.action.toUpperCase().padEnd(7);
+    lines.push(` ${icon} ${c.skill.padEnd(20)} ${actionLabel} ${c.reason}`);
+  }
+
+  return lines;
+}
+
+/**
+ * Builds the "Phase 4: Evolution Results" section for "evolve" candidates that
+ * carry an evolveResult. Returns an empty array when there are none.
+ */
+function formatEvolutionPhase(candidates: SkillAction[]): string[] {
+  const evolved = candidates.filter((c) => c.action === "evolve" && c.evolveResult !== undefined);
+  if (evolved.length === 0) return [];
+
+  const lines: string[] = ["Phase 4: Evolution Results"];
+  for (const c of evolved) {
+    const r = c.evolveResult;
+    // The filter above already excludes undefined; this guard narrows the type.
+    if (!r) continue;
+    const status = r.deployed ? "deployed" : "not deployed";
+    const detail = r.reason;
+    const validation = r.validation
+      ? ` (${(r.validation.before_pass_rate * 100).toFixed(0)}% \u2192 ${(r.validation.after_pass_rate * 100).toFixed(0)}%)`
+      : "";
+    lines.push(` ${c.skill.padEnd(20)} ${status}${validation}`);
+    lines.push(` ${"".padEnd(20)} ${detail}`);
+  }
+
+  return lines;
+}
+
+/**
+ * Builds the "Phase 5: Watch" section for "watch" candidates. Returns an empty
+ * array when there are none.
+ */
+function formatWatchPhase(candidates: SkillAction[]): string[] {
+  const watched = candidates.filter((c) => c.action === "watch");
+  if (watched.length === 0) return [];
+
+  const lines: string[] = ["Phase 5: Watch"];
+  for (const c of watched) {
+    const snap = c.watchResult?.snapshot;
+    // Append metrics only when a snapshot exists; otherwise the line would end in " ()".
+    const metrics = snap
+      ? ` (pass_rate=${snap.pass_rate.toFixed(2)}, baseline=${snap.baseline_pass_rate.toFixed(2)})`
+      : "";
+    const alertTag = c.watchResult?.alert ? " [ALERT]" : "";
+    lines.push(` ${c.skill.padEnd(20)} ${c.reason}${alertTag}${metrics}`);
+  }
+
+  return lines;
+}
+
+/**
+ * Renders the full orchestrate decision report as newline-joined text: a header,
+ * the mode banner, phases 1-3, phases 4 and 5 when they have content, and a summary.
+ *
+ * @param result - Orchestrate run result to summarize.
+ * @returns The report text, without a trailing newline.
+ */
+export function formatOrchestrateReport(result: OrchestrateResult): string {
+  const sep = "\u2550".repeat(48);
+  const lines: string[] = [];
+
+  lines.push(sep);
+  lines.push("selftune orchestrate \u2014 decision report");
+  lines.push(sep);
+  lines.push("");
+
+  // Mode banner
+  if (result.summary.dryRun) {
+    lines.push("Mode: DRY RUN (pass --auto-approve to deploy)");
+  } else if (result.summary.autoApprove) {
+    lines.push("Mode: AUTO-APPROVE (changes will be deployed)");
+  } else {
+    lines.push("Mode: LIVE (proposals validated but not deployed)");
+  }
+  lines.push("");
+
+  // Phase 1: Sync
+  lines.push(...formatSyncPhase(result.syncResult));
+  lines.push("");
+
+  // Phase 2: Status
+  lines.push(...formatStatusPhase(result.statusResult));
+  lines.push("");
+
+  // Phase 3: Skill decisions
+  lines.push(...formatDecisionPhase(result.candidates));
+  lines.push("");
+
+  // Phase 4: Evolution results (only if any evolve ran)
+  const evoLines = formatEvolutionPhase(result.candidates);
+  if (evoLines.length > 0) {
+    lines.push(...evoLines);
+    lines.push("");
+  }
+
+  // Phase 5: Watch (only if any watched)
+  const watchLines = formatWatchPhase(result.candidates);
+  if (watchLines.length > 0) {
+    lines.push(...watchLines);
+    lines.push("");
+  }
+
+  // Final summary
+  lines.push("Summary");
+  lines.push(` Evaluated: ${result.summary.evaluated} skills`);
+  lines.push(` Deployed: ${result.summary.deployed}`);
+  lines.push(` Watched: ${result.summary.watched}`);
+  lines.push(` Skipped: ${result.summary.skipped}`);
+  lines.push(` Elapsed: ${(result.summary.elapsedMs / 1000).toFixed(1)}s`);
+
+  return lines.join("\n");
+}
+
 /** Candidate selection criteria. */
 const CANDIDATE_STATUSES = new Set(["CRITICAL", "WARNING", "UNGRADED"]);
 
@@ -486,25 +676,37 @@ Examples:
     syncForce: values["sync-force"] ?? false,
   });
 
-  // Print JSON summary to stdout
-  console.log(JSON.stringify(result.summary, null, 2));
-
-  // Print human-readable recap to stderr
-  console.error(`\n${"═".repeat(40)}`);
-  console.error("selftune orchestrate — summary");
-  console.error("═".repeat(40));
-  console.error(` Total skills: ${result.summary.totalSkills}`);
-  console.error(` Evaluated: ${result.summary.evaluated}`);
-  console.error(` Deployed: ${result.summary.deployed}`);
-  console.error(` Watched: ${result.summary.watched}`);
-  console.error(` Skipped: ${result.summary.skipped}`);
-  console.error(` Dry run: ${result.summary.dryRun}`);
-  console.error(` Auto-approve: ${result.summary.autoApprove}`);
-  console.error(` Elapsed: ${(result.summary.elapsedMs / 1000).toFixed(1)}s`);
-
-  if (result.summary.dryRun && result.summary.evaluated > 0) {
-    console.error("\n Pass --auto-approve to deploy validated changes.");
-  }
+  // JSON output: include candidates for machine consumption
+  const jsonOutput = {
+    ...result.summary,
+    decisions: result.candidates.map((c) => ({
+      skill: c.skill,
+      action: c.action,
+      reason: c.reason,
+      ...(c.evolveResult
+        ? {
+            deployed: c.evolveResult.deployed,
+            validation: c.evolveResult.validation
+              ? {
+                  before: c.evolveResult.validation.before_pass_rate,
+                  after: c.evolveResult.validation.after_pass_rate,
+                  improved: c.evolveResult.validation.improved,
+                }
+              : null,
+          }
+        : {}),
+      ...(c.watchResult
+        ? {
+            alert: c.watchResult.alert,
+            passRate: c.watchResult.snapshot.pass_rate,
+          }
+        : {}),
+    })),
+  };
+  console.log(JSON.stringify(jsonOutput, null, 2));
+
+  // Print human-readable decision report to stderr
+  console.error(`\n${formatOrchestrateReport(result)}`);
 
   process.exit(0);
 }
diff --git a/tests/orchestrate.test.ts b/tests/orchestrate.test.ts
index fd7a165..64b4fcb 100644
--- a/tests/orchestrate.test.ts
+++ b/tests/orchestrate.test.ts
@@ -1,7 +1,10 @@
 import { describe, expect, test } from "bun:test";
+
 import {
+  formatOrchestrateReport,
   type OrchestrateDeps,
   type OrchestrateOptions,
+  type OrchestrateResult,
   orchestrate,
   selectCandidates,
 } from "../cli/selftune/orchestrate.js";
@@ -332,3 +335,198 @@ describe("orchestrate", () => {
     expect(candidate?.reason).toContain("no agent CLI");
   });
 });
+
+// ---------------------------------------------------------------------------
+// formatOrchestrateReport
+// ---------------------------------------------------------------------------
+
+/** Builds a baseline OrchestrateResult fixture; `overrides` replaces top-level fields. */
+function makeOrchestrateResult(overrides: Partial<OrchestrateResult> = {}): OrchestrateResult {
+  const step: SyncStepResult = { available: true, scanned: 10, synced: 2, skipped: 0 };
+  return {
+    syncResult: {
+      since: null,
+      dry_run: false,
+      sources: {
+        claude: step,
+        codex: { available: false, scanned: 0, synced: 0, skipped: 0 },
+        opencode: { available: true, scanned: 5, synced: 0, skipped: 0 },
+        openclaw: { available: false, scanned: 0, synced: 0, skipped: 0 },
+      },
+      repair: { ran: true, repaired_sessions: 3, repaired_records: 7, codex_repaired_records: 0 },
+      timings: [],
+      total_elapsed_ms: 500,
+    },
+    statusResult: makeStatusResult([
+      makeSkill({ name: "Research", status: "CRITICAL", passRate: 0.35, missedQueries: 8 }),
+      makeSkill({ name: "Browser", status: "WARNING", passRate: 0.55, missedQueries: 3 }),
+      makeSkill({ name: "Content", status: "HEALTHY", passRate: 0.9, missedQueries: 0 }),
+    ]),
+    candidates: [
+      { skill: "Research", action: "evolve", reason: "status=CRITICAL, passRate=35%, missed=8" },
+      { skill: "Browser", action: "evolve", reason: "status=WARNING, passRate=55%, missed=3" },
+      { skill: "Content", action: "skip", reason: "status=HEALTHY — no action needed" },
+    ],
+    summary: {
+      totalSkills: 3,
+      evaluated: 2,
+      evolved: 0,
+      deployed: 0,
+      watched: 0,
+      skipped: 1,
+      dryRun: true,
+      autoApprove: false,
+      elapsedMs: 1200,
+    },
+    ...overrides,
+  };
+}
+
+describe("formatOrchestrateReport", () => {
+  test("includes dry-run mode banner", () => {
+    const report = formatOrchestrateReport(makeOrchestrateResult());
+    expect(report).toContain("DRY RUN");
+    expect(report).toContain("--auto-approve");
+  });
+
+  test("includes auto-approve mode banner", () => {
+    const report = formatOrchestrateReport(
+      makeOrchestrateResult({
+        summary: {
+          ...makeOrchestrateResult().summary,
+          dryRun: false,
+          autoApprove: true,
+        },
+      }),
+    );
+    expect(report).toContain("AUTO-APPROVE");
+  });
+
+  test("shows sync sources with availability", () => {
+    const report = formatOrchestrateReport(makeOrchestrateResult());
+    expect(report).toContain("Claude");
+    expect(report).toContain("synced 2");
+    expect(report).toContain("Codex");
+    expect(report).toContain("not available");
+    expect(report).toContain("OpenCode");
+    expect(report).toContain("up to date");
+  });
+
+  test("shows repair info when records were repaired", () => {
+    const report = formatOrchestrateReport(makeOrchestrateResult());
+    expect(report).toContain("7 records across 3 sessions");
+  });
+
+  test("shows status breakdown by category", () => {
+    const report = formatOrchestrateReport(makeOrchestrateResult());
+    expect(report).toContain("1 CRITICAL");
+    expect(report).toContain("1 WARNING");
+    expect(report).toContain("1 HEALTHY");
+  });
+
+  test("lists each skill decision with action and reason", () => {
+    const report = formatOrchestrateReport(makeOrchestrateResult());
+    expect(report).toContain("Research");
+    expect(report).toContain("EVOLVE");
+    expect(report).toContain("status=CRITICAL");
+    expect(report).toContain("Content");
+    expect(report).toContain("SKIP");
+    expect(report).toContain("no action needed");
+  });
+
+  test("includes evolution results when evolve ran", () => {
+    const result = makeOrchestrateResult({
+      candidates: [
+        {
+          skill: "Research",
+          action: "evolve",
+          reason: "status=CRITICAL",
+          evolveResult: {
+            proposal: null,
+            validation: {
+              improved: true,
+              before_pass_rate: 0.35,
+              after_pass_rate: 0.7,
+              net_change: 0.35,
+              regressions: [],
+              new_passes: [],
+              per_entry_results: [],
+            },
+            deployed: true,
+            auditEntries: [],
+            reason: "Evolution deployed successfully",
+            llmCallCount: 5,
+            elapsedMs: 3000,
+          },
+        },
+      ],
+    });
+    const report = formatOrchestrateReport(result);
+    expect(report).toContain("Evolution Results");
+    expect(report).toContain("deployed");
+    expect(report).toContain("35%");
+    expect(report).toContain("70%");
+  });
+
+  test("includes watch results when skills were watched", () => {
+    const result = makeOrchestrateResult({
+      candidates: [
+        {
+          skill: "RecentSkill",
+          action: "watch",
+          reason: "stable",
+          watchResult: {
+            snapshot: {
+              timestamp: new Date().toISOString(),
+              skill_name: "RecentSkill",
+              window_sessions: 20,
+              skill_checks: 10,
+              pass_rate: 0.9,
+              false_negative_rate: 0.1,
+              by_invocation_type: {
+                explicit: { passed: 5, total: 5 },
+                implicit: { passed: 3, total: 5 },
+                contextual: { passed: 0, total: 0 },
+                negative: { passed: 0, total: 0 },
+              },
+              regression_detected: false,
+              baseline_pass_rate: 0.8,
+            },
+            alert: null,
+            rolledBack: false,
+            recommendation: "stable",
+          },
+        },
+      ],
+    });
+    const report = formatOrchestrateReport(result);
+    expect(report).toContain("Watch");
+    expect(report).toContain("RecentSkill");
+    expect(report).toContain("pass_rate=0.90");
+  });
+
+  test("omits the metrics suffix when a watched skill has no watch result", () => {
+    const result = makeOrchestrateResult({
+      candidates: [{ skill: "PendingSkill", action: "watch", reason: "recently deployed" }],
+    });
+    const report = formatOrchestrateReport(result);
+    expect(report).toContain("PendingSkill");
+    expect(report).not.toContain("()");
+  });
+
+  test("shows summary counts", () => {
+    const report = formatOrchestrateReport(makeOrchestrateResult());
+    expect(report).toContain("Evaluated: 2 skills");
+    expect(report).toContain("Skipped: 1");
+    expect(report).toContain("Elapsed: 1.2s");
+  });
+
+  test("omits evolution and watch phases when empty", () => {
+    const result = makeOrchestrateResult({
+      candidates: [{ skill: "Content", action: "skip", reason: "status=HEALTHY" }],
+    });
+    const report = formatOrchestrateReport(result);
+    expect(report).not.toContain("Evolution Results");
+    expect(report).not.toContain("Phase 5: Watch");
+  });
+});