diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5084e7b..4a7ed14 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,6 +20,16 @@ jobs:
- run: bunx @biomejs/biome check .
- run: bun run lint-architecture.ts
+ build-dashboard:
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ steps:
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+ - uses: oven-sh/setup-bun@ecf28ddc73e819eb6fa29df6b34ef8921c743461 # v2
+ - run: bun install
+ - run: bun run build:dashboard
+
test:
runs-on: ubuntu-latest
permissions:
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 2d06499..f4f7139 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -60,6 +60,9 @@ jobs:
- name: Install dependencies
run: bun install
+ - name: Build dashboard SPA
+ run: bun run build:dashboard
+
- name: Verify npm version for trusted publishing
run: npm --version
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index c080752..af694bc 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -44,8 +44,8 @@ cli/selftune/
├── observability.ts Health checks (doctor command)
├── status.ts Skill health summary (status command)
├── last.ts Last session insight (last command)
-├── dashboard.ts HTML dashboard builder (dashboard command)
-├── dashboard-server.ts Live Bun.serve server with SSE (dashboard --serve)
+├── dashboard.ts Dashboard command entry point (SPA server launcher)
+├── dashboard-server.ts Bun.serve SPA + v2 API server
├── types.ts Shared interfaces (incl. SelftuneConfig)
├── constants.ts Log paths, config paths, known tools
├── utils/ Shared utilities (jsonl, transcript, logging, llm-call, schema-validator, trigger-check)
@@ -100,9 +100,6 @@ apps/local-dashboard/ React SPA dashboard (Vite + TypeScript + shadcn/ui)
├── vite.config.ts Dev proxy → dashboard-server, build to dist/
└── package.json React 19, Tailwind v4, shadcn/ui, recharts
-dashboard/ Legacy HTML dashboard (served at /legacy/)
-└── index.html Original embedded-JSON dashboard (v1 endpoints)
-
templates/ Settings and config templates
├── single-skill-settings.json
├── multi-skill-settings.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a821bd7..e3215e8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,7 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
- Onboarding flow: full empty-state guide for first-time users (3-step setup), dismissible welcome banner for returning users (localStorage-persisted)
- **SQLite v2 API endpoints** — `GET /api/v2/overview` and `GET /api/v2/skills/:name` backed by materialized SQLite queries (`getOverviewPayload()`, `getSkillReportPayload()`, `getSkillsList()`)
- **SQL query optimizations** — Replaced `NOT IN` subqueries with `LEFT JOIN + IS NULL`, moved JS-side dedup to SQL `GROUP BY`, added `LIMIT 200` to unbounded evidence queries
-- **SPA serving from dashboard server** — Built SPA served at `/`, legacy HTML dashboard moved to `/legacy/`
+- **SPA serving from dashboard server** — Built SPA served at `/` as the supported local dashboard experience
- **Source-truth-driven pipeline** — Transcripts and rollouts are now the authoritative source; `sync` rebuilds repaired overlays from source data rather than relying solely on hook-time capture
- **Telemetry contract package** — `@selftune/telemetry-contract` workspace package with canonical schema types, validators, versioning, metadata, and golden fixture tests
- **Test split** — `make test-fast` / `make test-slow` and `bun run test:fast` / `bun run test:slow` for faster development feedback loop
diff --git a/README.md b/README.md
index 56de090..86b84c4 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@
[](https://www.npmjs.com/package/selftune?activeTab=dependencies)
[](https://bun.sh)
-Your agent skills learn how you work. Detect what's broken. Fix it automatically.
+Your agent skills learn how you work. Detect what's broken. Improve low-risk skill behavior automatically.
**[Install](#install)** · **[Use Cases](#built-for-how-you-actually-work)** · **[How It Works](#how-it-works)** · **[Commands](#commands)** · **[Platforms](#platforms)** · **[Docs](docs/integration-guide.md)**
@@ -23,7 +23,7 @@ Your agent skills learn how you work. Detect what's broken. Fix it automatically
---
-Your skills don't understand how you talk. You say "make me a slide deck" and nothing happens — no error, no log, no signal. selftune watches your real sessions, learns how you actually speak, and rewrites skill descriptions to match. Automatically.
+Your skills do not understand how you talk. You say "make me a slide deck" and nothing happens: no error, no signal, no clue why the right skill never fired. selftune reads the transcripts and telemetry your agent already saves, learns how you actually speak, and improves skill descriptions to match. It validates changes before deployment, watches for regressions after, and rolls back when needed.
Built for **Claude Code**. Also works with Codex, OpenCode, and OpenClaw. Zero runtime dependencies.
@@ -35,9 +35,28 @@ npx skills add selftune-dev/selftune
Then tell your agent: **"initialize selftune"**
-Two minutes. No API keys. No external services. No configuration ceremony. Uses your existing agent subscription. Within minutes you'll see which skills are undertriggering.
+Two minutes. No API keys. No external services. No configuration ceremony. Uses your existing agent subscription.
-**CLI only** (no skill, just the CLI):
+Quick proof path:
+
+```bash
+npx selftune@latest doctor
+npx selftune@latest sync
+npx selftune@latest status
+npx selftune@latest dashboard
+```
+
+Use `--force` only when you explicitly need to rebuild local state from scratch.
+
+Autonomy quick start:
+
+```bash
+npx selftune@latest init --enable-autonomy
+npx selftune@latest orchestrate --dry-run
+npx selftune@latest schedule --install --dry-run
+```
+
+**CLI only** (no installed skill):
```bash
npx selftune@latest doctor
@@ -68,51 +87,51 @@ combinations repeat, which ones help, and where the friction is.
-A continuous feedback loop that makes your skills learn and adapt. Automatically.
+A continuous feedback loop that makes your skills learn and adapt from real work.
-**Observe** — Hooks capture every user query and which skills fired. On Claude Code, hooks install automatically. Use `selftune replay` to backfill existing transcripts. This is how your skills start learning.
+**Observe** — selftune reads the transcripts and telemetry your agents already save. On Claude Code, hooks can add low-latency hints, but transcripts and logs are the source of truth. Use `selftune sync` to ingest current activity and `selftune replay` to backfill older Claude Code sessions.
-**Detect** — selftune finds the gap between how you talk and how your skills are described. You say "make me a slide deck" and your pptx skill stays silent — selftune catches that mismatch.
+**Detect** — selftune finds the gap between how you talk and how your skills are described. It spots missed triggers, underperforming descriptions, noisy environments, and regressions in real usage.
-**Evolve** — Rewrites skill descriptions — and full skill bodies — to match how you actually work. Batched validation with per-stage model control (`--cheap-loop` uses haiku for the loop, sonnet for the gate). Teacher-student body evolution with 3-gate validation. Baseline comparison gates on measurable lift. Automatic backup.
+**Evolve** — For low-risk changes, selftune can autonomously rewrite skill descriptions to match how you actually work. Every proposal is validated before deploy. Full skill-body or routing changes stay available for higher-touch workflows.
-**Watch** — After deploying changes, selftune monitors skill trigger rates. If anything regresses, it rolls back automatically. Your skills keep improving without you touching them.
+**Watch** — After deploying changes, selftune monitors trigger quality and post-deploy evidence. If something regresses, it can roll back automatically. The goal is autonomous improvement with safeguards, not blind self-editing.
-## What's New in v0.2.0
+## What's New in v0.2.x
-- **Full skill body evolution** — Beyond descriptions: evolve routing tables and entire skill bodies using teacher-student model with structural, trigger, and quality gates
-- **Synthetic eval generation** — `selftune evals --synthetic` generates eval sets from SKILL.md via LLM, no session logs needed. Solves cold-start: new skills get evals immediately.
-- **Cheap-loop evolution** — `selftune evolve --cheap-loop` uses haiku for proposal generation and validation, sonnet only for the final deployment gate. ~80% cost reduction.
-- **Batch trigger validation** — Validation now batches 10 queries per LLM call instead of one-per-query. ~10x faster evolution loops.
-- **Per-stage model control** — `--validation-model`, `--proposal-model`, and `--gate-model` flags give fine-grained control over which model runs each evolution stage.
-- **Auto-activation system** — Hooks detect when selftune should run and suggest actions
-- **Enforcement guardrails** — Blocks SKILL.md edits on monitored skills unless `selftune watch` has been run
-- **React SPA dashboard** — `selftune dashboard` serves a React SPA with skill health grid, per-skill drilldown, evidence viewer, evolution timeline, dark/light theming, and SQLite-backed v2 API (legacy dashboard at `/legacy/`)
-- **Evolution memory** — Persists context, plans, and decisions across context resets
-- **4 specialized agents** — Diagnosis analyst, pattern analyst, evolution reviewer, integration guide
-- **Sandbox test harness** — Comprehensive automated test coverage, including devcontainer-based LLM testing
-- **Workflow discovery + codification** — `selftune workflows` finds repeated
- multi-skill sequences from telemetry, and `selftune workflows save
- ` appends them to `## Workflows` in SKILL.md
+- **Source-truth sync** — `selftune sync` now leads the product loop, using transcripts/logs as truth and hooks as hints
+- **SQLite-backed local app** — `selftune dashboard` now serves the React SPA by default with faster overview/report routes on top of materialized local data
+- **Autonomous low-risk evolution** — description evolution is autonomous by default, with an explicit review-required mode for stricter policies
+- **Autonomous scheduling** — `selftune init --enable-autonomy` and `selftune schedule --install` make the orchestrated loop the default recurring runtime
+- **Full skill body evolution** — evolve routing tables and entire skill bodies using teacher-student model with structural, trigger, and quality gates
+- **Synthetic eval generation** — `selftune evals --synthetic` generates eval sets from `SKILL.md` for cold-start skills
+- **Cheap-loop evolution** — `selftune evolve --cheap-loop` uses haiku for proposal generation and validation, sonnet only for the final deployment gate
+- **Per-stage model control** — `--validation-model`, `--proposal-model`, and `--gate-model` give fine-grained control over each evolution stage
+- **Sandbox test harness** — automated coverage, including devcontainer-based LLM testing
+- **Workflow discovery + codification** — `selftune workflows` finds repeated multi-skill sequences from telemetry and can append them to `## Workflows` in `SKILL.md`
## Commands
| Command | What it does |
|---|---|
+| `selftune doctor` | Health check: logs, config, permissions, dashboard build/runtime expectations |
+| `selftune sync` | Ingest source-truth activity from supported agents and rebuild local state |
| `selftune status` | See which skills are undertriggering and why |
+| `selftune dashboard` | Open the React SPA dashboard (SQLite-backed) |
+| `selftune orchestrate` | Run the core loop: sync, inspect candidates, evolve, and watch |
+| `selftune schedule --install` | Install platform-native scheduling for the autonomous loop |
| `selftune evals --skill <name>` | Generate eval sets from real session data (`--synthetic` for cold-start) |
| `selftune evolve --skill <name>` | Propose, validate, and deploy improved descriptions (`--cheap-loop`, `--with-baseline`) |
| `selftune evolve-body --skill <name>` | Evolve full skill body or routing table (teacher-student, 3-gate validation) |
+| `selftune watch --skill <name>` | Monitor after deploy. Auto-rollback on regression. |
+| `selftune replay` | Backfill data from existing Claude Code transcripts |
| `selftune baseline --skill <name>` | Measure skill value vs no-skill baseline |
| `selftune unit-test --skill <name>` | Run or generate skill-level unit tests |
| `selftune composability --skill <name>` | Measure synergy and conflicts between co-occurring skills, with workflow-candidate hints |
| `selftune workflows` | Discover repeated multi-skill workflows and save a discovered workflow into `SKILL.md` |
| `selftune import-skillsbench` | Import external eval corpus from [SkillsBench](https://github.com/benchflow-ai/skillsbench) |
| `selftune badge --skill <name>` | Generate skill health badge SVG |
-| `selftune watch --skill ` | Monitor after deploy. Auto-rollback on regression. |
-| `selftune dashboard` | Open the React SPA dashboard (SQLite-backed) |
-| `selftune replay` | Backfill data from existing Claude Code transcripts |
-| `selftune doctor` | Health check: logs, hooks, config, permissions |
+| `selftune cron setup` | Optional scheduler helper for OpenClaw-oriented automation |
Full command reference: `selftune --help`
@@ -141,13 +160,13 @@ Observability tools trace LLM calls. Skill authoring tools help you write skills
## Platforms
-**Claude Code** (primary) — Hooks install automatically. `selftune replay` backfills existing transcripts. Full feature support.
+**Claude Code** (primary) — Reads saved transcripts and telemetry directly. Hooks install automatically and add low-latency hints. `selftune replay` backfills older Claude Code sessions. Full feature support.
**Codex** — `selftune wrap-codex -- <args>` or `selftune ingest-codex`
**OpenCode** — `selftune ingest-opencode`
-**OpenClaw** — `selftune ingest-openclaw` + `selftune cron setup` for autonomous evolution
+**OpenClaw** — `selftune ingest-openclaw`. `selftune cron setup` remains available as an optional OpenClaw-oriented scheduler helper, but the main product loop is still `selftune orchestrate` plus generic scheduling.
Requires [Bun](https://bun.sh) or Node.js 18+. No extra API keys.
diff --git a/ROADMAP.md b/ROADMAP.md
index 40abd7c..d4cf915 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -16,7 +16,7 @@
- Per-skill drilldown with evidence viewer, evolution timeline
- SQLite v2 API endpoints (`/api/v2/overview`, `/api/v2/skills/:name`)
- Dark/light theme toggle with selftune branding
- - SPA served at `/`, legacy HTML dashboard at `/legacy/`
+ - SPA served at `/` as the supported local dashboard
## In Progress
- Multi-agent sandbox expansion
diff --git a/apps/local-dashboard/HANDOFF.md b/apps/local-dashboard/HANDOFF.md
index 251a32f..e5f6ae8 100644
--- a/apps/local-dashboard/HANDOFF.md
+++ b/apps/local-dashboard/HANDOFF.md
@@ -28,8 +28,14 @@ JSONL logs → materializeIncremental() → SQLite → getOverviewPayload() / ge
## How to run
```bash
+# From repo root
+bun run dev
+# → if 7888 is free, starts dashboard server on 7888 and SPA dev server on http://localhost:5199
+# → if 7888 is already in use, reuses that dashboard server and starts only the SPA dev server
+
+# Or run manually:
# Terminal 1: Start the dashboard server
-selftune dashboard --port 7888
+selftune dashboard --port 7888 --no-open
# Terminal 2: Start the SPA dev server (proxies /api to port 7888)
cd apps/local-dashboard
@@ -41,7 +47,7 @@ bunx vite
## What was rebased / changed
- **SPA types**: Rewritten to match `queries.ts` payload shapes (`OverviewResponse`, `SkillReportResponse`, `SkillSummary`, `EvidenceEntry`)
-- **API layer**: Now calls `/api/v2/overview` and `/api/v2/skills/:name` instead of `/api/data` + `/api/evaluations/:name`
+- **API layer**: Calls `/api/v2/overview` and `/api/v2/skills/:name`
- **SSE removed**: Replaced with 15s polling (SQLite reads are cheap, SSE was complex)
- **Overview page**: Uses `SkillSummary[]` from `getSkillsList()` for skill cards (pre-aggregated pass rate, check count, sessions)
- **Skill report page**: Single fetch to v2 endpoint instead of parallel overview + evaluations fetch. Shows evidence entries, evolution audit history per skill
@@ -61,13 +67,12 @@ bunx vite
## What still depends on old dashboard code
-- The old v1 endpoints (`/api/data`, `/api/events`, `/api/evaluations/:name`) still work and are used by the legacy `dashboard/index.html`
-- Badge endpoints (`/badge/:name`) and report HTML endpoints (`/report/:name`) use the old `computeStatus` + JSONL reader path
+- Badge endpoints (`/badge/:name`) and report HTML endpoints (`/report/:name`) still use the status/evidence JSONL path rather than SQLite-backed view models
- Action endpoints (`/api/actions/*`) are unchanged
## What remains before this can become default
-1. ~~**Serve built SPA from dashboard-server**~~: Done — `/` serves SPA, old dashboard at `/legacy/`
+1. ~~**Serve built SPA from dashboard-server**~~: Done — `/` serves the SPA
2. ~~**Production build**~~: Done — `bun run build:dashboard` in root package.json
3. **Regression detection**: The SQLite layer doesn't compute regression detection yet — `deriveStatus()` currently only uses pass rate + check count. Add a `regression_detected` column to skill summaries when the monitoring snapshot computation moves to SQLite.
4. **Monitoring snapshot migration**: Move `computeMonitoringSnapshot()` logic into the SQLite materializer or a query helper (window sessions, false negative rate, baseline comparison)
diff --git a/apps/local-dashboard/package.json b/apps/local-dashboard/package.json
index 06931d8..a6520ec 100644
--- a/apps/local-dashboard/package.json
+++ b/apps/local-dashboard/package.json
@@ -4,7 +4,7 @@
"version": "0.1.0",
"type": "module",
"scripts": {
- "dev": "concurrently \"cd ../.. && bun run cli/selftune/index.ts dashboard --serve --port 7888\" \"vite\"",
+ "dev": "concurrently \"cd ../.. && bun run cli/selftune/index.ts dashboard --port 7888 --no-open\" \"vite\"",
"build": "vite build",
"preview": "vite preview",
"typecheck": "tsc --noEmit"
diff --git a/apps/local-dashboard/src/types.ts b/apps/local-dashboard/src/types.ts
index ef9aae6..3f6fb9a 100644
--- a/apps/local-dashboard/src/types.ts
+++ b/apps/local-dashboard/src/types.ts
@@ -1,168 +1,22 @@
/** Data contracts for the v2 SQLite-backed dashboard API */
-// -- Shared primitives --------------------------------------------------------
-
-export interface TelemetryRecord {
- timestamp: string;
- session_id: string;
- skills_triggered: string[];
- errors_encountered: number;
- total_tool_calls: number;
-}
-
-export interface SkillUsageRecord {
- timestamp: string;
- session_id: string;
- skill_name: string;
- skill_path: string;
- query: string;
- triggered: boolean;
- source: string | null;
-}
-
-export interface EvalSnapshot {
- before_pass_rate?: number;
- after_pass_rate?: number;
- net_change?: number;
- improved?: boolean;
- regressions?: Array>;
- new_passes?: Array>;
-}
-
-export interface EvolutionEntry {
- timestamp: string;
- proposal_id: string;
- action: string;
- details: string;
- eval_snapshot?: EvalSnapshot | null;
-}
-
-export interface UnmatchedQuery {
- timestamp: string;
- session_id: string;
- query: string;
-}
-
-export interface PendingProposal {
- proposal_id: string;
- action: string;
- timestamp: string;
- details: string;
- skill_name?: string;
-}
-
-// -- /api/v2/overview response ------------------------------------------------
-
-export interface SkillSummary {
- skill_name: string;
- skill_scope: string | null;
- total_checks: number;
- triggered_count: number;
- pass_rate: number;
- unique_sessions: number;
- last_seen: string | null;
- has_evidence: boolean;
-}
-
-export interface OverviewResponse {
- overview: {
- telemetry: TelemetryRecord[];
- skills: SkillUsageRecord[];
- evolution: EvolutionEntry[];
- counts: {
- telemetry: number;
- skills: number;
- evolution: number;
- evidence: number;
- sessions: number;
- prompts: number;
- };
- unmatched_queries: UnmatchedQuery[];
- pending_proposals: PendingProposal[];
- };
- skills: SkillSummary[];
- version?: string;
-}
-
-// -- /api/v2/skills/:name response --------------------------------------------
-
-export interface EvidenceEntry {
- proposal_id: string;
- target: string;
- stage: string;
- timestamp: string;
- rationale: string | null;
- confidence: number | null;
- original_text: string | null;
- proposed_text: string | null;
- validation: Record | null;
- details: string | null;
- eval_set: Array>;
-}
-
-export interface CanonicalInvocation {
- timestamp: string;
- session_id: string;
- skill_name: string;
- invocation_mode: string | null;
- triggered: boolean;
- confidence: number | null;
- tool_name: string | null;
-}
-
-export interface PromptSample {
- prompt_text: string;
- prompt_kind: string | null;
- is_actionable: boolean;
- occurred_at: string;
- session_id: string;
-}
-
-export interface SessionMeta {
- session_id: string;
- platform: string | null;
- model: string | null;
- agent_cli: string | null;
- branch: string | null;
- workspace_path: string | null;
- started_at: string | null;
- ended_at: string | null;
- completion_status: string | null;
-}
-
-export interface SkillReportResponse {
- skill_name: string;
- usage: {
- total_checks: number;
- triggered_count: number;
- pass_rate: number;
- };
- recent_invocations: Array<{
- timestamp: string;
- session_id: string;
- query: string;
- triggered: boolean;
- source: string | null;
- }>;
- evidence: EvidenceEntry[];
- sessions_with_skill: number;
- evolution: EvolutionEntry[];
- pending_proposals: PendingProposal[];
- // Extended data
- token_usage: {
- total_input_tokens: number;
- total_output_tokens: number;
- };
- canonical_invocations: CanonicalInvocation[];
- duration_stats: {
- avg_duration_ms: number;
- total_duration_ms: number;
- execution_count: number;
- total_errors: number;
- };
- prompt_samples: PromptSample[];
- session_metadata: SessionMeta[];
-}
+export type {
+ CanonicalInvocation,
+ EvalSnapshot,
+ EvidenceEntry,
+ EvolutionEntry,
+ OverviewPayload,
+ OverviewResponse,
+ PendingProposal,
+ PromptSample,
+ SessionMeta,
+ SkillReportPayload,
+ SkillReportResponse,
+ SkillSummary,
+ SkillUsageRecord,
+ TelemetryRecord,
+ UnmatchedQuery,
+} from "../../../cli/selftune/dashboard-contract";
// -- UI types -----------------------------------------------------------------
diff --git a/cli/selftune/cron/setup.ts b/cli/selftune/cron/setup.ts
index ef9a4fb..0c91080 100644
--- a/cli/selftune/cron/setup.ts
+++ b/cli/selftune/cron/setup.ts
@@ -45,18 +45,11 @@ export const DEFAULT_CRON_JOBS: CronJobConfig[] = [
description: "Daily health check after source sync",
},
{
- name: "selftune-evolve",
- cron: "0 3 * * 0",
- message:
- "Run selftune sync, review source-truth status, and run selftune evolve --sync-first for any skills with enough negative evidence or clear undertriggering patterns. Report proposed changes and validation results.",
- description: "Weekly evolution at 3am Sunday",
- },
- {
- name: "selftune-watch",
+ name: "selftune-orchestrate",
cron: "0 */6 * * *",
message:
- "Run selftune sync first, then run selftune watch --sync-first on all recently evolved skills to detect regressions against the latest source-truth telemetry.",
- description: "Monitor regressions every 6 hours after source sync",
+ "Run selftune orchestrate --max-skills 3. This performs source-truth sync, selects candidate skills, evolves validated low-risk descriptions autonomously, and watches recent deployments for regressions.",
+ description: "Autonomous improvement loop every 6 hours",
},
];
@@ -123,7 +116,7 @@ export function loadCronJobs(jobsPath: string): CronJobConfig[] {
/** Register default cron jobs with OpenClaw. */
export async function setupCronJobs(tz: string, dryRun: boolean): Promise {
const openclawPath = Bun.which("openclaw");
- if (!openclawPath) {
+ if (!dryRun && !openclawPath) {
console.error("Error: openclaw is not installed or not in PATH.");
console.error("");
console.error("Install OpenClaw:");
diff --git a/cli/selftune/dashboard-contract.ts b/cli/selftune/dashboard-contract.ts
new file mode 100644
index 0000000..6c235b3
--- /dev/null
+++ b/cli/selftune/dashboard-contract.ts
@@ -0,0 +1,161 @@
+export interface TelemetryRecord { // Element type of OverviewPayload.telemetry.
+ timestamp: string;
+ session_id: string;
+ skills_triggered: string[]; // presumably skill names — confirm against the query layer
+ errors_encountered: number;
+ total_tool_calls: number;
+}
+
+export interface SkillUsageRecord { // Element type of OverviewPayload.skills; carries the query, the skill, and a triggered flag.
+ timestamp: string;
+ session_id: string;
+ skill_name: string;
+ skill_path: string;
+ query: string;
+ triggered: boolean;
+ source: string | null; // nullable: a record may carry no source
+}
+
+export interface EvalSnapshot { // Optional before/after evaluation summary attached to EvolutionEntry.eval_snapshot; every field may be absent.
+ before_pass_rate?: number;
+ after_pass_rate?: number;
+ net_change?: number;
+ improved?: boolean;
+ regressions?: Array<Record<string, unknown>>; // entry shape is not fixed by this contract, so values stay unknown
+ new_passes?: Array<Record<string, unknown>>; // same open entry shape as regressions
+}
+
+export interface EvolutionEntry { // Element type of OverviewPayload.evolution and SkillReportResponse.evolution.
+ timestamp: string;
+ proposal_id: string;
+ action: string;
+ details: string;
+ eval_snapshot?: EvalSnapshot | null; // may be omitted or explicitly null
+}
+
+export interface UnmatchedQuery { // Element type of OverviewPayload.unmatched_queries.
+ timestamp: string;
+ session_id: string;
+ query: string;
+}
+
+export interface PendingProposal { // Element type of the pending_proposals lists in OverviewPayload and SkillReportResponse.
+ proposal_id: string;
+ action: string;
+ timestamp: string;
+ details: string;
+ skill_name?: string; // optional: not every proposal entry carries a skill name
+}
+
+export interface SkillSummary { // Per-skill aggregate row returned in OverviewResponse.skills.
+ skill_name: string;
+ skill_scope: string | null;
+ total_checks: number;
+ triggered_count: number;
+ pass_rate: number; // NOTE(review): scale (0–1 vs percent) is not visible here — confirm
+ unique_sessions: number;
+ last_seen: string | null;
+ has_evidence: boolean;
+}
+
+export interface OverviewPayload { // Type of OverviewResponse.overview: record lists plus aggregate counts.
+ telemetry: TelemetryRecord[];
+ skills: SkillUsageRecord[];
+ evolution: EvolutionEntry[];
+ counts: { // numeric totals, one per category
+ telemetry: number;
+ skills: number;
+ evolution: number;
+ evidence: number;
+ sessions: number;
+ prompts: number;
+ };
+ unmatched_queries: UnmatchedQuery[];
+ pending_proposals: PendingProposal[];
+}
+
+export interface OverviewResponse { // Response shape for GET /api/v2/overview: overview payload plus per-skill summaries.
+ overview: OverviewPayload;
+ skills: SkillSummary[];
+ version?: string; // optional version string
+}
+
+export interface EvidenceEntry { // Element type of SkillReportPayload.evidence; most descriptive fields are nullable.
+ proposal_id: string;
+ target: string;
+ stage: string;
+ timestamp: string;
+ rationale: string | null;
+ confidence: number | null;
+ original_text: string | null;
+ proposed_text: string | null;
+ validation: Record<string, unknown> | null; // open-ended object; shape is not fixed by this contract
+ details: string | null;
+ eval_set: Array<Record<string, unknown>>; // entries kept as open records with unknown values
+}
+
+export interface CanonicalInvocation { // Element type of SkillReportResponse.canonical_invocations.
+ timestamp: string;
+ session_id: string;
+ skill_name: string;
+ invocation_mode: string | null;
+ triggered: boolean;
+ confidence: number | null;
+ tool_name: string | null;
+}
+
+export interface PromptSample { // Element type of SkillReportResponse.prompt_samples.
+ prompt_text: string;
+ prompt_kind: string | null;
+ is_actionable: boolean;
+ occurred_at: string;
+ session_id: string;
+}
+
+export interface SessionMeta { // Element type of SkillReportResponse.session_metadata; every field except session_id is nullable.
+ session_id: string;
+ platform: string | null;
+ model: string | null;
+ agent_cli: string | null;
+ branch: string | null;
+ workspace_path: string | null;
+ started_at: string | null;
+ ended_at: string | null;
+ completion_status: string | null;
+}
+
+export interface SkillReportPayload { // Core per-skill report fields; SkillReportResponse extends this interface.
+ skill_name: string;
+ usage: { // aggregate check counters for the skill
+ total_checks: number;
+ triggered_count: number;
+ pass_rate: number;
+ };
+ recent_invocations: Array<{ // same fields as SkillUsageRecord minus skill_name and skill_path
+ timestamp: string;
+ session_id: string;
+ query: string;
+ triggered: boolean;
+ source: string | null;
+ }>;
+ evidence: EvidenceEntry[];
+ sessions_with_skill: number;
+}
+
+export interface SkillReportResponse extends SkillReportPayload { // Response shape for GET /api/v2/skills/:name: the base payload plus the extended data below.
+ evolution: EvolutionEntry[];
+ pending_proposals: PendingProposal[];
+ token_usage: { // input/output token totals
+ total_input_tokens: number;
+ total_output_tokens: number;
+ };
+ canonical_invocations: CanonicalInvocation[];
+ duration_stats: { // durations are in milliseconds per the _ms suffix
+ avg_duration_ms: number;
+ total_duration_ms: number;
+ execution_count: number;
+ total_errors: number;
+ };
+ prompt_samples: PromptSample[];
+ session_metadata: SessionMeta[];
+}
diff --git a/cli/selftune/dashboard-server.ts b/cli/selftune/dashboard-server.ts
index bcbc97c..d47671c 100644
--- a/cli/selftune/dashboard-server.ts
+++ b/cli/selftune/dashboard-server.ts
@@ -1,16 +1,17 @@
/**
- * selftune dashboard server — Live Bun.serve HTTP server with SSE, data API,
- * and action endpoints for the interactive dashboard.
+ * selftune dashboard server — Bun.serve HTTP server for the SPA dashboard,
+ * skill report HTML, badges, and action endpoints.
*
* Endpoints:
- * GET / — Serve dashboard HTML shell + live mode flag
- * GET /api/data — JSON endpoint returning current telemetry data
- * GET /api/events — SSE stream sending data updates every 5 seconds
+ * GET / — Serve dashboard SPA shell
+ * GET /api/health — Dashboard server health probe
+ * GET /api/v2/overview — SQLite-backed overview payload
+ * GET /api/v2/skills/:name — SQLite-backed per-skill report
* POST /api/actions/watch — Trigger `selftune watch` for a skill
* POST /api/actions/evolve — Trigger `selftune evolve` for a skill
* POST /api/actions/rollback — Trigger `selftune rollback` for a skill
- * GET /api/v2/overview — SQLite-backed overview payload
- * GET /api/v2/skills/:name — SQLite-backed per-skill report
+ * GET /badge/:name — Skill health badge
+ * GET /report/:name — Skill health report HTML
*/
import type { Database } from "bun:sqlite";
@@ -21,7 +22,7 @@ import { findSkillBadgeData } from "./badge/badge-data.js";
import type { BadgeFormat } from "./badge/badge-svg.js";
import { formatBadgeOutput, renderBadgeSvg } from "./badge/badge-svg.js";
import { EVOLUTION_AUDIT_LOG, QUERY_LOG, TELEMETRY_LOG } from "./constants.js";
-import { getLastDeployedProposal } from "./evolution/audit.js";
+import type { OverviewResponse, SkillReportResponse } from "./dashboard-contract.js";
import { readEvidenceTrail } from "./evolution/evidence.js";
import { openDb } from "./localdb/db.js";
import { materializeIncremental } from "./localdb/materialize.js";
@@ -31,37 +32,30 @@ import {
getSkillReportPayload,
getSkillsList,
} from "./localdb/queries.js";
-import { readDecisions } from "./memory/writer.js";
-import { computeMonitoringSnapshot } from "./monitoring/watch.js";
import { doctor } from "./observability.js";
import type { StatusResult } from "./status.js";
-import { computeStatus, DEFAULT_WINDOW_SESSIONS } from "./status.js";
+import { computeStatus } from "./status.js";
import type {
EvolutionAuditEntry,
EvolutionEvidenceEntry,
QueryLogRecord,
SessionTelemetryRecord,
- SkillUsageRecord,
} from "./types.js";
import { readJsonl } from "./utils/jsonl.js";
-import {
- filterActionableQueryRecords,
- filterActionableSkillUsageRecords,
-} from "./utils/query-filter.js";
import { readEffectiveSkillUsageRecords } from "./utils/skill-log.js";
export interface DashboardServerOptions {
port?: number;
host?: string;
+ spaDir?: string;
openBrowser?: boolean;
- dataLoader?: () => DashboardData;
statusLoader?: () => StatusResult;
evidenceLoader?: () => EvolutionEvidenceEntry[];
+ overviewLoader?: () => OverviewResponse;
+ skillReportLoader?: (skillName: string) => SkillReportResponse | null;
actionRunner?: typeof runAction;
}
-const LIVE_CACHE_TTL_MS = 30_000;
-
/** Read selftune version from package.json once at startup */
let selftuneVersion = "unknown";
try {
@@ -71,60 +65,6 @@ try {
// fallback already set
}
-interface DashboardData {
- telemetry: SessionTelemetryRecord[];
- skills: SkillUsageRecord[];
- queries: QueryLogRecord[];
- evolution: EvolutionAuditEntry[];
- evidence: EvolutionEvidenceEntry[];
- decisions: import("./types.js").DecisionRecord[];
- computed: {
- snapshots: Record<string, ReturnType<typeof computeMonitoringSnapshot>>;
- unmatched: Array<{ timestamp: string; session_id: string; query: string }>;
- pendingProposals: EvolutionAuditEntry[];
- };
-}
-
-interface LiveDashboardPayload {
- telemetry: Array<
- Pick<
- SessionTelemetryRecord,
- "timestamp" | "session_id" | "skills_triggered" | "errors_encountered" | "total_tool_calls"
- >
- >;
- skills: Array<
- Pick<
- SkillUsageRecord,
- "timestamp" | "session_id" | "skill_name" | "skill_path" | "query" | "triggered" | "source"
- >
- >;
- queries: Array>;
- evolution: Array<Pick<EvolutionAuditEntry, "timestamp" | "proposal_id" | "action" | "details">>;
- evidence: Array>;
- decisions: DashboardData["decisions"];
- computed: DashboardData["computed"] & { unmatched_count: number };
- counts: {
- telemetry: number;
- skills: number;
- queries: number;
- evolution: number;
- evidence: number;
- decisions: number;
- };
-}
-
-function findViewerHTML(): string {
- const candidates = [
- join(dirname(import.meta.dir), "..", "dashboard", "index.html"),
- join(dirname(import.meta.dir), "dashboard", "index.html"),
- resolve("dashboard", "index.html"),
- ];
- for (const c of candidates) {
- if (existsSync(c)) return c;
- }
- throw new Error("Could not find dashboard/index.html. Ensure it exists in the selftune repo.");
-}
-
function findSpaDir(): string | null {
const candidates = [
join(dirname(import.meta.dir), "..", "apps", "local-dashboard", "dist"),
@@ -137,6 +77,14 @@ function findSpaDir(): string | null {
return null;
}
+function decodePathSegment(segment: string): string | null {
+ try {
+ return decodeURIComponent(segment);
+ } catch {
+ return null;
+ }
+}
+
const MIME_TYPES: Record<string, string> = {
".html": "text/html; charset=utf-8",
".js": "application/javascript; charset=utf-8",
@@ -150,73 +98,6 @@ const MIME_TYPES: Record = {
".ico": "image/x-icon",
};
-function collectData(): DashboardData {
- const telemetry = readJsonl(TELEMETRY_LOG);
- const skills = filterActionableSkillUsageRecords(readEffectiveSkillUsageRecords());
- const queries = readJsonl(QUERY_LOG);
- const actionableQueries = filterActionableQueryRecords(queries);
- const evolution = readJsonl(EVOLUTION_AUDIT_LOG);
- const evidence = readEvidenceTrail();
- const decisions = readDecisions();
-
- // Compute per-skill monitoring snapshots
- const skillNames = [...new Set(skills.map((r) => r.skill_name))];
- const snapshots: Record<string, ReturnType<typeof computeMonitoringSnapshot>> = {};
- for (const name of skillNames) {
- const lastDeployed = getLastDeployedProposal(name);
- const baselinePassRate = lastDeployed?.eval_snapshot?.pass_rate ?? 0.5;
- snapshots[name] = computeMonitoringSnapshot(
- name,
- telemetry,
- skills,
- actionableQueries,
- DEFAULT_WINDOW_SESSIONS,
- baselinePassRate,
- );
- }
-
- // Compute unmatched queries
- const triggeredQueries = new Set(
- skills
- .filter((r) => r.triggered && typeof r.query === "string")
- .map((r) => r.query.toLowerCase().trim()),
- );
- const unmatched = actionableQueries
- .filter((q) => !triggeredQueries.has(q.query.toLowerCase().trim()))
- .map((q) => ({
- timestamp: q.timestamp,
- session_id: q.session_id,
- query: q.query,
- }));
-
- // Compute pending proposals (reuse already-loaded evolution entries)
- const proposalStatus: Record<string, string[]> = {};
- for (const e of evolution) {
- if (!proposalStatus[e.proposal_id]) proposalStatus[e.proposal_id] = [];
- proposalStatus[e.proposal_id].push(e.action);
- }
- const terminalActions = new Set(["deployed", "rejected", "rolled_back"]);
- const seenProposals = new Set();
- const pendingProposals = evolution.filter((e) => {
- if (e.action !== "created" && e.action !== "validated") return false;
- if (seenProposals.has(e.proposal_id)) return false;
- const actions = proposalStatus[e.proposal_id] || [];
- const isPending = !actions.some((a: string) => terminalActions.has(a));
- if (isPending) seenProposals.add(e.proposal_id);
- return isPending;
- });
-
- return {
- telemetry,
- skills,
- queries: actionableQueries,
- evolution,
- evidence,
- decisions,
- computed: { snapshots, unmatched, pendingProposals },
- };
-}
-
function computeStatusFromLogs(): StatusResult {
const telemetry = readJsonl(TELEMETRY_LOG);
const skillRecords = readEffectiveSkillUsageRecords();
@@ -226,56 +107,6 @@ function computeStatusFromLogs(): StatusResult {
return computeStatus(telemetry, skillRecords, queryRecords, auditEntries, doctorResult);
}
-function buildLivePayload(data: DashboardData): LiveDashboardPayload {
- return {
- telemetry: data.telemetry.map((record) => ({
- timestamp: record.timestamp,
- session_id: record.session_id,
- skills_triggered: record.skills_triggered,
- errors_encountered: record.errors_encountered,
- total_tool_calls: record.total_tool_calls,
- })),
- skills: data.skills.map((record) => ({
- timestamp: record.timestamp,
- session_id: record.session_id,
- skill_name: record.skill_name,
- skill_path: record.skill_path,
- query: record.query,
- triggered: record.triggered,
- source: record.source,
- })),
- queries: [],
- evolution: data.evolution.map((record) => ({
- timestamp: record.timestamp,
- proposal_id: record.proposal_id,
- action: record.action,
- details: record.details,
- })),
- evidence: [],
- decisions: data.decisions,
- computed: {
- ...data.computed,
- unmatched: data.computed.unmatched.slice(0, 500),
- unmatched_count: data.computed.unmatched.length,
- },
- counts: {
- telemetry: data.telemetry.length,
- skills: data.skills.length,
- queries: data.queries.length,
- evolution: data.evolution.length,
- evidence: data.evidence.length,
- decisions: data.decisions.length,
- },
- };
-}
-
-function buildLiveHTML(): string {
- const template = readFileSync(findViewerHTML(), "utf-8");
- const liveFlag = "";
-
- return template.replace("