Himaan1998Y · Copilot · Apr 5, 2026 · Apr 5, 2026
diff --git a/.github/workflows/measurement-validation.yml b/.github/workflows/measurement-validation.yml
@@ -0,0 +1,111 @@
+# Validates measurements on pushes to main and on pull requests targeting main.
+# Runs the type-check and unit tests, collects trend and regression reports as
+# JSON artifacts, and posts a summary comment on pull requests.
+name: Measurement Validation
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# pull-requests: write lets the "Post PR summary" step create the PR comment.
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  validate:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v5
+
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - name: Install dependencies
+        run: bun install --frozen-lockfile
+
+      - name: TypeScript type-check
+        run: bun run check
+
+      - name: Run unit tests
+        run: bun test src/layout.test.ts
+
+      # Best-effort: `|| true` keeps a failing trends run from failing the job.
+      - name: Performance trends (chrome)
+        run: bun run validator:trends --browser=chrome --json > /tmp/perf-chrome.json || true
+
+      # Best-effort as well. Only stdout is captured so that anything written to
+      # stderr cannot corrupt the JSON file parsed by the summary step; stderr
+      # stays visible in the job log.
+      - name: Regression detection
+        id: regression
+        run: |
+          bun run validator:regression-detect --json > /tmp/regressions.json || true
+          cat /tmp/regressions.json
+
+      # Runs even when an earlier step failed so partial results are kept.
+      - name: Upload validation artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: measurement-validation-results
+          path: |
+            /tmp/perf-chrome.json
+            /tmp/regressions.json
+          if-no-files-found: warn
+
+      - name: Post PR summary
+        if: github.event_name == 'pull_request'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs')
+            let perfSummary = '_(no data)_'
+            let regressionSummary = '_(no data)_'
+
+            // A missing or unparsable report keeps the placeholder above; the
+            // reason is logged as a warning instead of being silently dropped.
+            try {
+              const perf = JSON.parse(fs.readFileSync('/tmp/perf-chrome.json', 'utf-8'))
+              const degraded = (perf.metrics || []).filter(m => m.trend === 'degrading')
+              perfSummary = degraded.length === 0
+                ? '✅ All benchmarks within expected range'
+                : `⚠️ ${degraded.length} degraded benchmark(s)`
+            } catch (err) {
+              core.warning(`Could not read performance report: ${err.message}`)
+            }
+
+            try {
+              const reg = JSON.parse(fs.readFileSync('/tmp/regressions.json', 'utf-8'))
+              const total =
+                (reg.accuracyRegressions || []).length +
+                (reg.performanceRegressions || []).length
+              regressionSummary = reg.hasBlocker
+                ? `❌ Critical regression(s) detected — ${total} issue(s)`
+                : total > 0
+                  ? `⚠️ ${total} regression(s) detected`
+                  : '✅ No regressions detected'
+            } catch (err) {
+              core.warning(`Could not read regression report: ${err.message}`)
+            }
+
+            const body = [
+              '## 📊 Measurement Validator Results',
+              '',
+              `**Performance (Chrome):** ${perfSummary}`,
+              `**Regressions:** ${regressionSummary}`,
+              '',
+              `_Workflow run: [${context.runId}](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId})_`,
+            ].join('\n')
+
+            // Awaited so a rejected request fails this step directly instead of
+            // surfacing as an unhandled promise rejection.
+            await github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body,
+            })
diff --git a/.gitignore b/.gitignore
@@ -33,3 +33,11 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
 
 # Finder (MacOS) folder config
 .DS_Store
+
+# measurement-validator runtime files
+.measurement-results.db
+.measurement-results.db-shm
+.measurement-results.db-wal
+
+# npm lockfile (project uses bun.lock)
+package-lock.json
diff --git a/docs/measurement-validator/README.md b/docs/measurement-validator/README.md
@@ -0,0 +1,211 @@
+# Measurement Validator — Phase 4 Documentation
+
+## Overview
+
+The measurement-validator Phase 4 components add GitHub CI integration,
+performance tracking, regression detection, a live dashboard server, SQLite
+persistence, and Slack notifications on top of the existing accuracy and
+benchmark infrastructure.
+
+All components are built with TypeScript and Bun's built-in APIs — no
+extra runtime dependencies are needed beyond what is already in
+`package.json`.
+
+---
+
+## Components
+
+### 1. GitHub Actions Workflow
+
+**File:** `.github/workflows/measurement-validation.yml`
+
+Runs automatically on every push to `main` and on every pull request targeting `main`:
+
+- TypeScript type-check (`bun run check`)
+- Unit tests (`bun test src/layout.test.ts`)
+- Performance trends for Chrome
+- Regression detection across configured browsers
+- Uploads JSON artifacts (performance + regressions)
+- Posts a summary comment to open PRs
+
+### 2. Performance Tracker
+
+**File:** `src/measurement-validator/performance-tracker.ts`
+
+Loads benchmark snapshots from `benchmarks/<browser>.json`, compares each
+entry against a baseline stored in `.measurement-baseline.json`, and
+produces a `PerformanceReport`.
+
+```typescript
+import { trackPerformance, writeBaseline, formatPerformanceReport } from './performance-tracker.js'
+
+// Compare current benchmarks against baseline
+const report = await trackPerformance('chrome', { warnPct: 10, criticalPct: 25 })
+console.log(formatPerformanceReport(report))
+
+// Write a new baseline from current snapshots
+await writeBaseline(['chrome', 'safari'])
+```
+
+### 3. Regression Detector
+
+**File:** `src/measurement-validator/regression-detector.ts`
+
+Detects accuracy and performance regressions across multiple browsers.
+
+```typescript
+import { detectRegressions, formatRegressionReport } from './regression-detector.js'
+
+const report = await detectRegressions(['chrome', 'safari', 'firefox'])
+console.log(formatRegressionReport(report))
+
+if (report.hasBlocker) process.exit(1)
+```
+
+### 4. Dashboard Server
+
+**File:** `src/measurement-validator/dashboard-server.ts`
+
+An HTTP server (Bun.serve) that exposes the accuracy/benchmark/status data
+as a JSON API and serves an embedded HTML dashboard.
+
+```typescript
+import { DashboardServer } from './dashboard-server.js'
+
+const server = new DashboardServer({ port: 3001 })
+server.start()
+// http://localhost:3001 — dashboard UI
+// http://localhost:3001/api/status — status JSON
+// http://localhost:3001/api/accuracy/chrome — accuracy data
+```
+
+**API endpoints:**
+
+| Method | Path | Description |
+|--------|------|-------------|
+| GET | `/` | HTML dashboard |
+| GET | `/health` | Liveness check |
+| GET | `/api/status` | `status/dashboard.json` |
+| GET | `/api/accuracy/:browser` | `accuracy/<browser>.json` |
+| GET | `/api/benchmarks/:browser` | `benchmarks/<browser>.json` |
+| GET | `/api/runs` | Recent validation runs (SQLite) |
+| GET | `/api/runs/summaries` | High-level trend summaries |
+| POST | `/api/runs` | Insert a new run record |
+
+### 5. Results Database
+
+**File:** `src/measurement-validator/results-database.ts`
+
+SQLite persistence via Bun's built-in `bun:sqlite`.  Stores validation run
+records with accuracy, benchmark, and regression data.
+
+```typescript
+import { ResultsDatabase } from './results-database.js'
+
+const db = new ResultsDatabase()
+
+db.insertRun({
+  runAt: new Date().toISOString(),
+  browser: 'chrome',
+  accuracyTotal: 7680,
+  accuracyMatches: 7680,
+  benchmarkJson: JSON.stringify(benchmarkReport),
+  regressionJson: JSON.stringify(regressionReport),
+  tags: 'pr:123',
+})
+
+const recent = db.queryRuns({ browser: 'chrome', limit: 20 })
+const summaries = db.querySummaries({ since: '2026-01-01T00:00:00Z' })
+db.close()
+```
+
+### 6. Slack Notifier
+
+**File:** `src/measurement-validator/slack-notifier.ts`
+
+Sends formatted Slack messages via an Incoming Webhook URL.  Reads the URL
+from `SLACK_WEBHOOK_URL` environment variable when using the factory helper.
+
+```typescript
+import { SlackNotifier, createSlackNotifierFromEnv } from './slack-notifier.js'
+
+const notifier = createSlackNotifierFromEnv() // reads SLACK_WEBHOOK_URL
+if (notifier) {
+  await notifier.notifyRegressionReport(report)
+  await notifier.notifyPerformanceReport(perfReport)
+  await notifier.notifyText('Custom message')
+}
+```
+
+---
+
+## CLI Scripts
+
+### `bun run validator:dashboard`
+
+Start the dashboard HTTP server.
+
+```
+bun run validator:dashboard [--port=3001] [--host=127.0.0.1] [--no-db]
+```
+
+### `bun run validator:trends`
+
+Print the performance trend report.
+
+```
+bun run validator:trends [--browser=chrome] [--warn=10] [--critical=25] [--json]
+```
+
+### `bun run validator:watch`
+
+Watch the `accuracy/` and `benchmarks/` directories and re-run regression
+detection whenever a snapshot file changes.
+
+```
+bun run validator:watch [--browsers=chrome,safari,firefox] [--slack-webhook=<url>]
+```
+
+### `bun run validator:regression-detect`
+
+Run one-shot regression detection (used in CI).
+
+```
+bun run validator:regression-detect [--browsers=chrome] [--json] [--fail-on-critical]
+```
+
+---
+
+## Configuration
+
+### Performance Baseline
+
+Write a baseline from the current benchmark snapshots:
+
+```bash
+bun -e "import('./src/measurement-validator/performance-tracker.js').then(m => m.writeBaseline(['chrome', 'safari']))"
+```
+
+This creates `.measurement-baseline.json`, which is meant to be checked into
+version control.  Commit it alongside any intentional performance changes.
+
+### Slack Webhook
+
+Set the `SLACK_WEBHOOK_URL` environment variable (e.g. in a GitHub Actions
+secret) to enable Slack notifications.  The notifier is disabled silently
+when the variable is absent.
+
+---
+
+## Data Files
+
+| File | Purpose |
+|------|---------|
+| `accuracy/chrome.json` | Chrome accuracy snapshot (baseline) |
+| `accuracy/safari.json` | Safari accuracy snapshot (baseline) |
+| `accuracy/firefox.json` | Firefox accuracy snapshot (baseline) |
+| `benchmarks/chrome.json` | Chrome benchmark snapshot |
+| `benchmarks/safari.json` | Safari benchmark snapshot |
+| `status/dashboard.json` | Aggregated status dashboard |
+| `.measurement-baseline.json` | Performance baseline (generated, commit after intentional changes) |
+| `.measurement-results.db` | SQLite results history (not committed) |
diff --git a/package.json b/package.json
@@ -29,6 +29,7 @@
     "src",
     "!src/layout.test.ts",
     "!src/test-data.ts",
+    "!src/measurement-validator",
     "pages/demos",
     "pages/assets"
   ],
@@ -68,7 +69,11 @@
     "site:build": "rm -rf site && bun run scripts/build-demo-site.ts",
     "start": "HOST=${HOST:-127.0.0.1}; PORT=3000; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Freeing port $PORT: terminating $pids\"; kill $pids 2>/dev/null || true; sleep 1; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Port $PORT still busy: killing $pids\"; kill -9 $pids 2>/dev/null || true; fi; fi; bun pages/*.html pages/demos/*.html pages/demos/*/index.html --host=$HOST:$PORT",
     "start:lan": "HOST=0.0.0.0 bun run start",
-    "start:watch": "HOST=${HOST:-127.0.0.1}; PORT=3000; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Freeing port $PORT: terminating $pids\"; kill $pids 2>/dev/null || true; sleep 1; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Port $PORT still busy: killing $pids\"; kill -9 $pids 2>/dev/null || true; fi; fi; bun pages/*.html pages/demos/*.html pages/demos/*/index.html --watch --no-clear-screen --host=$HOST:$PORT"
+    "start:watch": "HOST=${HOST:-127.0.0.1}; PORT=3000; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Freeing port $PORT: terminating $pids\"; kill $pids 2>/dev/null || true; sleep 1; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Port $PORT still busy: killing $pids\"; kill -9 $pids 2>/dev/null || true; fi; fi; bun pages/*.html pages/demos/*.html pages/demos/*/index.html --watch --no-clear-screen --host=$HOST:$PORT",
+    "validator:dashboard": "bun run scripts/validator-dashboard.ts",
+    "validator:trends": "bun run scripts/validator-trends.ts",
+    "validator:watch": "bun run scripts/validator-watch.ts",
+    "validator:regression-detect": "bun run scripts/validator-regression-detect.ts"
   },
   "devDependencies": {
     "@types/bun": "latest",

diff --git a/scripts/validator-dashboard.ts b/scripts/validator-dashboard.ts
@@ -0,0 +1,62 @@
+#!/usr/bin/env bun
+// validator-dashboard.ts — start the measurement-validator HTTP dashboard.
+//
+// Usage:
+//   bun run scripts/validator-dashboard.ts [--port=3001] [--host=127.0.0.1] [--no-db]
+//
+// Flags:
+//   --port=N        Port to listen on (default 3001; integer between 0 and 65535)
+//   --host=H        Hostname/IP to bind (default 127.0.0.1)
+//   --no-db         Disable SQLite persistence (serve read-only data only)
+
+import { DashboardServer } from '../src/measurement-validator/dashboard-server.js'
+
+const DEFAULT_PORT = 3001
+const DEFAULT_HOST = '127.0.0.1'
+
+/** Returns the value of the first `--name=value` argument, or null when the flag is absent. */
+function parseFlag(name: string): string | null {
+  const prefix = `--${name}=`
+  const arg = process.argv.find(v => v.startsWith(prefix))
+  return arg !== undefined ? arg.slice(prefix.length) : null
+}
+
+/** Returns true when the bare `--name` switch appears in the argument list. */
+function hasFlag(name: string): boolean {
+  return process.argv.includes(`--${name}`)
+}
+
+/**
+ * Resolves the `--port` flag to a port number, falling back to DEFAULT_PORT.
+ * Exits with status 1 when the value is empty or not an integer in 0–65535,
+ * rather than handing NaN or an out-of-range value to the server.
+ */
+function resolvePort(): number {
+  const raw = parseFlag('port')
+  if (raw === null) return DEFAULT_PORT
+  const parsed = Number(raw)
+  // Number('') is 0, so an empty value has to be rejected explicitly.
+  if (raw.trim() === '' || !Number.isInteger(parsed) || parsed < 0 || parsed > 65535) {
+    console.error(`Invalid --port value: "${raw}" (expected an integer between 0 and 65535)`)
+    process.exit(1)
+  }
+  return parsed
+}
+
+const port = resolvePort()
+const host = parseFlag('host') ?? DEFAULT_HOST
+const enableDatabase = !hasFlag('no-db')
+
+const server = new DashboardServer({ port, host, enableDatabase })
+server.start()
+
+/** Stops the dashboard server and exits with status 0. */
+function shutdown(): void {
+  console.log('\nShutting down dashboard server…')
+  server.stop()
+  process.exit(0)
+}
+
+// Shut down cleanly on Ctrl+C (SIGINT) and on a termination request (SIGTERM).
+process.on('SIGINT', shutdown)
+process.on('SIGTERM', shutdown)