From 9c5d95a23d41d5d45f528fd492a1ac1321577355 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 5 Apr 2026 10:34:06 +0000 Subject: [PATCH 1/2] Initial plan From 427328a874e21b694d4329d233ed595456799828 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 5 Apr 2026 10:46:42 +0000 Subject: [PATCH 2/2] Phase 4: GitHub Integration & Advanced Features (measurement-validator) Agent-Logs-Url: https://github.com/Himaan1998Y/pretext/sessions/aedd6bf4-3029-4234-a28d-a885603e3d95 Co-authored-by: Himaan1998Y <210527591+Himaan1998Y@users.noreply.github.com> --- .github/workflows/measurement-validation.yml | 95 +++++++ .gitignore | 8 + docs/measurement-validator/README.md | 211 ++++++++++++++ package.json | 7 +- scripts/validator-dashboard.ts | 35 +++ scripts/validator-regression-detect.ts | 84 ++++++ scripts/validator-trends.ts | 39 +++ scripts/validator-watch.ts | 86 ++++++ src/measurement-validator/dashboard-server.ts | 260 ++++++++++++++++++ .../performance-tracker.ts | 160 +++++++++++ .../regression-detector.ts | 209 ++++++++++++++ src/measurement-validator/results-database.ts | 155 +++++++++++ src/measurement-validator/slack-notifier.ts | 149 ++++++++++ src/measurement-validator/types.ts | 80 ++++++ tsconfig.build.json | 2 +- 15 files changed, 1578 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/measurement-validation.yml create mode 100644 docs/measurement-validator/README.md create mode 100644 scripts/validator-dashboard.ts create mode 100644 scripts/validator-regression-detect.ts create mode 100644 scripts/validator-trends.ts create mode 100644 scripts/validator-watch.ts create mode 100644 src/measurement-validator/dashboard-server.ts create mode 100644 src/measurement-validator/performance-tracker.ts create mode 100644 src/measurement-validator/regression-detector.ts create mode 100644 src/measurement-validator/results-database.ts 
create mode 100644 src/measurement-validator/slack-notifier.ts create mode 100644 src/measurement-validator/types.ts diff --git a/.github/workflows/measurement-validation.yml b/.github/workflows/measurement-validation.yml new file mode 100644 index 00000000..365edbc0 --- /dev/null +++ b/.github/workflows/measurement-validation.yml @@ -0,0 +1,95 @@ +name: Measurement Validation + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + pull-requests: write + +jobs: + validate: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v5 + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: TypeScript type-check + run: bun run check + + - name: Run unit tests + run: bun test src/layout.test.ts + + - name: Performance trends (chrome) + run: bun run validator:trends --browser=chrome --json > /tmp/perf-chrome.json || true + + - name: Regression detection + id: regression + run: | + bun run validator:regression-detect --json > /tmp/regressions.json 2>&1 || true + cat /tmp/regressions.json + + - name: Upload validation artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: measurement-validation-results + path: | + /tmp/perf-chrome.json + /tmp/regressions.json + if-no-files-found: warn + + - name: Post PR summary + if: github.event_name == 'pull_request' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs') + let perfSummary = '_(no data)_' + let regressionSummary = '_(no data)_' + + try { + const perf = JSON.parse(fs.readFileSync('/tmp/perf-chrome.json', 'utf-8')) + const degraded = (perf.metrics || []).filter(m => m.trend === 'degrading') + perfSummary = degraded.length === 0 + ? 
'✅ All benchmarks within expected range' + : `⚠️ ${degraded.length} degraded benchmark(s)` + } catch {} + + try { + const reg = JSON.parse(fs.readFileSync('/tmp/regressions.json', 'utf-8')) + const total = + (reg.accuracyRegressions || []).length + + (reg.performanceRegressions || []).length + regressionSummary = reg.hasBlocker + ? `❌ Critical regression(s) detected — ${total} issue(s)` + : total > 0 + ? `⚠️ ${total} regression(s) detected` + : '✅ No regressions detected' + } catch {} + + const body = [ + '## 📊 Measurement Validator Results', + '', + `**Performance (Chrome):** ${perfSummary}`, + `**Regressions:** ${regressionSummary}`, + '', + `_Workflow run: [${context.runId}](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId})_`, + ].join('\n') + + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body, + }) diff --git a/.gitignore b/.gitignore index 7428ea11..5f58d4df 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,11 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json # Finder (MacOS) folder config .DS_Store + +# measurement-validator runtime files +.measurement-results.db +.measurement-results.db-shm +.measurement-results.db-wal + +# npm lockfile (project uses bun.lock) +package-lock.json diff --git a/docs/measurement-validator/README.md b/docs/measurement-validator/README.md new file mode 100644 index 00000000..f39a126b --- /dev/null +++ b/docs/measurement-validator/README.md @@ -0,0 +1,211 @@ +# Measurement Validator — Phase 4 Documentation + +## Overview + +The measurement-validator Phase 4 components add GitHub CI integration, +performance tracking, regression detection, a live dashboard server, SQLite +persistence, and Slack notifications on top of the existing accuracy and +benchmark infrastructure. 
+ +All components are built with TypeScript and Bun's built-in APIs — no +extra runtime dependencies are needed beyond what is already in +`package.json`. + +--- + +## Components + +### 1. GitHub Actions Workflow + +**File:** `.github/workflows/measurement-validation.yml` + +Runs automatically on every push to `main` and on every pull request: + +- TypeScript type-check (`bun run check`) +- Unit tests (`bun test src/layout.test.ts`) +- Performance trends for Chrome +- Regression detection across configured browsers +- Uploads JSON artifacts (performance + regressions) +- Posts a summary comment to open PRs + +### 2. Performance Tracker + +**File:** `src/measurement-validator/performance-tracker.ts` + +Loads benchmark snapshots from `benchmarks/.json`, compares each +entry against a baseline stored in `.measurement-baseline.json`, and +produces a `PerformanceReport`. + +```typescript +import { trackPerformance, writeBaseline, formatPerformanceReport } from './performance-tracker.js' + +// Compare current benchmarks against baseline +const report = await trackPerformance('chrome', { warnPct: 10, criticalPct: 25 }) +console.log(formatPerformanceReport(report)) + +// Write a new baseline from current snapshots +await writeBaseline(['chrome', 'safari']) +``` + +### 3. Regression Detector + +**File:** `src/measurement-validator/regression-detector.ts` + +Detects accuracy and performance regressions across multiple browsers. + +```typescript +import { detectRegressions, formatRegressionReport } from './regression-detector.js' + +const report = await detectRegressions(['chrome', 'safari', 'firefox']) +console.log(formatRegressionReport(report)) + +if (report.hasBlocker) process.exit(1) +``` + +### 4. Dashboard Server + +**File:** `src/measurement-validator/dashboard-server.ts` + +An HTTP server (Bun.serve) that exposes the accuracy/benchmark/status data +as a JSON API and serves an embedded HTML dashboard. 
+ +```typescript +import { DashboardServer } from './dashboard-server.js' + +const server = new DashboardServer({ port: 3001 }) +server.start() +// http://localhost:3001 — dashboard UI +// http://localhost:3001/api/status — status JSON +// http://localhost:3001/api/accuracy/chrome — accuracy data +``` + +**API endpoints:** + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/` | HTML dashboard | +| GET | `/health` | Liveness check | +| GET | `/api/status` | `status/dashboard.json` | +| GET | `/api/accuracy/:browser` | `accuracy/.json` | +| GET | `/api/benchmarks/:browser` | `benchmarks/.json` | +| GET | `/api/runs` | Recent validation runs (SQLite) | +| GET | `/api/runs/summaries` | High-level trend summaries | +| POST | `/api/runs` | Insert a new run record | + +### 5. Results Database + +**File:** `src/measurement-validator/results-database.ts` + +SQLite persistence via Bun's built-in `bun:sqlite`. Stores validation run +records with accuracy, benchmark, and regression data. + +```typescript +import { ResultsDatabase } from './results-database.js' + +const db = new ResultsDatabase() + +db.insertRun({ + runAt: new Date().toISOString(), + browser: 'chrome', + accuracyTotal: 7680, + accuracyMatches: 7680, + benchmarkJson: JSON.stringify(benchmarkReport), + regressionJson: JSON.stringify(regressionReport), + tags: 'pr:123', +}) + +const recent = db.queryRuns({ browser: 'chrome', limit: 20 }) +const summaries = db.querySummaries({ since: '2026-01-01T00:00:00Z' }) +db.close() +``` + +### 6. Slack Notifier + +**File:** `src/measurement-validator/slack-notifier.ts` + +Sends formatted Slack messages via an Incoming Webhook URL. Reads the URL +from `SLACK_WEBHOOK_URL` environment variable when using the factory helper. 
+ +```typescript +import { SlackNotifier, createSlackNotifierFromEnv } from './slack-notifier.js' + +const notifier = createSlackNotifierFromEnv() // reads SLACK_WEBHOOK_URL +if (notifier) { + await notifier.notifyRegressionReport(report) + await notifier.notifyPerformanceReport(perfReport) + await notifier.notifyText('Custom message') +} +``` + +--- + +## CLI Scripts + +### `bun run validator:dashboard` + +Start the dashboard HTTP server. + +``` +bun run validator:dashboard [--port=3001] [--host=127.0.0.1] [--no-db] +``` + +### `bun run validator:trends` + +Print performance trend report. + +``` +bun run validator:trends [--browser=chrome] [--warn=10] [--critical=25] [--json] +``` + +### `bun run validator:watch` + +Watch the `accuracy/` and `benchmarks/` directories and re-run regression +detection whenever a snapshot file changes. + +``` +bun run validator:watch [--browsers=chrome,safari,firefox] [--slack-webhook=] +``` + +### `bun run validator:regression-detect` + +Run one-shot regression detection (used in CI). + +``` +bun run validator:regression-detect [--browsers=chrome] [--json] [--fail-on-critical] +``` + +--- + +## Configuration + +### Performance Baseline + +Write a baseline from the current benchmark snapshots: + +```bash +bun -e "import('./src/measurement-validator/performance-tracker.js').then(m => m.writeBaseline(['chrome', 'safari']))" +``` + +This creates `.measurement-baseline.json` which is checked into version +control. Commit it alongside any intentional performance changes. + +### Slack Webhook + +Set the `SLACK_WEBHOOK_URL` environment variable (e.g. in a GitHub Actions +secret) to enable Slack notifications. The notifier is disabled silently +when the variable is absent. 
+ +--- + +## Data Files + +| File | Purpose | +|------|---------| +| `accuracy/chrome.json` | Chrome accuracy snapshot (baseline) | +| `accuracy/safari.json` | Safari accuracy snapshot (baseline) | +| `accuracy/firefox.json` | Firefox accuracy snapshot (baseline) | +| `benchmarks/chrome.json` | Chrome benchmark snapshot | +| `benchmarks/safari.json` | Safari benchmark snapshot | +| `status/dashboard.json` | Aggregated status dashboard | +| `.measurement-baseline.json` | Performance baseline (generated, commit after intentional changes) | +| `.measurement-results.db` | SQLite results history (not committed) | diff --git a/package.json b/package.json index 0b28a0e4..57a780dc 100644 --- a/package.json +++ b/package.json @@ -29,6 +29,7 @@ "src", "!src/layout.test.ts", "!src/test-data.ts", + "!src/measurement-validator", "pages/demos", "pages/assets" ], @@ -68,7 +69,11 @@ "site:build": "rm -rf site && bun run scripts/build-demo-site.ts", "start": "HOST=${HOST:-127.0.0.1}; PORT=3000; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Freeing port $PORT: terminating $pids\"; kill $pids 2>/dev/null || true; sleep 1; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Port $PORT still busy: killing $pids\"; kill -9 $pids 2>/dev/null || true; fi; fi; bun pages/*.html pages/demos/*.html pages/demos/*/index.html --host=$HOST:$PORT", "start:lan": "HOST=0.0.0.0 bun run start", - "start:watch": "HOST=${HOST:-127.0.0.1}; PORT=3000; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Freeing port $PORT: terminating $pids\"; kill $pids 2>/dev/null || true; sleep 1; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Port $PORT still busy: killing $pids\"; kill -9 $pids 2>/dev/null || true; fi; fi; bun pages/*.html pages/demos/*.html pages/demos/*/index.html --watch --no-clear-screen --host=$HOST:$PORT" + "start:watch": "HOST=${HOST:-127.0.0.1}; 
PORT=3000; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Freeing port $PORT: terminating $pids\"; kill $pids 2>/dev/null || true; sleep 1; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Port $PORT still busy: killing $pids\"; kill -9 $pids 2>/dev/null || true; fi; fi; bun pages/*.html pages/demos/*.html pages/demos/*/index.html --watch --no-clear-screen --host=$HOST:$PORT", + "validator:dashboard": "bun run scripts/validator-dashboard.ts", + "validator:trends": "bun run scripts/validator-trends.ts", + "validator:watch": "bun run scripts/validator-watch.ts", + "validator:regression-detect": "bun run scripts/validator-regression-detect.ts" }, "devDependencies": { "@types/bun": "latest", diff --git a/scripts/validator-dashboard.ts b/scripts/validator-dashboard.ts new file mode 100644 index 00000000..3462d677 --- /dev/null +++ b/scripts/validator-dashboard.ts @@ -0,0 +1,35 @@ +#!/usr/bin/env bun +// validator-dashboard.ts — start the measurement-validator HTTP dashboard. +// +// Usage: +// bun run scripts/validator-dashboard.ts [--port=3001] [--host=127.0.0.1] [--no-db] +// +// Flags: +// --port=N Port to listen on (default 3001) +// --host=H Hostname/IP to bind (default 127.0.0.1) +// --no-db Disable SQLite persistence (serve read-only data only) + +import { DashboardServer } from '../src/measurement-validator/dashboard-server.js' + +function parseFlag(name: string): string | null { + const prefix = `--${name}=` + const arg = process.argv.find(v => v.startsWith(prefix)) + return arg !== undefined ? arg.slice(prefix.length) : null +} + +function hasFlag(name: string): boolean { + return process.argv.includes(`--${name}`) +} + +const port = Number(parseFlag('port') ?? 3001) +const host = parseFlag('host') ?? 
'127.0.0.1' +const enableDatabase = !hasFlag('no-db') + +const server = new DashboardServer({ port, host, enableDatabase }) +server.start() + +process.on('SIGINT', () => { + console.log('\nShutting down dashboard server…') + server.stop() + process.exit(0) +}) diff --git a/scripts/validator-regression-detect.ts b/scripts/validator-regression-detect.ts new file mode 100644 index 00000000..0d08862e --- /dev/null +++ b/scripts/validator-regression-detect.ts @@ -0,0 +1,84 @@ +#!/usr/bin/env bun +// validator-regression-detect.ts — run regression detection and report results. +// +// Usage: +// bun run scripts/validator-regression-detect.ts [--browsers=chrome,safari,firefox] +// [--json] +// [--slack-webhook=] +// [--fail-on-critical] +// +// Flags: +// --browsers=B Comma-separated browser list (default: chrome) +// --warn=N Perf warning threshold in % (default 10) +// --critical=N Perf critical threshold in % (default 25) +// --json Emit JSON output instead of human-readable text +// --slack-webhook=URL Send Slack notification +// --fail-on-critical Exit with code 1 when critical regressions are found + +import { + detectRegressions, + formatRegressionReport, +} from '../src/measurement-validator/regression-detector.js' +import { + createSlackNotifierFromEnv, + SlackNotifier, +} from '../src/measurement-validator/slack-notifier.js' +import { ResultsDatabase } from '../src/measurement-validator/results-database.js' +import type { BrowserName } from '../src/measurement-validator/types.js' + +function parseFlag(name: string): string | null { + const prefix = `--${name}=` + const arg = process.argv.find(v => v.startsWith(prefix)) + return arg !== undefined ? arg.slice(prefix.length) : null +} + +const browsersArg = parseFlag('browsers') ?? 'chrome' +const browsers = browsersArg.split(',').map(b => b.trim()) as BrowserName[] +const warnPct = Number(parseFlag('warn') ?? 10) +const criticalPct = Number(parseFlag('critical') ?? 
25) +const emitJson = process.argv.includes('--json') +const failOnCritical = process.argv.includes('--fail-on-critical') +const slackUrl = parseFlag('slack-webhook') + +const report = await detectRegressions(browsers, { perfWarnPct: warnPct, perfCriticalPct: criticalPct }) + +if (emitJson) { + console.log(JSON.stringify(report, null, 2)) +} else { + console.log(formatRegressionReport(report)) +} + +// Persist to SQLite if database is available +try { + const db = new ResultsDatabase() + for (const browser of browsers) { + db.insertRun({ + runAt: report.generatedAt, + browser, + accuracyTotal: 0, + accuracyMatches: 0, + benchmarkJson: '{}', + regressionJson: JSON.stringify(report), + tags: `browser:${browser}`, + }) + } + db.close() +} catch { + // Non-fatal — DB may not be set up in all environments. +} + +// Send Slack notification if configured. +const notifier: SlackNotifier | null = + slackUrl != null ? new SlackNotifier(slackUrl) : createSlackNotifierFromEnv() + +if (notifier != null) { + try { + await notifier.notifyRegressionReport(report) + } catch (err) { + console.error('Slack notification failed:', err) + } +} + +if (failOnCritical && report.hasBlocker) { + process.exit(1) +} diff --git a/scripts/validator-trends.ts b/scripts/validator-trends.ts new file mode 100644 index 00000000..b7b673f4 --- /dev/null +++ b/scripts/validator-trends.ts @@ -0,0 +1,39 @@ +#!/usr/bin/env bun +// validator-trends.ts — print performance trends from the benchmark snapshots. 
+// +// Usage: +// bun run scripts/validator-trends.ts [--browser=chrome] [--warn=10] [--critical=25] +// +// Flags: +// --browser=B chrome | safari | firefox (default: chrome) +// --warn=N Percent degradation threshold for warnings (default 10) +// --critical=N Percent degradation threshold for critical flags (default 25) +// --json Emit JSON instead of human-readable text + +import { + formatPerformanceReport, + trackPerformance, +} from '../src/measurement-validator/performance-tracker.js' +import type { BrowserName } from '../src/measurement-validator/types.js' + +function parseFlag(name: string): string | null { + const prefix = `--${name}=` + const arg = process.argv.find(v => v.startsWith(prefix)) + return arg !== undefined ? arg.slice(prefix.length) : null +} + +const browser = (parseFlag('browser') ?? 'chrome') as BrowserName +const warnPct = Number(parseFlag('warn') ?? 10) +const criticalPct = Number(parseFlag('critical') ?? 25) +const emitJson = process.argv.includes('--json') + +const report = await trackPerformance(browser, { warnPct, criticalPct }) + +if (emitJson) { + console.log(JSON.stringify(report, null, 2)) +} else { + console.log(formatPerformanceReport(report)) + if (report.regressionCount > 0) { + process.exit(1) + } +} diff --git a/scripts/validator-watch.ts b/scripts/validator-watch.ts new file mode 100644 index 00000000..e79145db --- /dev/null +++ b/scripts/validator-watch.ts @@ -0,0 +1,86 @@ +#!/usr/bin/env bun +// validator-watch.ts — watch benchmark/accuracy snapshot files and re-run +// regression detection whenever a file changes. 
+// +// Usage: +// bun run scripts/validator-watch.ts [--browsers=chrome,safari,firefox] +// [--slack-webhook=] +// +// Flags: +// --browsers=B Comma-separated list of browsers to watch (default: chrome) +// --slack-webhook=URL Send notifications via Slack when regressions are found + +import { watch } from 'node:fs' +import { join } from 'node:path' +import { + detectRegressions, + formatRegressionReport, +} from '../src/measurement-validator/regression-detector.js' +import { + createSlackNotifierFromEnv, + SlackNotifier, +} from '../src/measurement-validator/slack-notifier.js' +import type { BrowserName } from '../src/measurement-validator/types.js' + +function parseFlag(name: string): string | null { + const prefix = `--${name}=` + const arg = process.argv.find(v => v.startsWith(prefix)) + return arg !== undefined ? arg.slice(prefix.length) : null +} + +const browsersArg = parseFlag('browsers') ?? 'chrome' +const browsers = browsersArg.split(',').map(b => b.trim()) as BrowserName[] +const slackUrl = parseFlag('slack-webhook') +const notifier: SlackNotifier | null = + slackUrl != null ? 
new SlackNotifier(slackUrl) : createSlackNotifierFromEnv()
+
+const repoRoot = join(import.meta.dir, '..')
+const watchPaths = [
+  join(repoRoot, 'accuracy'),
+  join(repoRoot, 'benchmarks'),
+]
+
+let debounceTimer: ReturnType<typeof setTimeout> | null = null
+
+async function runCheck(): Promise<void> {
+  console.log(`[${new Date().toISOString()}] Running regression check for: ${browsers.join(', ')}`)
+  const report = await detectRegressions(browsers)
+  const text = formatRegressionReport(report)
+  console.log(text)
+
+  if (notifier != null && (report.hasBlocker || report.performanceRegressions.length > 0)) {
+    try {
+      await notifier.notifyRegressionReport(report)
+      console.log('Slack notification sent.')
+    } catch (err) {
+      console.error('Failed to send Slack notification:', err)
+    }
+  }
+}
+
+function scheduleCheck(): void {
+  if (debounceTimer != null) clearTimeout(debounceTimer)
+  debounceTimer = setTimeout(() => {
+    runCheck().catch(err => console.error('Regression check failed:', err))
+  }, 500)
+}
+
+// Run once immediately on start.
+await runCheck()
+
+// Watch the accuracy and benchmarks directories for changes.
+for (const watchPath of watchPaths) {
+  try {
+    watch(watchPath, { recursive: false }, (_event, filename) => {
+      if (filename?.endsWith('.json')) {
+        console.log(`[watch] Changed: ${watchPath}/${filename}`)
+        scheduleCheck()
+      }
+    })
+    console.log(`Watching ${watchPath}`)
+  } catch {
+    // Directory may not exist — silently skip.
+  }
+}
+
+console.log('Press Ctrl+C to stop.')
diff --git a/src/measurement-validator/dashboard-server.ts b/src/measurement-validator/dashboard-server.ts
new file mode 100644
index 00000000..046439cc
--- /dev/null
+++ b/src/measurement-validator/dashboard-server.ts
@@ -0,0 +1,260 @@
+// Dashboard HTTP server for the measurement-validator.
+//
+// Serves a JSON API over the checked-in accuracy/benchmark/status data and an
+// optional SQLite results history. Built on Bun.serve() — no external HTTP
+// framework required.
+// +// API endpoints: +// GET /api/status — status/dashboard.json +// GET /api/accuracy/:browser — accuracy/.json +// GET /api/benchmarks/:browser — benchmarks/.json +// GET /api/runs — recent validation runs from SQLite (if DB enabled) +// GET /api/runs/summaries — high-level trend summaries +// POST /api/runs — insert a new run record +// GET /health — liveness check +// GET / — embedded dashboard HTML +// +// Usage: +// import { DashboardServer } from './dashboard-server.js' +// const server = new DashboardServer({ port: 3001 }) +// server.start() + +import { readFileSync } from 'node:fs' +import { join } from 'node:path' +import { ResultsDatabase } from './results-database.js' +import type { QueryOptions } from './results-database.js' +import type { BrowserName, ValidationRunRecord } from './types.js' + +export type DashboardServerOptions = { + port?: number + host?: string + /** Enable the SQLite results database. Defaults to true. */ + enableDatabase?: boolean + /** Path to the SQLite file. Defaults to .measurement-results.db in repoRoot. */ + dbPath?: string + /** Repository root for resolving data files. 
*/ + repoRoot?: string +} + +const BROWSERS: BrowserName[] = ['chrome', 'safari', 'firefox'] + +function jsonResponse(data: unknown, status = 200): Response { + return new Response(JSON.stringify(data, null, 2), { + status, + headers: { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*' }, + }) +} + +function notFound(message: string): Response { + return jsonResponse({ error: message }, 404) +} + +function loadJsonFile(path: string): unknown { + try { + return JSON.parse(readFileSync(path, 'utf-8')) + } catch { + return null + } +} + +function buildDashboardHtml(repoRoot: string): string { + const statusPath = join(repoRoot, 'status', 'dashboard.json') + const status = loadJsonFile(statusPath) + + const browsers = BROWSERS.map(b => { + const acc = loadJsonFile(join(repoRoot, 'accuracy', `${b}.json`)) as + | { total?: number; matchCount?: number } + | null + return { + name: b, + total: acc?.total ?? 0, + matches: acc?.matchCount ?? 0, + } + }) + + const tableRows = browsers + .map( + b => + ` + ${b.name} + ${b.matches} + ${b.total} + ${b.total > 0 ? ((b.matches / b.total) * 100).toFixed(2) : 'n/a'}% + `, + ) + .join('\n') + + return ` + + + + + Measurement Validator Dashboard + + + +

+  <h1>📊 Measurement Validator Dashboard</h1>
+
+  <h2>Browser Accuracy</h2>
+  <table>
+    <thead>
+      <tr><th>Browser</th><th>Matches</th><th>Total</th><th>Accuracy</th></tr>
+    </thead>
+    <tbody>
+${tableRows}
+    </tbody>
+  </table>
+
+  <h2>Status Dashboard JSON</h2>
+  <pre>${JSON.stringify(status, null, 2)}</pre>
+
+  <h2>Recent Validation Runs</h2>
+  <div id="runs">Loading…</div>
+ + + + + +` +} + +export class DashboardServer { + private options: Required + private db: ResultsDatabase | null = null + private server: ReturnType | null = null + + constructor(options: DashboardServerOptions = {}) { + this.options = { + port: options.port ?? 3001, + host: options.host ?? '127.0.0.1', + enableDatabase: options.enableDatabase ?? true, + dbPath: options.dbPath ?? '', + repoRoot: options.repoRoot ?? join(import.meta.dir, '..', '..'), + } + } + + start(): void { + if (this.options.enableDatabase) { + this.db = new ResultsDatabase( + this.options.dbPath !== '' ? this.options.dbPath : undefined, + ) + } + + this.server = Bun.serve({ + port: this.options.port, + hostname: this.options.host, + fetch: (req: Request): Response | Promise => this.handleRequest(req), + }) + + console.log( + `Dashboard server running at http://${this.options.host}:${this.options.port}`, + ) + } + + stop(): void { + void this.server?.stop() + this.db?.close() + } + + private handleRequest(req: Request): Response | Promise { + const url = new URL(req.url) + const { pathname } = url + const { repoRoot } = this.options + + if (req.method === 'GET') { + if (pathname === '/' || pathname === '/dashboard') { + return new Response(buildDashboardHtml(repoRoot), { + headers: { 'Content-Type': 'text/html' }, + }) + } + if (pathname === '/health') { + return jsonResponse({ status: 'ok', ts: new Date().toISOString() }) + } + if (pathname === '/api/status') { + const data = loadJsonFile(join(repoRoot, 'status', 'dashboard.json')) + return data != null ? jsonResponse(data) : notFound('status/dashboard.json not found') + } + if (pathname.startsWith('/api/accuracy/')) { + const browser = pathname.slice('/api/accuracy/'.length) as BrowserName + if (!BROWSERS.includes(browser)) return notFound(`Unknown browser: ${browser}`) + const data = loadJsonFile(join(repoRoot, 'accuracy', `${browser}.json`)) + return data != null ? 
jsonResponse(data) : notFound(`accuracy/${browser}.json not found`) + } + if (pathname.startsWith('/api/benchmarks/')) { + const browser = pathname.slice('/api/benchmarks/'.length) as BrowserName + if (!BROWSERS.includes(browser)) return notFound(`Unknown browser: ${browser}`) + const data = loadJsonFile(join(repoRoot, 'benchmarks', `${browser}.json`)) + return data != null ? jsonResponse(data) : notFound(`benchmarks/${browser}.json not found`) + } + if (pathname === '/api/runs') { + if (this.db == null) return jsonResponse({ error: 'Database not enabled' }, 503) + const browser = url.searchParams.get('browser') as BrowserName | null + const since = url.searchParams.get('since') + const limit = Number(url.searchParams.get('limit') ?? '100') + const tag = url.searchParams.get('tag') + const queryOpts: QueryOptions = { limit } + if (browser != null) queryOpts.browser = browser + if (since != null) queryOpts.since = since + if (tag != null) queryOpts.tag = tag + const runs = this.db.queryRuns(queryOpts) + return jsonResponse(runs) + } + if (pathname === '/api/runs/summaries') { + if (this.db == null) return jsonResponse({ error: 'Database not enabled' }, 503) + const browser = url.searchParams.get('browser') as BrowserName | null + const since = url.searchParams.get('since') + const limit = Number(url.searchParams.get('limit') ?? 
'50') + const summaryOpts: QueryOptions = { limit } + if (browser != null) summaryOpts.browser = browser + if (since != null) summaryOpts.since = since + const summaries = this.db.querySummaries(summaryOpts) + return jsonResponse(summaries) + } + } + + if (req.method === 'POST' && pathname === '/api/runs') { + return this.handlePostRun(req) + } + + return notFound(`No route for ${req.method} ${pathname}`) + } + + private async handlePostRun(req: Request): Promise { + if (this.db == null) return jsonResponse({ error: 'Database not enabled' }, 503) + let body: Omit + try { + body = (await req.json()) as Omit + } catch { + return jsonResponse({ error: 'Invalid JSON body' }, 400) + } + const id = this.db.insertRun(body) + return jsonResponse({ id }, 201) + } +} diff --git a/src/measurement-validator/performance-tracker.ts b/src/measurement-validator/performance-tracker.ts new file mode 100644 index 00000000..bff764aa --- /dev/null +++ b/src/measurement-validator/performance-tracker.ts @@ -0,0 +1,160 @@ +// Performance tracker for the measurement-validator. +// +// Loads benchmark snapshots from the checked-in `benchmarks/` directory, +// compares each entry against a baseline, and emits a structured +// PerformanceReport showing deltas and trend labels. +// +// Usage: +// import { trackPerformance } from './performance-tracker.js' +// const report = await trackPerformance('chrome', { warnPct: 10, criticalPct: 25 }) + +import { readFileSync } from 'node:fs' +import { join } from 'node:path' +import type { + BenchmarkEntry, + BenchmarkSnapshot, + BrowserName, + PerformanceMetrics, + PerformanceReport, +} from './types.js' + +export type TrackOptions = { + /** Percent increase that triggers a 'degrading' label. Default 10. */ + warnPct?: number + /** Percent increase that counts as a regression in the report count. Default 25. */ + criticalPct?: number + /** + * Root of the repository. 
Defaults to two levels up from this file so it
   * works whether the code is run from source or from `dist/`.
   */
  repoRoot?: string
}

// NOTE(review): the pasted patch stripped every `<...>` type-argument list;
// the generics below are restored from usage — confirm against commit 427328a.

/** Flatten every result section of a benchmark snapshot into one list. */
function collectEntries(snapshot: BenchmarkSnapshot): BenchmarkEntry[] {
  return [
    ...(snapshot.results ?? []),
    ...(snapshot.richResults ?? []),
    ...(snapshot.richInlineResults ?? []),
    ...(snapshot.richPreWrapResults ?? []),
    ...(snapshot.richLongResults ?? []),
  ]
}

/** Read and parse `benchmarks/<browser>.json`. Throws if the file is missing. */
function loadSnapshot(repoRoot: string, browser: BrowserName): BenchmarkSnapshot {
  const filePath = join(repoRoot, 'benchmarks', `${browser}.json`)
  const raw = readFileSync(filePath, 'utf-8')
  return JSON.parse(raw) as BenchmarkSnapshot
}

/**
 * Load the per-label baseline timings for `browser` from
 * `.measurement-baseline.json`. Returns an empty map when the baseline file
 * is absent or unparsable (e.g. on a first run) — callers then compare each
 * entry against itself.
 */
function loadBaseline(repoRoot: string, browser: BrowserName): Map<string, number> {
  const baselineFile = join(repoRoot, '.measurement-baseline.json')
  try {
    const raw = readFileSync(baselineFile, 'utf-8')
    const data = JSON.parse(raw) as Record<string, Record<string, number>>
    const browserData = data[browser]
    if (browserData == null) return new Map()
    return new Map(Object.entries(browserData))
  } catch {
    return new Map()
  }
}

/** Label a delta: <= -1% improving, >= warnPct degrading, otherwise stable. */
function classifyTrend(
  deltaPct: number,
  warnPct: number,
): PerformanceMetrics['trend'] {
  if (deltaPct <= -1) return 'improving'
  if (deltaPct >= warnPct) return 'degrading'
  return 'stable'
}

/**
 * Load the benchmark snapshot for `browser`, compare each entry against the
 * checked-in baseline (if any), and return a PerformanceReport.
 */
export async function trackPerformance(
  browser: BrowserName,
  options: TrackOptions = {},
): Promise<PerformanceReport> {
  const {
    warnPct = 10,
    criticalPct = 25,
    repoRoot = join(import.meta.dir, '..', '..'),
  } = options

  const snapshot = loadSnapshot(repoRoot, browser)
  const baseline = loadBaseline(repoRoot, browser)

  const entries = collectEntries(snapshot)
  const metrics: PerformanceMetrics[] = entries.map(entry => {
    // Entries with no recorded baseline compare against themselves (delta 0).
    const baselineMs = baseline.get(entry.label) ?? entry.ms
    const deltaMs = entry.ms - baselineMs
    const deltaPct = baselineMs === 0 ? 0 : (deltaMs / baselineMs) * 100
    return {
      label: entry.label,
      baselineMs,
      currentMs: entry.ms,
      deltaMs,
      deltaPct,
      trend: classifyTrend(deltaPct, warnPct),
    }
  })

  const regressionCount = metrics.filter(m => m.deltaPct >= criticalPct).length

  return {
    generatedAt: new Date().toISOString(),
    browser,
    metrics,
    regressionCount,
  }
}

/**
 * Write a new baseline file from the current benchmark snapshots.
 * Call this after a clean run to lock in today's numbers as the reference.
 */
export async function writeBaseline(
  browsers: BrowserName[],
  options: Pick<TrackOptions, 'repoRoot'> = {},
): Promise<void> {
  const { repoRoot = join(import.meta.dir, '..', '..') } = options
  const baseline: Record<string, Record<string, number>> = {}

  for (const browser of browsers) {
    try {
      const snapshot = loadSnapshot(repoRoot, browser)
      const entries = collectEntries(snapshot)
      baseline[browser] = Object.fromEntries(entries.map(e => [e.label, e.ms]))
    } catch {
      // Skip browsers whose snapshot is not present.
    }
  }

  const baselineFile = join(repoRoot, '.measurement-baseline.json')
  const { writeFileSync } = await import('node:fs')
  writeFileSync(baselineFile, JSON.stringify(baseline, null, 2) + '\n', 'utf-8')
}

/**
 * Format a PerformanceReport as a human-readable text block suitable for
 * console output or Slack messages.
 */
export function formatPerformanceReport(report: PerformanceReport): string {
  const lines: string[] = [
    `Performance report — ${report.browser} — ${report.generatedAt}`,
    '',
  ]
  for (const m of report.metrics) {
    const sign = m.deltaMs >= 0 ? '+' : ''
    const icon = m.trend === 'improving' ? '✅' : m.trend === 'degrading' ?
'⚠️' : '✅' + lines.push( + ` ${icon} ${m.label}: ${m.currentMs.toFixed(3)}ms (${sign}${m.deltaPct.toFixed(1)}%)`, + ) + } + if (report.regressionCount > 0) { + lines.push('') + lines.push(`⚠️ ${report.regressionCount} regression(s) detected`) + } + return lines.join('\n') +} diff --git a/src/measurement-validator/regression-detector.ts b/src/measurement-validator/regression-detector.ts new file mode 100644 index 00000000..69f1d0ae --- /dev/null +++ b/src/measurement-validator/regression-detector.ts @@ -0,0 +1,209 @@ +// Regression detector for the measurement-validator. +// +// Compares the current accuracy and benchmark snapshots against the checked-in +// baselines and emits a RegressionReport that the GitHub Actions workflow and +// dashboard server can consume. +// +// Usage: +// import { detectRegressions } from './regression-detector.js' +// const report = await detectRegressions(['chrome', 'safari', 'firefox']) + +import { readFileSync } from 'node:fs' +import { join } from 'node:path' +import type { + AccuracyRegression, + AccuracySnapshot, + BenchmarkEntry, + BenchmarkSnapshot, + BrowserName, + PerformanceRegression, + RegressionReport, + RegressionSeverity, +} from './types.js' + +export type DetectOptions = { + /** + * Percent accuracy drop that is flagged as a warning (0-100). + * Default: any regression (> 0 mismatches that weren't there before). + */ + accuracyWarnDelta?: number + /** Percent benchmark slowdown that triggers a warning. Default 10. */ + perfWarnPct?: number + /** Percent benchmark slowdown that triggers a critical flag. Default 25. */ + perfCriticalPct?: number + /** + * Override current accuracy match counts per browser so the detector can + * compare live browser-checker results against the checked-in baseline. + * When omitted the detector compares the checked-in snapshot against itself + * (always clean) — useful for CI runs that do not have browser access. + */ + currentAccuracy?: Partial> + /** Repository root. 
Defaults to two levels above this file. */ + repoRoot?: string +} + +function loadJson(path: string): T | null { + try { + const raw = readFileSync(path, 'utf-8') + return JSON.parse(raw) as T + } catch { + return null + } +} + +function severityFromAccuracyDelta(delta: number): RegressionSeverity { + if (delta === 0) return 'ok' + if (delta < 10) return 'warning' + return 'critical' +} + +function severityFromPerfDelta( + deltaPct: number, + warnPct: number, + criticalPct: number, +): RegressionSeverity { + if (deltaPct < warnPct) return 'ok' + if (deltaPct < criticalPct) return 'warning' + return 'critical' +} + +function collectEntries(snapshot: BenchmarkSnapshot): BenchmarkEntry[] { + return [ + ...(snapshot.results ?? []), + ...(snapshot.richResults ?? []), + ...(snapshot.richInlineResults ?? []), + ...(snapshot.richPreWrapResults ?? []), + ...(snapshot.richLongResults ?? []), + ] +} + +/** + * Compare the current accuracy and benchmark snapshots against the checked-in + * baseline data and return a RegressionReport. + * + * Accuracy baseline comes from `accuracy/.json` (the files checked + * into the repo). Performance baseline comes from `.measurement-baseline.json` + * (written by `writeBaseline()` in performance-tracker.ts). + */ +export async function detectRegressions( + browsers: BrowserName[], + options: DetectOptions = {}, +): Promise { + const { + accuracyWarnDelta = 0, + perfWarnPct = 10, + perfCriticalPct = 25, + currentAccuracy, + repoRoot = join(import.meta.dir, '..', '..'), + } = options + + const accuracyRegressions: AccuracyRegression[] = [] + const performanceRegressions: PerformanceRegression[] = [] + + // Load the performance baseline (may not exist on first run) + const baselineFile = join(repoRoot, '.measurement-baseline.json') + const baselineData = loadJson>>(baselineFile) ?? {} + + for (const browser of browsers) { + // --- Accuracy --- + // The checked-in `accuracy/.json` is the baseline. + // When `currentAccuracy` is provided (e.g. 
from a live browser checker run), + // compare it against the checked-in baseline to detect regressions. + // When omitted (CI without browser access) no accuracy regression is reported. + const accuracyPath = join(repoRoot, 'accuracy', `${browser}.json`) + const baseline = loadJson(accuracyPath) + const current = currentAccuracy?.[browser] + if (baseline != null && current != null) { + const delta = baseline.matchCount - current.matchCount + if (delta > accuracyWarnDelta) { + accuracyRegressions.push({ + browser, + baselineMatchCount: baseline.matchCount, + currentMatchCount: current.matchCount, + baselineTotal: baseline.total, + currentTotal: current.total, + delta, + severity: severityFromAccuracyDelta(delta), + }) + } + } + + // --- Performance --- + const benchmarkPath = join(repoRoot, 'benchmarks', `${browser}.json`) + const benchmark = loadJson(benchmarkPath) + if (benchmark != null) { + const browserBaseline = baselineData[browser] ?? {} + const entries = collectEntries(benchmark) + for (const entry of entries) { + const baselineMs = browserBaseline[entry.label] + if (baselineMs == null) continue + const deltaPct = baselineMs === 0 ? 0 : ((entry.ms - baselineMs) / baselineMs) * 100 + const severity = severityFromPerfDelta(deltaPct, perfWarnPct, perfCriticalPct) + if (severity !== 'ok') { + performanceRegressions.push({ + label: entry.label, + browser, + baselineMs, + currentMs: entry.ms, + deltaPct, + severity, + }) + } + } + } + } + + const hasBlocker = + accuracyRegressions.some(r => r.severity === 'critical') || + performanceRegressions.some(r => r.severity === 'critical') + + return { + generatedAt: new Date().toISOString(), + accuracyRegressions, + performanceRegressions, + hasBlocker, + } +} + +/** + * Format a RegressionReport as a human-readable text summary. 
+ */ +export function formatRegressionReport(report: RegressionReport): string { + const lines: string[] = [`Regression report — ${report.generatedAt}`, ''] + + if (report.accuracyRegressions.length === 0 && report.performanceRegressions.length === 0) { + lines.push('✅ No regressions detected') + return lines.join('\n') + } + + if (report.accuracyRegressions.length > 0) { + lines.push('Accuracy regressions:') + for (const r of report.accuracyRegressions) { + const icon = r.severity === 'critical' ? '❌' : '⚠️' + lines.push( + ` ${icon} ${r.browser}: ${r.currentMatchCount}/${r.currentTotal} matches ` + + `(was ${r.baselineMatchCount}/${r.baselineTotal}, Δ−${r.delta})`, + ) + } + lines.push('') + } + + if (report.performanceRegressions.length > 0) { + lines.push('Performance regressions:') + for (const r of report.performanceRegressions) { + const icon = r.severity === 'critical' ? '❌' : '⚠️' + const sign = r.deltaPct >= 0 ? '+' : '' + lines.push( + ` ${icon} [${r.browser}] ${r.label}: ${r.currentMs.toFixed(3)}ms ` + + `(was ${r.baselineMs.toFixed(3)}ms, ${sign}${r.deltaPct.toFixed(1)}%)`, + ) + } + } + + if (report.hasBlocker) { + lines.push('') + lines.push('❌ Build should be blocked: critical regression(s) detected') + } + + return lines.join('\n') +} diff --git a/src/measurement-validator/results-database.ts b/src/measurement-validator/results-database.ts new file mode 100644 index 00000000..3d0c989c --- /dev/null +++ b/src/measurement-validator/results-database.ts @@ -0,0 +1,155 @@ +// SQLite persistence for the measurement-validator. +// +// Stores validation run records in a local SQLite database using Bun's +// built-in `bun:sqlite` module — zero extra dependencies. 
+// +// Usage: +// import { ResultsDatabase } from './results-database.js' +// const db = new ResultsDatabase() +// await db.insertRun(record) +// const runs = db.queryRuns({ browser: 'chrome', limit: 50 }) +// db.close() + +import { Database } from 'bun:sqlite' +import { randomUUID } from 'node:crypto' +import { join } from 'node:path' +import type { BrowserName, ValidationRunRecord } from './types.js' + +export type QueryOptions = { + browser?: BrowserName + /** ISO timestamp — return only runs at or after this time. */ + since?: string + /** Maximum number of rows to return (default 100). */ + limit?: number + /** Free-text tag that must appear in the `tags` field. */ + tag?: string +} + +export type RunSummary = { + runAt: string + browser: BrowserName + accuracyPct: number + regressionCount: number +} + +const CREATE_TABLE_SQL = ` +CREATE TABLE IF NOT EXISTS validation_runs ( + id TEXT PRIMARY KEY, + run_at TEXT NOT NULL, + browser TEXT NOT NULL, + accuracy_total INTEGER NOT NULL, + accuracy_matches INTEGER NOT NULL, + benchmark_json TEXT NOT NULL DEFAULT '{}', + regression_json TEXT NOT NULL DEFAULT '{}', + tags TEXT NOT NULL DEFAULT '' +)` + +export class ResultsDatabase { + private db: Database + + constructor(dbPath?: string) { + const resolvedPath = + dbPath ?? join(import.meta.dir, '..', '..', '.measurement-results.db') + this.db = new Database(resolvedPath, { create: true }) + this.db.run(CREATE_TABLE_SQL) + } + + /** Insert a new validation run record. Generates an ID if one is not provided. */ + insertRun(record: Omit & { id?: string }): string { + const id = record.id ?? 
randomUUID() + this.db.run( + `INSERT INTO validation_runs + (id, run_at, browser, accuracy_total, accuracy_matches, + benchmark_json, regression_json, tags) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + [ + id, + record.runAt, + record.browser, + record.accuracyTotal, + record.accuracyMatches, + record.benchmarkJson, + record.regressionJson, + record.tags, + ], + ) + return id + } + + /** Retrieve validation runs with optional filters. */ + queryRuns(options: QueryOptions = {}): ValidationRunRecord[] { + const { browser, since, limit = 100, tag } = options + const conditions: string[] = [] + const params: (string | number)[] = [] + + if (browser != null) { + conditions.push('browser = ?') + params.push(browser) + } + if (since != null) { + conditions.push('run_at >= ?') + params.push(since) + } + if (tag != null) { + conditions.push('tags LIKE ?') + params.push(`%${tag}%`) + } + + const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '' + const sql = `SELECT id, run_at, browser, accuracy_total, accuracy_matches, + benchmark_json, regression_json, tags + FROM validation_runs + ${where} + ORDER BY run_at DESC + LIMIT ?` + + const rows = this.db.query(sql).all(...params, limit) as Array> + return rows.map(row => ({ + id: row['id'] as string, + runAt: row['run_at'] as string, + browser: row['browser'] as BrowserName, + accuracyTotal: row['accuracy_total'] as number, + accuracyMatches: row['accuracy_matches'] as number, + benchmarkJson: row['benchmark_json'] as string, + regressionJson: row['regression_json'] as string, + tags: row['tags'] as string, + })) + } + + /** Return high-level summaries suitable for the dashboard trends view. 
*/ + querySummaries(options: QueryOptions = {}): RunSummary[] { + const runs = this.queryRuns(options) + return runs.map(r => { + let regressionCount = 0 + try { + const parsed = JSON.parse(r.regressionJson) as { + performanceRegressions?: unknown[] + accuracyRegressions?: unknown[] + } + regressionCount = + (parsed.performanceRegressions?.length ?? 0) + + (parsed.accuracyRegressions?.length ?? 0) + } catch { + // ignore parse errors + } + const accuracyPct = + r.accuracyTotal > 0 ? (r.accuracyMatches / r.accuracyTotal) * 100 : 100 + return { + runAt: r.runAt, + browser: r.browser, + accuracyPct, + regressionCount, + } + }) + } + + /** Delete all runs older than the given ISO timestamp. */ + pruneOlderThan(timestamp: string): number { + const result = this.db.run('DELETE FROM validation_runs WHERE run_at < ?', [timestamp]) + return result.changes + } + + close(): void { + this.db.close() + } +} diff --git a/src/measurement-validator/slack-notifier.ts b/src/measurement-validator/slack-notifier.ts new file mode 100644 index 00000000..177ab48a --- /dev/null +++ b/src/measurement-validator/slack-notifier.ts @@ -0,0 +1,149 @@ +// Slack notifier for the measurement-validator. +// +// Sends webhook notifications to a Slack channel when validation runs +// complete or when regressions are detected. Uses Slack's Incoming +// Webhooks API — no Slack SDK dependency needed. 
+// +// Usage: +// import { SlackNotifier } from './slack-notifier.js' +// const notifier = new SlackNotifier(process.env.SLACK_WEBHOOK_URL) +// await notifier.notifyRegressionReport(report) + +import type { PerformanceReport, RegressionReport } from './types.js' + +export type SlackBlock = + | { type: 'header'; text: { type: 'plain_text'; text: string } } + | { type: 'section'; text: { type: 'mrkdwn'; text: string } } + | { type: 'divider' } + +export type SlackPayload = { + text: string + blocks?: SlackBlock[] +} + +export class SlackNotifier { + private webhookUrl: string + + constructor(webhookUrl: string) { + this.webhookUrl = webhookUrl + } + + /** Low-level send: POST a SlackPayload to the configured webhook URL. */ + async send(payload: SlackPayload): Promise { + const response = await fetch(this.webhookUrl, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(payload), + }) + if (!response.ok) { + throw new Error( + `Slack webhook returned ${response.status}: ${await response.text()}`, + ) + } + } + + /** Notify about a completed regression detection report. */ + async notifyRegressionReport(report: RegressionReport): Promise { + const totalIssues = + report.accuracyRegressions.length + report.performanceRegressions.length + + const statusIcon = report.hasBlocker ? '❌' : totalIssues > 0 ? '⚠️' : '✅' + const statusText = report.hasBlocker + ? 'Critical regressions detected — build blocked' + : totalIssues > 0 + ? 
`${totalIssues} regression(s) detected` + : 'All checks passed' + + const blocks: SlackBlock[] = [ + { + type: 'header', + text: { + type: 'plain_text', + text: `${statusIcon} Measurement Validator — ${statusText}`, + }, + }, + ] + + if (report.accuracyRegressions.length > 0) { + const lines = report.accuracyRegressions.map( + r => + `• *${r.browser}*: ${r.currentMatchCount}/${r.currentTotal} matches ` + + `(Δ −${r.delta}, ${r.severity})`, + ) + blocks.push({ type: 'section', text: { type: 'mrkdwn', text: `*Accuracy*\n${lines.join('\n')}` } }) + } + + if (report.performanceRegressions.length > 0) { + const lines = report.performanceRegressions.map(r => { + const sign = r.deltaPct >= 0 ? '+' : '' + return ( + `• *[${r.browser}]* ${r.label}: ` + + `${r.currentMs.toFixed(3)}ms (${sign}${r.deltaPct.toFixed(1)}%, ${r.severity})` + ) + }) + blocks.push({ + type: 'section', + text: { type: 'mrkdwn', text: `*Performance*\n${lines.join('\n')}` }, + }) + } + + blocks.push({ type: 'divider' }) + blocks.push({ + type: 'section', + text: { type: 'mrkdwn', text: `_Generated at ${report.generatedAt}_` }, + }) + + await this.send({ text: `${statusIcon} Measurement Validator: ${statusText}`, blocks }) + } + + /** Notify about a performance tracking report. */ + async notifyPerformanceReport(report: PerformanceReport): Promise { + const degraded = report.metrics.filter(m => m.trend === 'degrading') + const statusIcon = report.regressionCount > 0 ? '⚠️' : '✅' + const statusText = + report.regressionCount > 0 + ? `${report.regressionCount} performance regression(s) — ${report.browser}` + : `Performance OK — ${report.browser}` + + const lines = degraded.map(m => { + const sign = m.deltaPct >= 0 ? '+' : '' + return `• ${m.label}: ${m.currentMs.toFixed(3)}ms (${sign}${m.deltaPct.toFixed(1)}%)` + }) + + const body = + lines.length > 0 + ? `*Degraded benchmarks*\n${lines.join('\n')}` + : '✅ All benchmarks within expected range.' 
+ + await this.send({ + text: `${statusIcon} Performance report (${report.browser}): ${statusText}`, + blocks: [ + { + type: 'header', + text: { type: 'plain_text', text: `${statusIcon} Performance Report — ${report.browser}` }, + }, + { type: 'section', text: { type: 'mrkdwn', text: body } }, + { type: 'divider' }, + { + type: 'section', + text: { type: 'mrkdwn', text: `_Generated at ${report.generatedAt}_` }, + }, + ], + }) + } + + /** Send a plain text message. */ + async notifyText(text: string): Promise { + await this.send({ text }) + } +} + +/** + * Convenience factory that reads the webhook URL from `SLACK_WEBHOOK_URL` + * environment variable and returns null if it is not set. + */ +export function createSlackNotifierFromEnv(): SlackNotifier | null { + const url = process.env['SLACK_WEBHOOK_URL'] + if (url == null || url.trim() === '') return null + return new SlackNotifier(url) +} diff --git a/src/measurement-validator/types.ts b/src/measurement-validator/types.ts new file mode 100644 index 00000000..31eb8e04 --- /dev/null +++ b/src/measurement-validator/types.ts @@ -0,0 +1,80 @@ +// Shared types for the measurement-validator Phase 4 components. 
+ +export type BrowserName = 'chrome' | 'safari' | 'firefox' + +export type AccuracySnapshot = { + status: string + total: number + matchCount: number + mismatchCount: number +} + +export type BenchmarkEntry = { + label: string + ms: number + desc: string +} + +export type BenchmarkSnapshot = { + status: string + results?: BenchmarkEntry[] + richResults?: BenchmarkEntry[] + richInlineResults?: BenchmarkEntry[] + richPreWrapResults?: BenchmarkEntry[] + richLongResults?: BenchmarkEntry[] +} + +export type PerformanceMetrics = { + label: string + baselineMs: number + currentMs: number + deltaMs: number + deltaPct: number + trend: 'improving' | 'stable' | 'degrading' +} + +export type PerformanceReport = { + generatedAt: string + browser: BrowserName + metrics: PerformanceMetrics[] + regressionCount: number +} + +export type RegressionSeverity = 'ok' | 'warning' | 'critical' + +export type AccuracyRegression = { + browser: BrowserName + baselineMatchCount: number + currentMatchCount: number + baselineTotal: number + currentTotal: number + delta: number + severity: RegressionSeverity +} + +export type PerformanceRegression = { + label: string + browser: BrowserName + baselineMs: number + currentMs: number + deltaPct: number + severity: RegressionSeverity +} + +export type RegressionReport = { + generatedAt: string + accuracyRegressions: AccuracyRegression[] + performanceRegressions: PerformanceRegression[] + hasBlocker: boolean +} + +export type ValidationRunRecord = { + id: string + runAt: string + browser: BrowserName + accuracyTotal: number + accuracyMatches: number + benchmarkJson: string + regressionJson: string + tags: string +} diff --git a/tsconfig.build.json b/tsconfig.build.json index a7ce060d..95e31e71 100644 --- a/tsconfig.build.json +++ b/tsconfig.build.json @@ -9,5 +9,5 @@ "declaration": true }, "include": ["src/**/*.ts"], - "exclude": ["src/layout.test.ts", "src/test-data.ts"] + "exclude": ["src/layout.test.ts", "src/test-data.ts", 
"src/measurement-validator/**"] }