From 9c5d95a23d41d5d45f528fd492a1ac1321577355 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 5 Apr 2026 10:34:06 +0000 Subject: [PATCH 1/2] Initial plan From 427328a874e21b694d4329d233ed595456799828 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 5 Apr 2026 10:46:42 +0000 Subject: [PATCH 2/2] Phase 4: GitHub Integration & Advanced Features (measurement-validator) Agent-Logs-Url: https://github.com/Himaan1998Y/pretext/sessions/aedd6bf4-3029-4234-a28d-a885603e3d95 Co-authored-by: Himaan1998Y <210527591+Himaan1998Y@users.noreply.github.com> --- .github/workflows/measurement-validation.yml | 95 +++++++ .gitignore | 8 + docs/measurement-validator/README.md | 211 ++++++++++++++ package.json | 7 +- scripts/validator-dashboard.ts | 35 +++ scripts/validator-regression-detect.ts | 84 ++++++ scripts/validator-trends.ts | 39 +++ scripts/validator-watch.ts | 86 ++++++ src/measurement-validator/dashboard-server.ts | 260 ++++++++++++++++++ .../performance-tracker.ts | 160 +++++++++++ .../regression-detector.ts | 209 ++++++++++++++ src/measurement-validator/results-database.ts | 155 +++++++++++ src/measurement-validator/slack-notifier.ts | 149 ++++++++++ src/measurement-validator/types.ts | 80 ++++++ tsconfig.build.json | 2 +- 15 files changed, 1578 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/measurement-validation.yml create mode 100644 docs/measurement-validator/README.md create mode 100644 scripts/validator-dashboard.ts create mode 100644 scripts/validator-regression-detect.ts create mode 100644 scripts/validator-trends.ts create mode 100644 scripts/validator-watch.ts create mode 100644 src/measurement-validator/dashboard-server.ts create mode 100644 src/measurement-validator/performance-tracker.ts create mode 100644 src/measurement-validator/regression-detector.ts create mode 100644 src/measurement-validator/results-database.ts 
create mode 100644 src/measurement-validator/slack-notifier.ts create mode 100644 src/measurement-validator/types.ts diff --git a/.github/workflows/measurement-validation.yml b/.github/workflows/measurement-validation.yml new file mode 100644 index 00000000..365edbc0 --- /dev/null +++ b/.github/workflows/measurement-validation.yml @@ -0,0 +1,95 @@ +name: Measurement Validation + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + pull-requests: write + +jobs: + validate: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v5 + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: TypeScript type-check + run: bun run check + + - name: Run unit tests + run: bun test src/layout.test.ts + + - name: Performance trends (chrome) + run: bun run validator:trends --browser=chrome --json > /tmp/perf-chrome.json || true + + - name: Regression detection + id: regression + run: | + bun run validator:regression-detect --json > /tmp/regressions.json 2>&1 || true + cat /tmp/regressions.json + + - name: Upload validation artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: measurement-validation-results + path: | + /tmp/perf-chrome.json + /tmp/regressions.json + if-no-files-found: warn + + - name: Post PR summary + if: github.event_name == 'pull_request' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs') + let perfSummary = '_(no data)_' + let regressionSummary = '_(no data)_' + + try { + const perf = JSON.parse(fs.readFileSync('/tmp/perf-chrome.json', 'utf-8')) + const degraded = (perf.metrics || []).filter(m => m.trend === 'degrading') + perfSummary = degraded.length === 0 + ? 
'✅ All benchmarks within expected range' + : `⚠️ ${degraded.length} degraded benchmark(s)` + } catch {} + + try { + const reg = JSON.parse(fs.readFileSync('/tmp/regressions.json', 'utf-8')) + const total = + (reg.accuracyRegressions || []).length + + (reg.performanceRegressions || []).length + regressionSummary = reg.hasBlocker + ? `❌ Critical regression(s) detected — ${total} issue(s)` + : total > 0 + ? `⚠️ ${total} regression(s) detected` + : '✅ No regressions detected' + } catch {} + + const body = [ + '## 📊 Measurement Validator Results', + '', + `**Performance (Chrome):** ${perfSummary}`, + `**Regressions:** ${regressionSummary}`, + '', + `_Workflow run: [${context.runId}](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId})_`, + ].join('\n') + + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body, + }) diff --git a/.gitignore b/.gitignore index 7428ea11..5f58d4df 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,11 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json # Finder (MacOS) folder config .DS_Store + +# measurement-validator runtime files +.measurement-results.db +.measurement-results.db-shm +.measurement-results.db-wal + +# npm lockfile (project uses bun.lock) +package-lock.json diff --git a/docs/measurement-validator/README.md b/docs/measurement-validator/README.md new file mode 100644 index 00000000..f39a126b --- /dev/null +++ b/docs/measurement-validator/README.md @@ -0,0 +1,211 @@ +# Measurement Validator — Phase 4 Documentation + +## Overview + +The measurement-validator Phase 4 components add GitHub CI integration, +performance tracking, regression detection, a live dashboard server, SQLite +persistence, and Slack notifications on top of the existing accuracy and +benchmark infrastructure. 
+ +All components are built with TypeScript and Bun's built-in APIs — no +extra runtime dependencies are needed beyond what is already in +`package.json`. + +--- + +## Components + +### 1. GitHub Actions Workflow + +**File:** `.github/workflows/measurement-validation.yml` + +Runs automatically on every push to `main` and on every pull request: + +- TypeScript type-check (`bun run check`) +- Unit tests (`bun test src/layout.test.ts`) +- Performance trends for Chrome +- Regression detection across configured browsers +- Uploads JSON artifacts (performance + regressions) +- Posts a summary comment to open PRs + +### 2. Performance Tracker + +**File:** `src/measurement-validator/performance-tracker.ts` + +Loads benchmark snapshots from `benchmarks/.json`, compares each +entry against a baseline stored in `.measurement-baseline.json`, and +produces a `PerformanceReport`. + +```typescript +import { trackPerformance, writeBaseline, formatPerformanceReport } from './performance-tracker.js' + +// Compare current benchmarks against baseline +const report = await trackPerformance('chrome', { warnPct: 10, criticalPct: 25 }) +console.log(formatPerformanceReport(report)) + +// Write a new baseline from current snapshots +await writeBaseline(['chrome', 'safari']) +``` + +### 3. Regression Detector + +**File:** `src/measurement-validator/regression-detector.ts` + +Detects accuracy and performance regressions across multiple browsers. + +```typescript +import { detectRegressions, formatRegressionReport } from './regression-detector.js' + +const report = await detectRegressions(['chrome', 'safari', 'firefox']) +console.log(formatRegressionReport(report)) + +if (report.hasBlocker) process.exit(1) +``` + +### 4. Dashboard Server + +**File:** `src/measurement-validator/dashboard-server.ts` + +An HTTP server (Bun.serve) that exposes the accuracy/benchmark/status data +as a JSON API and serves an embedded HTML dashboard. 
+ +```typescript +import { DashboardServer } from './dashboard-server.js' + +const server = new DashboardServer({ port: 3001 }) +server.start() +// http://localhost:3001 — dashboard UI +// http://localhost:3001/api/status — status JSON +// http://localhost:3001/api/accuracy/chrome — accuracy data +``` + +**API endpoints:** + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/` | HTML dashboard | +| GET | `/health` | Liveness check | +| GET | `/api/status` | `status/dashboard.json` | +| GET | `/api/accuracy/:browser` | `accuracy/.json` | +| GET | `/api/benchmarks/:browser` | `benchmarks/.json` | +| GET | `/api/runs` | Recent validation runs (SQLite) | +| GET | `/api/runs/summaries` | High-level trend summaries | +| POST | `/api/runs` | Insert a new run record | + +### 5. Results Database + +**File:** `src/measurement-validator/results-database.ts` + +SQLite persistence via Bun's built-in `bun:sqlite`. Stores validation run +records with accuracy, benchmark, and regression data. + +```typescript +import { ResultsDatabase } from './results-database.js' + +const db = new ResultsDatabase() + +db.insertRun({ + runAt: new Date().toISOString(), + browser: 'chrome', + accuracyTotal: 7680, + accuracyMatches: 7680, + benchmarkJson: JSON.stringify(benchmarkReport), + regressionJson: JSON.stringify(regressionReport), + tags: 'pr:123', +}) + +const recent = db.queryRuns({ browser: 'chrome', limit: 20 }) +const summaries = db.querySummaries({ since: '2026-01-01T00:00:00Z' }) +db.close() +``` + +### 6. Slack Notifier + +**File:** `src/measurement-validator/slack-notifier.ts` + +Sends formatted Slack messages via an Incoming Webhook URL. Reads the URL +from `SLACK_WEBHOOK_URL` environment variable when using the factory helper. 
+ +```typescript +import { SlackNotifier, createSlackNotifierFromEnv } from './slack-notifier.js' + +const notifier = createSlackNotifierFromEnv() // reads SLACK_WEBHOOK_URL +if (notifier) { + await notifier.notifyRegressionReport(report) + await notifier.notifyPerformanceReport(perfReport) + await notifier.notifyText('Custom message') +} +``` + +--- + +## CLI Scripts + +### `bun run validator:dashboard` + +Start the dashboard HTTP server. + +``` +bun run validator:dashboard [--port=3001] [--host=127.0.0.1] [--no-db] +``` + +### `bun run validator:trends` + +Print performance trend report. + +``` +bun run validator:trends [--browser=chrome] [--warn=10] [--critical=25] [--json] +``` + +### `bun run validator:watch` + +Watch the `accuracy/` and `benchmarks/` directories and re-run regression +detection whenever a snapshot file changes. + +``` +bun run validator:watch [--browsers=chrome,safari,firefox] [--slack-webhook=] +``` + +### `bun run validator:regression-detect` + +Run one-shot regression detection (used in CI). + +``` +bun run validator:regression-detect [--browsers=chrome] [--json] [--fail-on-critical] +``` + +--- + +## Configuration + +### Performance Baseline + +Write a baseline from the current benchmark snapshots: + +```bash +bun -e "import('./src/measurement-validator/performance-tracker.js').then(m => m.writeBaseline(['chrome', 'safari']))" +``` + +This creates `.measurement-baseline.json` which is checked into version +control. Commit it alongside any intentional performance changes. + +### Slack Webhook + +Set the `SLACK_WEBHOOK_URL` environment variable (e.g. in a GitHub Actions +secret) to enable Slack notifications. The notifier is disabled silently +when the variable is absent. 
+ +--- + +## Data Files + +| File | Purpose | +|------|---------| +| `accuracy/chrome.json` | Chrome accuracy snapshot (baseline) | +| `accuracy/safari.json` | Safari accuracy snapshot (baseline) | +| `accuracy/firefox.json` | Firefox accuracy snapshot (baseline) | +| `benchmarks/chrome.json` | Chrome benchmark snapshot | +| `benchmarks/safari.json` | Safari benchmark snapshot | +| `status/dashboard.json` | Aggregated status dashboard | +| `.measurement-baseline.json` | Performance baseline (generated, commit after intentional changes) | +| `.measurement-results.db` | SQLite results history (not committed) | diff --git a/package.json b/package.json index 0b28a0e4..57a780dc 100644 --- a/package.json +++ b/package.json @@ -29,6 +29,7 @@ "src", "!src/layout.test.ts", "!src/test-data.ts", + "!src/measurement-validator", "pages/demos", "pages/assets" ], @@ -68,7 +69,11 @@ "site:build": "rm -rf site && bun run scripts/build-demo-site.ts", "start": "HOST=${HOST:-127.0.0.1}; PORT=3000; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Freeing port $PORT: terminating $pids\"; kill $pids 2>/dev/null || true; sleep 1; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Port $PORT still busy: killing $pids\"; kill -9 $pids 2>/dev/null || true; fi; fi; bun pages/*.html pages/demos/*.html pages/demos/*/index.html --host=$HOST:$PORT", "start:lan": "HOST=0.0.0.0 bun run start", - "start:watch": "HOST=${HOST:-127.0.0.1}; PORT=3000; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Freeing port $PORT: terminating $pids\"; kill $pids 2>/dev/null || true; sleep 1; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Port $PORT still busy: killing $pids\"; kill -9 $pids 2>/dev/null || true; fi; fi; bun pages/*.html pages/demos/*.html pages/demos/*/index.html --watch --no-clear-screen --host=$HOST:$PORT" + "start:watch": "HOST=${HOST:-127.0.0.1}; 
PORT=3000; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Freeing port $PORT: terminating $pids\"; kill $pids 2>/dev/null || true; sleep 1; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Port $PORT still busy: killing $pids\"; kill -9 $pids 2>/dev/null || true; fi; fi; bun pages/*.html pages/demos/*.html pages/demos/*/index.html --watch --no-clear-screen --host=$HOST:$PORT", + "validator:dashboard": "bun run scripts/validator-dashboard.ts", + "validator:trends": "bun run scripts/validator-trends.ts", + "validator:watch": "bun run scripts/validator-watch.ts", + "validator:regression-detect": "bun run scripts/validator-regression-detect.ts" }, "devDependencies": { "@types/bun": "latest", diff --git a/scripts/validator-dashboard.ts b/scripts/validator-dashboard.ts new file mode 100644 index 00000000..3462d677 --- /dev/null +++ b/scripts/validator-dashboard.ts @@ -0,0 +1,35 @@ +#!/usr/bin/env bun +// validator-dashboard.ts — start the measurement-validator HTTP dashboard. +// +// Usage: +// bun run scripts/validator-dashboard.ts [--port=3001] [--host=127.0.0.1] [--no-db] +// +// Flags: +// --port=N Port to listen on (default 3001) +// --host=H Hostname/IP to bind (default 127.0.0.1) +// --no-db Disable SQLite persistence (serve read-only data only) + +import { DashboardServer } from '../src/measurement-validator/dashboard-server.js' + +function parseFlag(name: string): string | null { + const prefix = `--${name}=` + const arg = process.argv.find(v => v.startsWith(prefix)) + return arg !== undefined ? arg.slice(prefix.length) : null +} + +function hasFlag(name: string): boolean { + return process.argv.includes(`--${name}`) +} + +const port = Number(parseFlag('port') ?? 3001) +const host = parseFlag('host') ?? 
'127.0.0.1' +const enableDatabase = !hasFlag('no-db') + +const server = new DashboardServer({ port, host, enableDatabase }) +server.start() + +process.on('SIGINT', () => { + console.log('\nShutting down dashboard server…') + server.stop() + process.exit(0) +}) diff --git a/scripts/validator-regression-detect.ts b/scripts/validator-regression-detect.ts new file mode 100644 index 00000000..0d08862e --- /dev/null +++ b/scripts/validator-regression-detect.ts @@ -0,0 +1,84 @@ +#!/usr/bin/env bun +// validator-regression-detect.ts — run regression detection and report results. +// +// Usage: +// bun run scripts/validator-regression-detect.ts [--browsers=chrome,safari,firefox] +// [--json] +// [--slack-webhook=] +// [--fail-on-critical] +// +// Flags: +// --browsers=B Comma-separated browser list (default: chrome) +// --warn=N Perf warning threshold in % (default 10) +// --critical=N Perf critical threshold in % (default 25) +// --json Emit JSON output instead of human-readable text +// --slack-webhook=URL Send Slack notification +// --fail-on-critical Exit with code 1 when critical regressions are found + +import { + detectRegressions, + formatRegressionReport, +} from '../src/measurement-validator/regression-detector.js' +import { + createSlackNotifierFromEnv, + SlackNotifier, +} from '../src/measurement-validator/slack-notifier.js' +import { ResultsDatabase } from '../src/measurement-validator/results-database.js' +import type { BrowserName } from '../src/measurement-validator/types.js' + +function parseFlag(name: string): string | null { + const prefix = `--${name}=` + const arg = process.argv.find(v => v.startsWith(prefix)) + return arg !== undefined ? arg.slice(prefix.length) : null +} + +const browsersArg = parseFlag('browsers') ?? 'chrome' +const browsers = browsersArg.split(',').map(b => b.trim()) as BrowserName[] +const warnPct = Number(parseFlag('warn') ?? 10) +const criticalPct = Number(parseFlag('critical') ?? 
25) +const emitJson = process.argv.includes('--json') +const failOnCritical = process.argv.includes('--fail-on-critical') +const slackUrl = parseFlag('slack-webhook') + +const report = await detectRegressions(browsers, { perfWarnPct: warnPct, perfCriticalPct: criticalPct }) + +if (emitJson) { + console.log(JSON.stringify(report, null, 2)) +} else { + console.log(formatRegressionReport(report)) +} + +// Persist to SQLite if database is available +try { + const db = new ResultsDatabase() + for (const browser of browsers) { + db.insertRun({ + runAt: report.generatedAt, + browser, + accuracyTotal: 0, + accuracyMatches: 0, + benchmarkJson: '{}', + regressionJson: JSON.stringify(report), + tags: `browser:${browser}`, + }) + } + db.close() +} catch { + // Non-fatal — DB may not be set up in all environments. +} + +// Send Slack notification if configured. +const notifier: SlackNotifier | null = + slackUrl != null ? new SlackNotifier(slackUrl) : createSlackNotifierFromEnv() + +if (notifier != null) { + try { + await notifier.notifyRegressionReport(report) + } catch (err) { + console.error('Slack notification failed:', err) + } +} + +if (failOnCritical && report.hasBlocker) { + process.exit(1) +} diff --git a/scripts/validator-trends.ts b/scripts/validator-trends.ts new file mode 100644 index 00000000..b7b673f4 --- /dev/null +++ b/scripts/validator-trends.ts @@ -0,0 +1,39 @@ +#!/usr/bin/env bun +// validator-trends.ts — print performance trends from the benchmark snapshots. 
+// +// Usage: +// bun run scripts/validator-trends.ts [--browser=chrome] [--warn=10] [--critical=25] +// +// Flags: +// --browser=B chrome | safari | firefox (default: chrome) +// --warn=N Percent degradation threshold for warnings (default 10) +// --critical=N Percent degradation threshold for critical flags (default 25) +// --json Emit JSON instead of human-readable text + +import { + formatPerformanceReport, + trackPerformance, +} from '../src/measurement-validator/performance-tracker.js' +import type { BrowserName } from '../src/measurement-validator/types.js' + +function parseFlag(name: string): string | null { + const prefix = `--${name}=` + const arg = process.argv.find(v => v.startsWith(prefix)) + return arg !== undefined ? arg.slice(prefix.length) : null +} + +const browser = (parseFlag('browser') ?? 'chrome') as BrowserName +const warnPct = Number(parseFlag('warn') ?? 10) +const criticalPct = Number(parseFlag('critical') ?? 25) +const emitJson = process.argv.includes('--json') + +const report = await trackPerformance(browser, { warnPct, criticalPct }) + +if (emitJson) { + console.log(JSON.stringify(report, null, 2)) +} else { + console.log(formatPerformanceReport(report)) + if (report.regressionCount > 0) { + process.exit(1) + } +} diff --git a/scripts/validator-watch.ts b/scripts/validator-watch.ts new file mode 100644 index 00000000..e79145db --- /dev/null +++ b/scripts/validator-watch.ts @@ -0,0 +1,86 @@ +#!/usr/bin/env bun +// validator-watch.ts — watch benchmark/accuracy snapshot files and re-run +// regression detection whenever a file changes. 
+// +// Usage: +// bun run scripts/validator-watch.ts [--browsers=chrome,safari,firefox] +// [--slack-webhook=] +// +// Flags: +// --browsers=B Comma-separated list of browsers to watch (default: chrome) +// --slack-webhook=URL Send notifications via Slack when regressions are found + +import { watch } from 'node:fs' +import { join } from 'node:path' +import { + detectRegressions, + formatRegressionReport, +} from '../src/measurement-validator/regression-detector.js' +import { + createSlackNotifierFromEnv, + SlackNotifier, +} from '../src/measurement-validator/slack-notifier.js' +import type { BrowserName } from '../src/measurement-validator/types.js' + +function parseFlag(name: string): string | null { + const prefix = `--${name}=` + const arg = process.argv.find(v => v.startsWith(prefix)) + return arg !== undefined ? arg.slice(prefix.length) : null +} + +const browsersArg = parseFlag('browsers') ?? 'chrome' +const browsers = browsersArg.split(',').map(b => b.trim()) as BrowserName[] +const slackUrl = parseFlag('slack-webhook') +const notifier: SlackNotifier | null = + slackUrl != null ? 
new SlackNotifier(slackUrl) : createSlackNotifierFromEnv()
+
+const repoRoot = join(import.meta.dir, '..')
+const watchPaths = [
+  join(repoRoot, 'accuracy'),
+  join(repoRoot, 'benchmarks'),
+]
+
+let debounceTimer: ReturnType<typeof setTimeout> | null = null
+
+async function runCheck(): Promise<void> {
+  console.log(`[${new Date().toISOString()}] Running regression check for: ${browsers.join(', ')}`)
+  const report = await detectRegressions(browsers)
+  const text = formatRegressionReport(report)
+  console.log(text)
+
+  if (notifier != null && (report.hasBlocker || report.performanceRegressions.length > 0)) {
+    try {
+      await notifier.notifyRegressionReport(report)
+      console.log('Slack notification sent.')
+    } catch (err) {
+      console.error('Failed to send Slack notification:', err)
+    }
+  }
+}
+
+function scheduleCheck(): void {
+  if (debounceTimer != null) clearTimeout(debounceTimer)
+  debounceTimer = setTimeout(() => {
+    runCheck().catch(err => console.error('Regression check failed:', err))
+  }, 500)
+}
+
+// Run once immediately on start.
+await runCheck()
+
+// Watch the accuracy and benchmarks directories for changes.
+for (const watchPath of watchPaths) {
+  try {
+    watch(watchPath, { recursive: false }, (_event, filename) => {
+      if (filename?.endsWith('.json')) {
+        console.log(`[watch] Changed: ${watchPath}/${filename}`)
+        scheduleCheck()
+      }
+    })
+    console.log(`Watching ${watchPath}`)
+  } catch {
+    // Directory may not exist — silently skip.
+  }
+}
+
+console.log('Press Ctrl+C to stop.')
diff --git a/src/measurement-validator/dashboard-server.ts b/src/measurement-validator/dashboard-server.ts
new file mode 100644
index 00000000..046439cc
--- /dev/null
+++ b/src/measurement-validator/dashboard-server.ts
@@ -0,0 +1,260 @@
+// Dashboard HTTP server for the measurement-validator.
+//
+// Serves a JSON API over the checked-in accuracy/benchmark/status data and an
+// optional SQLite results history. Built on Bun.serve() — no external HTTP
+// framework required.
+// +// API endpoints: +// GET /api/status — status/dashboard.json +// GET /api/accuracy/:browser — accuracy/.json +// GET /api/benchmarks/:browser — benchmarks/.json +// GET /api/runs — recent validation runs from SQLite (if DB enabled) +// GET /api/runs/summaries — high-level trend summaries +// POST /api/runs — insert a new run record +// GET /health — liveness check +// GET / — embedded dashboard HTML +// +// Usage: +// import { DashboardServer } from './dashboard-server.js' +// const server = new DashboardServer({ port: 3001 }) +// server.start() + +import { readFileSync } from 'node:fs' +import { join } from 'node:path' +import { ResultsDatabase } from './results-database.js' +import type { QueryOptions } from './results-database.js' +import type { BrowserName, ValidationRunRecord } from './types.js' + +export type DashboardServerOptions = { + port?: number + host?: string + /** Enable the SQLite results database. Defaults to true. */ + enableDatabase?: boolean + /** Path to the SQLite file. Defaults to .measurement-results.db in repoRoot. */ + dbPath?: string + /** Repository root for resolving data files. 
*/ + repoRoot?: string +} + +const BROWSERS: BrowserName[] = ['chrome', 'safari', 'firefox'] + +function jsonResponse(data: unknown, status = 200): Response { + return new Response(JSON.stringify(data, null, 2), { + status, + headers: { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*' }, + }) +} + +function notFound(message: string): Response { + return jsonResponse({ error: message }, 404) +} + +function loadJsonFile(path: string): unknown { + try { + return JSON.parse(readFileSync(path, 'utf-8')) + } catch { + return null + } +} + +function buildDashboardHtml(repoRoot: string): string { + const statusPath = join(repoRoot, 'status', 'dashboard.json') + const status = loadJsonFile(statusPath) + + const browsers = BROWSERS.map(b => { + const acc = loadJsonFile(join(repoRoot, 'accuracy', `${b}.json`)) as + | { total?: number; matchCount?: number } + | null + return { + name: b, + total: acc?.total ?? 0, + matches: acc?.matchCount ?? 0, + } + }) + + const tableRows = browsers + .map( + b => + ` + ${b.name} + ${b.matches} + ${b.total} + ${b.total > 0 ? ((b.matches / b.total) * 100).toFixed(2) : 'n/a'}% + `, + ) + .join('\n') + + return ` + + + + + Measurement Validator Dashboard + + + +

+  <h1>📊 Measurement Validator Dashboard</h1>
+
+  <h2>Browser Accuracy</h2>
+  <table>
+    <thead>
+      <tr><th>Browser</th><th>Matches</th><th>Total</th><th>Accuracy</th></tr>
+    </thead>
+    <tbody>
+${tableRows}
+    </tbody>
+  </table>
+
+  <h2>Status Dashboard JSON</h2>
+  <pre>${JSON.stringify(status, null, 2)}</pre>
+
+  <h2>Recent Validation Runs</h2>
+  <div id="runs">Loading…</div>
+ + + + + +` +} + +export class DashboardServer { + private options: Required + private db: ResultsDatabase | null = null + private server: ReturnType | null = null + + constructor(options: DashboardServerOptions = {}) { + this.options = { + port: options.port ?? 3001, + host: options.host ?? '127.0.0.1', + enableDatabase: options.enableDatabase ?? true, + dbPath: options.dbPath ?? '', + repoRoot: options.repoRoot ?? join(import.meta.dir, '..', '..'), + } + } + + start(): void { + if (this.options.enableDatabase) { + this.db = new ResultsDatabase( + this.options.dbPath !== '' ? this.options.dbPath : undefined, + ) + } + + this.server = Bun.serve({ + port: this.options.port, + hostname: this.options.host, + fetch: (req: Request): Response | Promise => this.handleRequest(req), + }) + + console.log( + `Dashboard server running at http://${this.options.host}:${this.options.port}`, + ) + } + + stop(): void { + void this.server?.stop() + this.db?.close() + } + + private handleRequest(req: Request): Response | Promise { + const url = new URL(req.url) + const { pathname } = url + const { repoRoot } = this.options + + if (req.method === 'GET') { + if (pathname === '/' || pathname === '/dashboard') { + return new Response(buildDashboardHtml(repoRoot), { + headers: { 'Content-Type': 'text/html' }, + }) + } + if (pathname === '/health') { + return jsonResponse({ status: 'ok', ts: new Date().toISOString() }) + } + if (pathname === '/api/status') { + const data = loadJsonFile(join(repoRoot, 'status', 'dashboard.json')) + return data != null ? jsonResponse(data) : notFound('status/dashboard.json not found') + } + if (pathname.startsWith('/api/accuracy/')) { + const browser = pathname.slice('/api/accuracy/'.length) as BrowserName + if (!BROWSERS.includes(browser)) return notFound(`Unknown browser: ${browser}`) + const data = loadJsonFile(join(repoRoot, 'accuracy', `${browser}.json`)) + return data != null ? 
jsonResponse(data) : notFound(`accuracy/${browser}.json not found`) + } + if (pathname.startsWith('/api/benchmarks/')) { + const browser = pathname.slice('/api/benchmarks/'.length) as BrowserName + if (!BROWSERS.includes(browser)) return notFound(`Unknown browser: ${browser}`) + const data = loadJsonFile(join(repoRoot, 'benchmarks', `${browser}.json`)) + return data != null ? jsonResponse(data) : notFound(`benchmarks/${browser}.json not found`) + } + if (pathname === '/api/runs') { + if (this.db == null) return jsonResponse({ error: 'Database not enabled' }, 503) + const browser = url.searchParams.get('browser') as BrowserName | null + const since = url.searchParams.get('since') + const limit = Number(url.searchParams.get('limit') ?? '100') + const tag = url.searchParams.get('tag') + const queryOpts: QueryOptions = { limit } + if (browser != null) queryOpts.browser = browser + if (since != null) queryOpts.since = since + if (tag != null) queryOpts.tag = tag + const runs = this.db.queryRuns(queryOpts) + return jsonResponse(runs) + } + if (pathname === '/api/runs/summaries') { + if (this.db == null) return jsonResponse({ error: 'Database not enabled' }, 503) + const browser = url.searchParams.get('browser') as BrowserName | null + const since = url.searchParams.get('since') + const limit = Number(url.searchParams.get('limit') ?? 
'50') + const summaryOpts: QueryOptions = { limit } + if (browser != null) summaryOpts.browser = browser + if (since != null) summaryOpts.since = since + const summaries = this.db.querySummaries(summaryOpts) + return jsonResponse(summaries) + } + } + + if (req.method === 'POST' && pathname === '/api/runs') { + return this.handlePostRun(req) + } + + return notFound(`No route for ${req.method} ${pathname}`) + } + + private async handlePostRun(req: Request): Promise { + if (this.db == null) return jsonResponse({ error: 'Database not enabled' }, 503) + let body: Omit + try { + body = (await req.json()) as Omit + } catch { + return jsonResponse({ error: 'Invalid JSON body' }, 400) + } + const id = this.db.insertRun(body) + return jsonResponse({ id }, 201) + } +} diff --git a/src/measurement-validator/performance-tracker.ts b/src/measurement-validator/performance-tracker.ts new file mode 100644 index 00000000..bff764aa --- /dev/null +++ b/src/measurement-validator/performance-tracker.ts @@ -0,0 +1,160 @@ +// Performance tracker for the measurement-validator. +// +// Loads benchmark snapshots from the checked-in `benchmarks/` directory, +// compares each entry against a baseline, and emits a structured +// PerformanceReport showing deltas and trend labels. +// +// Usage: +// import { trackPerformance } from './performance-tracker.js' +// const report = await trackPerformance('chrome', { warnPct: 10, criticalPct: 25 }) + +import { readFileSync } from 'node:fs' +import { join } from 'node:path' +import type { + BenchmarkEntry, + BenchmarkSnapshot, + BrowserName, + PerformanceMetrics, + PerformanceReport, +} from './types.js' + +export type TrackOptions = { + /** Percent increase that triggers a 'degrading' label. Default 10. */ + warnPct?: number + /** Percent increase that counts as a regression in the report count. Default 25. */ + criticalPct?: number + /** + * Root of the repository. 
Defaults to two levels up from this file so it
   * works whether the code is run from source or from `dist/`.
   */
  repoRoot?: string
}

// NOTE(review): the pasted patch stripped every `<...>` type-argument list;
// the generics below are restored from usage — confirm against commit 427328a.

/** Flatten every result section of a benchmark snapshot into one list. */
function collectEntries(snapshot: BenchmarkSnapshot): BenchmarkEntry[] {
  return [
    ...(snapshot.results ?? []),
    ...(snapshot.richResults ?? []),
    ...(snapshot.richInlineResults ?? []),
    ...(snapshot.richPreWrapResults ?? []),
    ...(snapshot.richLongResults ?? []),
  ]
}

/** Read and parse `benchmarks/<browser>.json`. Throws if the file is missing. */
function loadSnapshot(repoRoot: string, browser: BrowserName): BenchmarkSnapshot {
  const filePath = join(repoRoot, 'benchmarks', `${browser}.json`)
  const raw = readFileSync(filePath, 'utf-8')
  return JSON.parse(raw) as BenchmarkSnapshot
}

/**
 * Load the per-label baseline timings for `browser` from
 * `.measurement-baseline.json`. Returns an empty map when the baseline file
 * is absent or unparsable (e.g. on a first run) — callers then compare each
 * entry against itself.
 */
function loadBaseline(repoRoot: string, browser: BrowserName): Map<string, number> {
  const baselineFile = join(repoRoot, '.measurement-baseline.json')
  try {
    const raw = readFileSync(baselineFile, 'utf-8')
    const data = JSON.parse(raw) as Record<string, Record<string, number>>
    const browserData = data[browser]
    if (browserData == null) return new Map()
    return new Map(Object.entries(browserData))
  } catch {
    return new Map()
  }
}

/** Label a delta: <= -1% improving, >= warnPct degrading, otherwise stable. */
function classifyTrend(
  deltaPct: number,
  warnPct: number,
): PerformanceMetrics['trend'] {
  if (deltaPct <= -1) return 'improving'
  if (deltaPct >= warnPct) return 'degrading'
  return 'stable'
}

/**
 * Load the benchmark snapshot for `browser`, compare each entry against the
 * checked-in baseline (if any), and return a PerformanceReport.
 */
export async function trackPerformance(
  browser: BrowserName,
  options: TrackOptions = {},
): Promise<PerformanceReport> {
  const {
    warnPct = 10,
    criticalPct = 25,
    repoRoot = join(import.meta.dir, '..', '..'),
  } = options

  const snapshot = loadSnapshot(repoRoot, browser)
  const baseline = loadBaseline(repoRoot, browser)

  const entries = collectEntries(snapshot)
  const metrics: PerformanceMetrics[] = entries.map(entry => {
    // Entries with no recorded baseline compare against themselves (delta 0).
    const baselineMs = baseline.get(entry.label) ?? entry.ms
    const deltaMs = entry.ms - baselineMs
    const deltaPct = baselineMs === 0 ? 0 : (deltaMs / baselineMs) * 100
    return {
      label: entry.label,
      baselineMs,
      currentMs: entry.ms,
      deltaMs,
      deltaPct,
      trend: classifyTrend(deltaPct, warnPct),
    }
  })

  const regressionCount = metrics.filter(m => m.deltaPct >= criticalPct).length

  return {
    generatedAt: new Date().toISOString(),
    browser,
    metrics,
    regressionCount,
  }
}

/**
 * Write a new baseline file from the current benchmark snapshots.
 * Call this after a clean run to lock in today's numbers as the reference.
 */
export async function writeBaseline(
  browsers: BrowserName[],
  options: Pick<TrackOptions, 'repoRoot'> = {},
): Promise<void> {
  const { repoRoot = join(import.meta.dir, '..', '..') } = options
  const baseline: Record<string, Record<string, number>> = {}

  for (const browser of browsers) {
    try {
      const snapshot = loadSnapshot(repoRoot, browser)
      const entries = collectEntries(snapshot)
      baseline[browser] = Object.fromEntries(entries.map(e => [e.label, e.ms]))
    } catch {
      // Skip browsers whose snapshot is not present.
    }
  }

  const baselineFile = join(repoRoot, '.measurement-baseline.json')
  const { writeFileSync } = await import('node:fs')
  writeFileSync(baselineFile, JSON.stringify(baseline, null, 2) + '\n', 'utf-8')
}

/**
 * Format a PerformanceReport as a human-readable text block suitable for
 * console output or Slack messages.
 */
export function formatPerformanceReport(report: PerformanceReport): string {
  const lines: string[] = [
    `Performance report — ${report.browser} — ${report.generatedAt}`,
    '',
  ]
  for (const m of report.metrics) {
    const sign = m.deltaMs >= 0 ? '+' : ''
    const icon = m.trend === 'improving' ? '✅' : m.trend === 'degrading' ?
'⚠️' : '✅' + lines.push( + ` ${icon} ${m.label}: ${m.currentMs.toFixed(3)}ms (${sign}${m.deltaPct.toFixed(1)}%)`, + ) + } + if (report.regressionCount > 0) { + lines.push('') + lines.push(`⚠️ ${report.regressionCount} regression(s) detected`) + } + return lines.join('\n') +} diff --git a/src/measurement-validator/regression-detector.ts b/src/measurement-validator/regression-detector.ts new file mode 100644 index 00000000..69f1d0ae --- /dev/null +++ b/src/measurement-validator/regression-detector.ts @@ -0,0 +1,209 @@ +// Regression detector for the measurement-validator. +// +// Compares the current accuracy and benchmark snapshots against the checked-in +// baselines and emits a RegressionReport that the GitHub Actions workflow and +// dashboard server can consume. +// +// Usage: +// import { detectRegressions } from './regression-detector.js' +// const report = await detectRegressions(['chrome', 'safari', 'firefox']) + +import { readFileSync } from 'node:fs' +import { join } from 'node:path' +import type { + AccuracyRegression, + AccuracySnapshot, + BenchmarkEntry, + BenchmarkSnapshot, + BrowserName, + PerformanceRegression, + RegressionReport, + RegressionSeverity, +} from './types.js' + +export type DetectOptions = { + /** + * Percent accuracy drop that is flagged as a warning (0-100). + * Default: any regression (> 0 mismatches that weren't there before). + */ + accuracyWarnDelta?: number + /** Percent benchmark slowdown that triggers a warning. Default 10. */ + perfWarnPct?: number + /** Percent benchmark slowdown that triggers a critical flag. Default 25. */ + perfCriticalPct?: number + /** + * Override current accuracy match counts per browser so the detector can + * compare live browser-checker results against the checked-in baseline. + * When omitted the detector compares the checked-in snapshot against itself + * (always clean) — useful for CI runs that do not have browser access. + */ + currentAccuracy?: Partial> + /** Repository root. 
Defaults to two levels above this file. */ + repoRoot?: string +} + +function loadJson(path: string): T | null { + try { + const raw = readFileSync(path, 'utf-8') + return JSON.parse(raw) as T + } catch { + return null + } +} + +function severityFromAccuracyDelta(delta: number): RegressionSeverity { + if (delta === 0) return 'ok' + if (delta < 10) return 'warning' + return 'critical' +} + +function severityFromPerfDelta( + deltaPct: number, + warnPct: number, + criticalPct: number, +): RegressionSeverity { + if (deltaPct < warnPct) return 'ok' + if (deltaPct < criticalPct) return 'warning' + return 'critical' +} + +function collectEntries(snapshot: BenchmarkSnapshot): BenchmarkEntry[] { + return [ + ...(snapshot.results ?? []), + ...(snapshot.richResults ?? []), + ...(snapshot.richInlineResults ?? []), + ...(snapshot.richPreWrapResults ?? []), + ...(snapshot.richLongResults ?? []), + ] +} + +/** + * Compare the current accuracy and benchmark snapshots against the checked-in + * baseline data and return a RegressionReport. + * + * Accuracy baseline comes from `accuracy/.json` (the files checked + * into the repo). Performance baseline comes from `.measurement-baseline.json` + * (written by `writeBaseline()` in performance-tracker.ts). + */ +export async function detectRegressions( + browsers: BrowserName[], + options: DetectOptions = {}, +): Promise { + const { + accuracyWarnDelta = 0, + perfWarnPct = 10, + perfCriticalPct = 25, + currentAccuracy, + repoRoot = join(import.meta.dir, '..', '..'), + } = options + + const accuracyRegressions: AccuracyRegression[] = [] + const performanceRegressions: PerformanceRegression[] = [] + + // Load the performance baseline (may not exist on first run) + const baselineFile = join(repoRoot, '.measurement-baseline.json') + const baselineData = loadJson>>(baselineFile) ?? {} + + for (const browser of browsers) { + // --- Accuracy --- + // The checked-in `accuracy/.json` is the baseline. + // When `currentAccuracy` is provided (e.g. 
from a live browser checker run), + // compare it against the checked-in baseline to detect regressions. + // When omitted (CI without browser access) no accuracy regression is reported. + const accuracyPath = join(repoRoot, 'accuracy', `${browser}.json`) + const baseline = loadJson(accuracyPath) + const current = currentAccuracy?.[browser] + if (baseline != null && current != null) { + const delta = baseline.matchCount - current.matchCount + if (delta > accuracyWarnDelta) { + accuracyRegressions.push({ + browser, + baselineMatchCount: baseline.matchCount, + currentMatchCount: current.matchCount, + baselineTotal: baseline.total, + currentTotal: current.total, + delta, + severity: severityFromAccuracyDelta(delta), + }) + } + } + + // --- Performance --- + const benchmarkPath = join(repoRoot, 'benchmarks', `${browser}.json`) + const benchmark = loadJson(benchmarkPath) + if (benchmark != null) { + const browserBaseline = baselineData[browser] ?? {} + const entries = collectEntries(benchmark) + for (const entry of entries) { + const baselineMs = browserBaseline[entry.label] + if (baselineMs == null) continue + const deltaPct = baselineMs === 0 ? 0 : ((entry.ms - baselineMs) / baselineMs) * 100 + const severity = severityFromPerfDelta(deltaPct, perfWarnPct, perfCriticalPct) + if (severity !== 'ok') { + performanceRegressions.push({ + label: entry.label, + browser, + baselineMs, + currentMs: entry.ms, + deltaPct, + severity, + }) + } + } + } + } + + const hasBlocker = + accuracyRegressions.some(r => r.severity === 'critical') || + performanceRegressions.some(r => r.severity === 'critical') + + return { + generatedAt: new Date().toISOString(), + accuracyRegressions, + performanceRegressions, + hasBlocker, + } +} + +/** + * Format a RegressionReport as a human-readable text summary. 
+ */ +export function formatRegressionReport(report: RegressionReport): string { + const lines: string[] = [`Regression report — ${report.generatedAt}`, ''] + + if (report.accuracyRegressions.length === 0 && report.performanceRegressions.length === 0) { + lines.push('✅ No regressions detected') + return lines.join('\n') + } + + if (report.accuracyRegressions.length > 0) { + lines.push('Accuracy regressions:') + for (const r of report.accuracyRegressions) { + const icon = r.severity === 'critical' ? '❌' : '⚠️' + lines.push( + ` ${icon} ${r.browser}: ${r.currentMatchCount}/${r.currentTotal} matches ` + + `(was ${r.baselineMatchCount}/${r.baselineTotal}, Δ−${r.delta})`, + ) + } + lines.push('') + } + + if (report.performanceRegressions.length > 0) { + lines.push('Performance regressions:') + for (const r of report.performanceRegressions) { + const icon = r.severity === 'critical' ? '❌' : '⚠️' + const sign = r.deltaPct >= 0 ? '+' : '' + lines.push( + ` ${icon} [${r.browser}] ${r.label}: ${r.currentMs.toFixed(3)}ms ` + + `(was ${r.baselineMs.toFixed(3)}ms, ${sign}${r.deltaPct.toFixed(1)}%)`, + ) + } + } + + if (report.hasBlocker) { + lines.push('') + lines.push('❌ Build should be blocked: critical regression(s) detected') + } + + return lines.join('\n') +} diff --git a/src/measurement-validator/results-database.ts b/src/measurement-validator/results-database.ts new file mode 100644 index 00000000..3d0c989c --- /dev/null +++ b/src/measurement-validator/results-database.ts @@ -0,0 +1,155 @@ +// SQLite persistence for the measurement-validator. +// +// Stores validation run records in a local SQLite database using Bun's +// built-in `bun:sqlite` module — zero extra dependencies. 
+// +// Usage: +// import { ResultsDatabase } from './results-database.js' +// const db = new ResultsDatabase() +// await db.insertRun(record) +// const runs = db.queryRuns({ browser: 'chrome', limit: 50 }) +// db.close() + +import { Database } from 'bun:sqlite' +import { randomUUID } from 'node:crypto' +import { join } from 'node:path' +import type { BrowserName, ValidationRunRecord } from './types.js' + +export type QueryOptions = { + browser?: BrowserName + /** ISO timestamp — return only runs at or after this time. */ + since?: string + /** Maximum number of rows to return (default 100). */ + limit?: number + /** Free-text tag that must appear in the `tags` field. */ + tag?: string +} + +export type RunSummary = { + runAt: string + browser: BrowserName + accuracyPct: number + regressionCount: number +} + +const CREATE_TABLE_SQL = ` +CREATE TABLE IF NOT EXISTS validation_runs ( + id TEXT PRIMARY KEY, + run_at TEXT NOT NULL, + browser TEXT NOT NULL, + accuracy_total INTEGER NOT NULL, + accuracy_matches INTEGER NOT NULL, + benchmark_json TEXT NOT NULL DEFAULT '{}', + regression_json TEXT NOT NULL DEFAULT '{}', + tags TEXT NOT NULL DEFAULT '' +)` + +export class ResultsDatabase { + private db: Database + + constructor(dbPath?: string) { + const resolvedPath = + dbPath ?? join(import.meta.dir, '..', '..', '.measurement-results.db') + this.db = new Database(resolvedPath, { create: true }) + this.db.run(CREATE_TABLE_SQL) + } + + /** Insert a new validation run record. Generates an ID if one is not provided. */ + insertRun(record: Omit & { id?: string }): string { + const id = record.id ?? 
randomUUID() + this.db.run( + `INSERT INTO validation_runs + (id, run_at, browser, accuracy_total, accuracy_matches, + benchmark_json, regression_json, tags) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + [ + id, + record.runAt, + record.browser, + record.accuracyTotal, + record.accuracyMatches, + record.benchmarkJson, + record.regressionJson, + record.tags, + ], + ) + return id + } + + /** Retrieve validation runs with optional filters. */ + queryRuns(options: QueryOptions = {}): ValidationRunRecord[] { + const { browser, since, limit = 100, tag } = options + const conditions: string[] = [] + const params: (string | number)[] = [] + + if (browser != null) { + conditions.push('browser = ?') + params.push(browser) + } + if (since != null) { + conditions.push('run_at >= ?') + params.push(since) + } + if (tag != null) { + conditions.push('tags LIKE ?') + params.push(`%${tag}%`) + } + + const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '' + const sql = `SELECT id, run_at, browser, accuracy_total, accuracy_matches, + benchmark_json, regression_json, tags + FROM validation_runs + ${where} + ORDER BY run_at DESC + LIMIT ?` + + const rows = this.db.query(sql).all(...params, limit) as Array> + return rows.map(row => ({ + id: row['id'] as string, + runAt: row['run_at'] as string, + browser: row['browser'] as BrowserName, + accuracyTotal: row['accuracy_total'] as number, + accuracyMatches: row['accuracy_matches'] as number, + benchmarkJson: row['benchmark_json'] as string, + regressionJson: row['regression_json'] as string, + tags: row['tags'] as string, + })) + } + + /** Return high-level summaries suitable for the dashboard trends view. 
*/ + querySummaries(options: QueryOptions = {}): RunSummary[] { + const runs = this.queryRuns(options) + return runs.map(r => { + let regressionCount = 0 + try { + const parsed = JSON.parse(r.regressionJson) as { + performanceRegressions?: unknown[] + accuracyRegressions?: unknown[] + } + regressionCount = + (parsed.performanceRegressions?.length ?? 0) + + (parsed.accuracyRegressions?.length ?? 0) + } catch { + // ignore parse errors + } + const accuracyPct = + r.accuracyTotal > 0 ? (r.accuracyMatches / r.accuracyTotal) * 100 : 100 + return { + runAt: r.runAt, + browser: r.browser, + accuracyPct, + regressionCount, + } + }) + } + + /** Delete all runs older than the given ISO timestamp. */ + pruneOlderThan(timestamp: string): number { + const result = this.db.run('DELETE FROM validation_runs WHERE run_at < ?', [timestamp]) + return result.changes + } + + close(): void { + this.db.close() + } +} diff --git a/src/measurement-validator/slack-notifier.ts b/src/measurement-validator/slack-notifier.ts new file mode 100644 index 00000000..177ab48a --- /dev/null +++ b/src/measurement-validator/slack-notifier.ts @@ -0,0 +1,149 @@ +// Slack notifier for the measurement-validator. +// +// Sends webhook notifications to a Slack channel when validation runs +// complete or when regressions are detected. Uses Slack's Incoming +// Webhooks API — no Slack SDK dependency needed. 
+// +// Usage: +// import { SlackNotifier } from './slack-notifier.js' +// const notifier = new SlackNotifier(process.env.SLACK_WEBHOOK_URL) +// await notifier.notifyRegressionReport(report) + +import type { PerformanceReport, RegressionReport } from './types.js' + +export type SlackBlock = + | { type: 'header'; text: { type: 'plain_text'; text: string } } + | { type: 'section'; text: { type: 'mrkdwn'; text: string } } + | { type: 'divider' } + +export type SlackPayload = { + text: string + blocks?: SlackBlock[] +} + +export class SlackNotifier { + private webhookUrl: string + + constructor(webhookUrl: string) { + this.webhookUrl = webhookUrl + } + + /** Low-level send: POST a SlackPayload to the configured webhook URL. */ + async send(payload: SlackPayload): Promise { + const response = await fetch(this.webhookUrl, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(payload), + }) + if (!response.ok) { + throw new Error( + `Slack webhook returned ${response.status}: ${await response.text()}`, + ) + } + } + + /** Notify about a completed regression detection report. */ + async notifyRegressionReport(report: RegressionReport): Promise { + const totalIssues = + report.accuracyRegressions.length + report.performanceRegressions.length + + const statusIcon = report.hasBlocker ? '❌' : totalIssues > 0 ? '⚠️' : '✅' + const statusText = report.hasBlocker + ? 'Critical regressions detected — build blocked' + : totalIssues > 0 + ? 
`${totalIssues} regression(s) detected` + : 'All checks passed' + + const blocks: SlackBlock[] = [ + { + type: 'header', + text: { + type: 'plain_text', + text: `${statusIcon} Measurement Validator — ${statusText}`, + }, + }, + ] + + if (report.accuracyRegressions.length > 0) { + const lines = report.accuracyRegressions.map( + r => + `• *${r.browser}*: ${r.currentMatchCount}/${r.currentTotal} matches ` + + `(Δ −${r.delta}, ${r.severity})`, + ) + blocks.push({ type: 'section', text: { type: 'mrkdwn', text: `*Accuracy*\n${lines.join('\n')}` } }) + } + + if (report.performanceRegressions.length > 0) { + const lines = report.performanceRegressions.map(r => { + const sign = r.deltaPct >= 0 ? '+' : '' + return ( + `• *[${r.browser}]* ${r.label}: ` + + `${r.currentMs.toFixed(3)}ms (${sign}${r.deltaPct.toFixed(1)}%, ${r.severity})` + ) + }) + blocks.push({ + type: 'section', + text: { type: 'mrkdwn', text: `*Performance*\n${lines.join('\n')}` }, + }) + } + + blocks.push({ type: 'divider' }) + blocks.push({ + type: 'section', + text: { type: 'mrkdwn', text: `_Generated at ${report.generatedAt}_` }, + }) + + await this.send({ text: `${statusIcon} Measurement Validator: ${statusText}`, blocks }) + } + + /** Notify about a performance tracking report. */ + async notifyPerformanceReport(report: PerformanceReport): Promise { + const degraded = report.metrics.filter(m => m.trend === 'degrading') + const statusIcon = report.regressionCount > 0 ? '⚠️' : '✅' + const statusText = + report.regressionCount > 0 + ? `${report.regressionCount} performance regression(s) — ${report.browser}` + : `Performance OK — ${report.browser}` + + const lines = degraded.map(m => { + const sign = m.deltaPct >= 0 ? '+' : '' + return `• ${m.label}: ${m.currentMs.toFixed(3)}ms (${sign}${m.deltaPct.toFixed(1)}%)` + }) + + const body = + lines.length > 0 + ? `*Degraded benchmarks*\n${lines.join('\n')}` + : '✅ All benchmarks within expected range.' 
+ + await this.send({ + text: `${statusIcon} Performance report (${report.browser}): ${statusText}`, + blocks: [ + { + type: 'header', + text: { type: 'plain_text', text: `${statusIcon} Performance Report — ${report.browser}` }, + }, + { type: 'section', text: { type: 'mrkdwn', text: body } }, + { type: 'divider' }, + { + type: 'section', + text: { type: 'mrkdwn', text: `_Generated at ${report.generatedAt}_` }, + }, + ], + }) + } + + /** Send a plain text message. */ + async notifyText(text: string): Promise { + await this.send({ text }) + } +} + +/** + * Convenience factory that reads the webhook URL from `SLACK_WEBHOOK_URL` + * environment variable and returns null if it is not set. + */ +export function createSlackNotifierFromEnv(): SlackNotifier | null { + const url = process.env['SLACK_WEBHOOK_URL'] + if (url == null || url.trim() === '') return null + return new SlackNotifier(url) +} diff --git a/src/measurement-validator/types.ts b/src/measurement-validator/types.ts new file mode 100644 index 00000000..31eb8e04 --- /dev/null +++ b/src/measurement-validator/types.ts @@ -0,0 +1,80 @@ +// Shared types for the measurement-validator Phase 4 components. 
+ +export type BrowserName = 'chrome' | 'safari' | 'firefox' + +export type AccuracySnapshot = { + status: string + total: number + matchCount: number + mismatchCount: number +} + +export type BenchmarkEntry = { + label: string + ms: number + desc: string +} + +export type BenchmarkSnapshot = { + status: string + results?: BenchmarkEntry[] + richResults?: BenchmarkEntry[] + richInlineResults?: BenchmarkEntry[] + richPreWrapResults?: BenchmarkEntry[] + richLongResults?: BenchmarkEntry[] +} + +export type PerformanceMetrics = { + label: string + baselineMs: number + currentMs: number + deltaMs: number + deltaPct: number + trend: 'improving' | 'stable' | 'degrading' +} + +export type PerformanceReport = { + generatedAt: string + browser: BrowserName + metrics: PerformanceMetrics[] + regressionCount: number +} + +export type RegressionSeverity = 'ok' | 'warning' | 'critical' + +export type AccuracyRegression = { + browser: BrowserName + baselineMatchCount: number + currentMatchCount: number + baselineTotal: number + currentTotal: number + delta: number + severity: RegressionSeverity +} + +export type PerformanceRegression = { + label: string + browser: BrowserName + baselineMs: number + currentMs: number + deltaPct: number + severity: RegressionSeverity +} + +export type RegressionReport = { + generatedAt: string + accuracyRegressions: AccuracyRegression[] + performanceRegressions: PerformanceRegression[] + hasBlocker: boolean +} + +export type ValidationRunRecord = { + id: string + runAt: string + browser: BrowserName + accuracyTotal: number + accuracyMatches: number + benchmarkJson: string + regressionJson: string + tags: string +} diff --git a/tsconfig.build.json b/tsconfig.build.json index a7ce060d..95e31e71 100644 --- a/tsconfig.build.json +++ b/tsconfig.build.json @@ -9,5 +9,5 @@ "declaration": true }, "include": ["src/**/*.ts"], - "exclude": ["src/layout.test.ts", "src/test-data.ts"] + "exclude": ["src/layout.test.ts", "src/test-data.ts", 
"src/measurement-validator/**"] }