diff --git a/.github/workflows/measurement-validation.yml b/.github/workflows/measurement-validation.yml new file mode 100644 index 00000000..23bae419 --- /dev/null +++ b/.github/workflows/measurement-validation.yml @@ -0,0 +1,191 @@ +name: Measurement Validation + +on: + push: + branches: ["main", "release/*"] + paths: + - "src/**" + - "scripts/**" + - ".measurement-baseline.json" + pull_request: + branches: ["main"] + paths: + - "src/**" + - "scripts/**" + - ".measurement-baseline.json" + schedule: + # Daily canary run at 06:00 UTC + - cron: "0 6 * * *" + workflow_dispatch: + inputs: + update_baseline: + description: "Update the performance baseline after this run" + required: false + default: "false" + slack_notify: + description: "Send Slack notification on completion" + required: false + default: "true" + +permissions: + contents: read + +env: + BUN_VERSION: "1.x" + BASELINE_PATH: ".measurement-baseline.json" + +jobs: + validate: + name: Measurement Validation + runs-on: ubuntu-latest + timeout-minutes: 30 + permissions: + contents: write + pull-requests: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: ${{ env.BUN_VERSION }} + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Type-check + run: bun run check + + - name: Run unit tests + run: bun test src/measurement-validator/ + + - name: Run performance tracking check + id: perf + run: | + bun run scripts/performance-trends.ts \ + --baseline=${{ env.BASELINE_PATH }} \ + --output=.perf-report.json \ + --format=json + continue-on-error: true + + - name: Upload performance report + if: always() + uses: actions/upload-artifact@v4 + with: + name: performance-report-${{ github.run_id }} + path: .perf-report.json + if-no-files-found: ignore + retention-days: 90 + + - name: Post PR comment + if: github.event_name == 'pull_request' + uses: actions/github-script@v7 + with: + 
script: | + const fs = require('fs'); + let report = '## πŸ“Š Measurement Validation Report\n\n'; + + try { + const raw = fs.readFileSync('.perf-report.json', 'utf8'); + const data = JSON.parse(raw); + + if (data.regressions && data.regressions.length > 0) { + const critical = data.regressions.filter(r => r.severity === 'critical'); + const warnings = data.regressions.filter(r => r.severity === 'warning'); + if (critical.length > 0) { + report += '### πŸ”΄ Critical Regressions\n'; + critical.forEach(r => { + report += `- **${r.language}** (${r.metric}): ${r.baselineMs.toFixed(2)}ms β†’ ${r.currentMs.toFixed(2)}ms (+${r.changePercent.toFixed(1)}%)\n`; + }); + report += '\n'; + } + if (warnings.length > 0) { + report += '### 🟑 Warnings\n'; + warnings.forEach(r => { + report += `- **${r.language}** (${r.metric}): ${r.baselineMs.toFixed(2)}ms β†’ ${r.currentMs.toFixed(2)}ms (+${r.changePercent.toFixed(1)}%)\n`; + }); + report += '\n'; + } + } else { + report += 'βœ… No performance regressions detected.\n\n'; + } + + if (data.metrics) { + report += '### Current Metrics\n'; + report += '| Language | Avg Total | p95 Total | Samples |\n'; + report += '|----------|-----------|-----------|----------|\n'; + for (const [key, m] of Object.entries(data.metrics)) { + const [lang] = key.split('::'); + report += `| ${lang} | ${m.avgTotalMs.toFixed(2)}ms | ${m.p95TotalMs.toFixed(2)}ms | ${m.sampleCount} |\n`; + } + } + } catch (e) { + report += `> Performance report not available: ${e.message}\n`; + } + + report += `\n---\n*Run ID: \`${context.runId}\` Β· Commit: \`${context.sha.slice(0, 8)}\`*`; + + // Find or create comment + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + const existing = comments.find(c => + c.user.login === 'github-actions[bot]' && + c.body.includes('Measurement Validation Report') + ); + + if (existing) { + await 
github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body: report, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: report, + }); + } + + - name: Fail on critical regressions + if: steps.perf.outcome == 'failure' + run: | + echo "::error::Performance regressions detected. See the performance report artifact for details." + exit 1 + + - name: Update baseline (manual trigger only) + if: | + github.event_name == 'workflow_dispatch' && + github.event.inputs.update_baseline == 'true' && + steps.perf.outcome != 'failure' + run: | + bun run scripts/performance-trends.ts \ + --update-baseline \ + --baseline=${{ env.BASELINE_PATH }} + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add ${{ env.BASELINE_PATH }} + git commit -m "chore: update measurement performance baseline [skip ci]" || true + git push + + - name: Send Slack notification + if: | + (github.event_name == 'schedule' || github.event.inputs.slack_notify == 'true') && + env.SLACK_WEBHOOK_URL != '' + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + run: | + bun run scripts/performance-trends.ts \ + --slack-notify \ + --baseline=${{ env.BASELINE_PATH }} \ + --branch="${{ github.ref_name }}" \ + --commit="${{ github.sha }}" diff --git a/.gitignore b/.gitignore index 7428ea11..f79d53fe 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,7 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json # Finder (MacOS) folder config .DS_Store + +# measurement-validator runtime artifacts (do not commit) +.measurement-results.db +.perf-report.json diff --git a/.measurement-baseline.json b/.measurement-baseline.json new file mode 100644 index 00000000..be9b1d37 --- /dev/null +++ b/.measurement-baseline.json @@ -0,0 +1,58 @@ +{ + "version": "1", + "createdAt": 1743849600000, + 
"updatedAt": 1743849600000, + "commitSha": null, + "metrics": { + "english::16px Inter, sans-serif": { + "avgPrepareMs": 0.85, + "avgLayoutMs": 0.02, + "avgTotalMs": 0.87, + "p95PrepareMs": 1.20, + "p95LayoutMs": 0.04, + "p95TotalMs": 1.24, + "sampleCount": 100, + "capturedAt": 1743849600000 + }, + "arabic::16px Inter, sans-serif": { + "avgPrepareMs": 1.10, + "avgLayoutMs": 0.02, + "avgTotalMs": 1.12, + "p95PrepareMs": 1.60, + "p95LayoutMs": 0.04, + "p95TotalMs": 1.64, + "sampleCount": 100, + "capturedAt": 1743849600000 + }, + "chinese::16px Inter, sans-serif": { + "avgPrepareMs": 0.95, + "avgLayoutMs": 0.02, + "avgTotalMs": 0.97, + "p95PrepareMs": 1.35, + "p95LayoutMs": 0.04, + "p95TotalMs": 1.39, + "sampleCount": 100, + "capturedAt": 1743849600000 + }, + "japanese::16px Inter, sans-serif": { + "avgPrepareMs": 0.90, + "avgLayoutMs": 0.02, + "avgTotalMs": 0.92, + "p95PrepareMs": 1.30, + "p95LayoutMs": 0.04, + "p95TotalMs": 1.34, + "sampleCount": 100, + "capturedAt": 1743849600000 + }, + "thai::16px Inter, sans-serif": { + "avgPrepareMs": 1.05, + "avgLayoutMs": 0.02, + "avgTotalMs": 1.07, + "p95PrepareMs": 1.50, + "p95LayoutMs": 0.04, + "p95TotalMs": 1.54, + "sampleCount": 100, + "capturedAt": 1743849600000 + } + } +} diff --git a/docs/dashboard-guide.md b/docs/dashboard-guide.md new file mode 100644 index 00000000..7e2ded70 --- /dev/null +++ b/docs/dashboard-guide.md @@ -0,0 +1,150 @@ +# Dashboard Guide + +The measurement-validator dashboard is a lightweight HTTP + WebSocket server that +shows validation results, summary statistics, and real-time updates. 
+ +## Starting the dashboard + +```bash +# Default: http://127.0.0.1:3000 +bun run scripts/start-dashboard.ts + +# Custom port +bun run scripts/start-dashboard.ts --port=4000 + +# LAN access +bun run scripts/start-dashboard.ts --host=0.0.0.0 --port=3000 + +# Open in browser immediately +bun run scripts/start-dashboard.ts --open + +# Custom database path +bun run scripts/start-dashboard.ts --db=/path/to/results.db +``` + +## Web UI + +Visit `http://localhost:3000/dashboard` after starting the server. + +The UI shows: + +- **Summary cards** β€” total runs, total results, avg pass rate, critical count +- **Filter bar** β€” filter by language, severity, or search text +- **Results table** β€” all measurement results with pretext/DOM widths and delta +- **Live status** β€” WebSocket connection indicator; table refreshes automatically on new runs + +## REST API + +All endpoints return JSON with CORS headers (`Access-Control-Allow-Origin: *`). + +### `GET /api/results` + +Returns measurement results. Supports query parameters: + +| Param | Description | +|-------|-------------| +| `language` | Filter by language name | +| `font` | Filter by font string | +| `severity` | `ok`, `warning`, or `critical` | +| `since` | Unix timestamp ms lower bound | +| `until` | Unix timestamp ms upper bound | +| `limit` | Max results (default: all) | +| `offset` | Pagination offset | + +```bash +curl "http://localhost:3000/api/results?language=arabic&severity=critical" +``` + +### `GET /api/runs` + +Returns recent validation runs. Accepts `?limit=` (default 50). + +```bash +curl "http://localhost:3000/api/runs" +``` + +### `GET /api/runs/:id` + +Returns a single validation run with all its results. + +```bash +curl "http://localhost:3000/api/runs/run-abc123" +``` + +### `GET /api/summary` + +Returns aggregate statistics. 
+ +```json +{ + "totalRuns": 42, + "totalResults": 8400, + "avgPassRate": 0.987, + "criticalCount": 12, + "languages": ["arabic", "chinese", "english", "japanese", "thai"] +} +``` + +### `GET /api/performance/trends` + +Returns daily pass rate / avg delta for the requested language and window. + +| Param | Default | Description | +|-------|---------|-------------| +| `language` | `english` | Language to query | +| `days` | `30` | Number of days to look back | + +```bash +curl "http://localhost:3000/api/performance/trends?language=arabic&days=14" +``` + +Response: + +```json +[ + { "date": "2026-03-01", "passRate": 0.995, "avgDeltaPercent": 0.12 }, + { "date": "2026-03-02", "passRate": 0.993, "avgDeltaPercent": 0.14 } +] +``` + +## WebSocket + +Connect to `ws://localhost:3000/ws/results` to receive real-time push events. + +Events: + +```json +{ "type": "run_complete", "runId": "run-abc123" } +``` + +On receiving `run_complete`, fetch updated data from `/api/results` and `/api/summary`. + +## Publishing runs programmatically + +```typescript +import { ResultsDatabase } from './src/measurement-validator/results-database.js' +import { DashboardServer } from './src/measurement-validator/dashboard-server.js' + +const db = new ResultsDatabase('.measurement-results.db') +const server = new DashboardServer({ port: 3000, db }) +server.start() + +// After a validation run completes: +server.publishRun(validationRun) // stores in DB + notifies WebSocket clients +``` + +## Data storage + +Results are stored in a SQLite database (default: `.measurement-results.db`). +The database is created automatically on first start. 
+ +To inspect it directly: + +```bash +bun -e " +import { ResultsDatabase } from './src/measurement-validator/results-database.js' +const db = new ResultsDatabase() +console.log(db.getStatistics()) +db.close() +" +``` diff --git a/docs/github-actions.md b/docs/github-actions.md new file mode 100644 index 00000000..40bd386d --- /dev/null +++ b/docs/github-actions.md @@ -0,0 +1,114 @@ +# GitHub Actions Integration Guide + +The measurement-validator GitHub Actions workflow automatically validates canvas measurements +against browser DOM on every push and pull request, detects performance regressions, and posts +results as PR comments. + +## Setup + +### 1. Add the workflow + +The workflow file is already included at `.github/workflows/measurement-validation.yml`. +It triggers on: + +- **Push** to `main` or `release/*` branches (when source files change) +- **Pull requests** targeting `main` +- **Daily schedule** at 06:00 UTC (canary run) +- **Manual dispatch** (with optional baseline update and Slack notification) + +### 2. Configure Slack notifications (optional) + +Add a repository secret named `SLACK_WEBHOOK_URL` with your Slack incoming webhook URL. + +``` +Settings β†’ Secrets and variables β†’ Actions β†’ New repository secret +Name: SLACK_WEBHOOK_URL +Value: https://hooks.slack.com/services/T.../B.../... +``` + +See [`slack-integration.md`](./slack-integration.md) for full Slack setup details. 
+ +## How it works + +``` +Push / PR opened + β”‚ + β–Ό +Type-check + Unit tests + β”‚ + β–Ό +Performance tracking check + β”œβ”€β”€ Reads .measurement-baseline.json + β”œβ”€β”€ Runs synthetic timing measurements + β”œβ”€β”€ Compares against baseline + └── Writes .perf-report.json + β”‚ + β–Ό +Upload artifact (.perf-report.json, retained 90 days) + β”‚ + β”œβ”€ [Pull request] ──→ Post PR comment with report + β”‚ + β”œβ”€ [Critical regression] ──→ Fail build + error annotation + β”‚ + └─ [Schedule / manual] ──→ Send Slack notification +``` + +## PR Comments + +On every pull request, the workflow posts (or updates) a comment with: + +- βœ… "No performance regressions detected" β€” or β€” +- πŸ”΄ Critical regressions with language, metric, and % change +- 🟑 Warnings with the same detail +- A table of current metrics per language + +The comment is updated in-place on subsequent pushes to the same PR. + +## Updating the baseline + +The performance baseline is stored in `.measurement-baseline.json` and committed to the +repository so baseline changes are reviewable in PRs. + +To update it manually: + +```bash +# Via GitHub Actions (recommended) +# Trigger "Measurement Validation" β†’ Run workflow β†’ enable "Update the performance baseline" + +# Or locally +bun run scripts/performance-trends.ts --update-baseline +git add .measurement-baseline.json +git commit -m "chore: update measurement performance baseline" +``` + +## Regression thresholds + +| Change | Severity | Effect | +|--------|----------|--------| +| < 20% | None | Pass | +| β‰₯ 20% | Warning | Logged | +| β‰₯ 50% | Critical | Build fails | + +Thresholds can be adjusted in `src/measurement-validator/performance-tracker.ts`: + +```typescript +const REGRESSION_WARNING_THRESHOLD = 0.20 // 20% +const REGRESSION_CRITICAL_THRESHOLD = 0.50 // 50% +``` + +## Artifacts + +Performance reports are uploaded as workflow artifacts named +`performance-report-` and retained for 90 days. 
Download them from +the Actions run page or via the GitHub CLI: + +```bash +gh run download --name performance-report- +``` + +## Workflow inputs (manual dispatch) + +| Input | Default | Description | +|-------|---------|-------------| +| `update_baseline` | `false` | Commit updated baseline after run | +| `slack_notify` | `true` | Send Slack notification | diff --git a/docs/slack-integration.md b/docs/slack-integration.md new file mode 100644 index 00000000..d46c63d1 --- /dev/null +++ b/docs/slack-integration.md @@ -0,0 +1,128 @@ +# Slack Integration Guide + +The measurement-validator can send Slack notifications for validation results, +performance regressions, and daily summaries using incoming webhooks. + +## Prerequisites + +You need a Slack workspace where you can create an incoming webhook. No bot tokens +or OAuth flows are required. + +## Setup + +### 1. Create a Slack app with an incoming webhook + +1. Go to [https://api.slack.com/apps](https://api.slack.com/apps) +2. Click **Create New App** β†’ **From scratch** +3. Name it (e.g. `pretext-validator`) and pick your workspace +4. Under **Features**, click **Incoming Webhooks** β†’ toggle **Activate Incoming Webhooks** on +5. Click **Add New Webhook to Workspace** β†’ choose a channel β†’ **Allow** +6. Copy the webhook URL: `https://hooks.slack.com/services/T.../B.../...` + +### 2. Set environment variables + +```bash +# Required +export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/T.../B.../..." + +# Optional +export SLACK_CHANNEL="#quality-gates" # Override destination channel +export SLACK_USERNAME="pretext-validator" # Display name +export SLACK_ICON_EMOJI=":bar_chart:" # Icon +export SLACK_MIN_SEVERITY="warning" # 'ok' | 'warning' | 'critical' +``` + +For GitHub Actions, add `SLACK_WEBHOOK_URL` as a repository secret: + +``` +Settings β†’ Secrets and variables β†’ Actions β†’ New repository secret +Name: SLACK_WEBHOOK_URL +Value: https://hooks.slack.com/services/... 
+``` + +## Usage + +### Manual notification from CLI + +```bash +# Send notification after a performance trends run +bun run scripts/performance-trends.ts --slack-notify + +# Include branch/commit context +bun run scripts/performance-trends.ts \ + --slack-notify \ + --branch=main \ + --commit=abc123def +``` + +### Programmatic usage + +```typescript +import { SlackNotifier } from './src/measurement-validator/slack-notifier.js' +import { createSlackNotifierFromEnv } from './src/measurement-validator/slack-notifier.js' + +// From environment variables +const notifier = createSlackNotifierFromEnv() +if (notifier) { + await notifier.sendValidationSummary(summary, regressions, { + runId: 'run-abc123', + branch: 'main', + commitSha: 'abc123def', + prUrl: 'https://github.com/org/repo/pull/42', + }) +} + +// Explicit configuration +const notifier = new SlackNotifier({ + webhookUrl: process.env.SLACK_WEBHOOK_URL!, + channel: '#quality-gates', + username: 'pretext-validator', + iconEmoji: ':bar_chart:', + minSeverity: 'warning', // only notify on warnings or critical +}) +``` + +## Message types + +### Validation summary + +Sent after each validation run. Includes pass rate, critical/warning counts, +deltas, branch, and commit. + +Color coding: +- 🟒 Green β€” all passed +- 🟑 Yellow β€” warnings present +- πŸ”΄ Red β€” critical issues + +### Regression alert + +Sent specifically for performance regressions. Lists each regression with language, +metric, baseline vs current timing, and percentage change. + +### Daily summary + +Sent on the scheduled daily run. Shows run count, average pass rate, critical issue +count, and languages tested. 
+ +## Controlling notification frequency + +Use `SLACK_MIN_SEVERITY` to control which events trigger a Slack message: + +| Value | Sends notifications for | +|-------|------------------------| +| `ok` | Every run (even if all pass) | +| `warning` | Warnings and critical (default) | +| `critical` | Only critical regressions | + +## Troubleshooting + +**"Slack webhook request failed: 403"** +- The webhook URL has been revoked. Generate a new one from the Slack app settings. + +**"Slack webhook request failed: 404"** +- The app or webhook no longer exists. Recreate it. + +**No messages arriving** +- Check `SLACK_WEBHOOK_URL` is set in the environment / CI secrets +- Check `SLACK_MIN_SEVERITY` β€” if set to `critical`, warnings are silently skipped +- Verify the channel still exists in your workspace diff --git a/package.json b/package.json index 0b28a0e4..51192f58 100644 --- a/package.json +++ b/package.json @@ -65,6 +65,10 @@ "probe-check": "bun run scripts/probe-check.ts", "probe-check:safari": "PROBE_CHECK_BROWSER=safari bun run scripts/probe-check.ts", "status-dashboard": "bun run scripts/status-dashboard.ts", + "validator:watch": "bun run scripts/watch-validator.ts", + "validator:stream": "bun run scripts/stream-validator.ts", + "validator:trends": "bun run scripts/performance-trends.ts", + "validator:dashboard": "bun run scripts/start-dashboard.ts", "site:build": "rm -rf site && bun run scripts/build-demo-site.ts", "start": "HOST=${HOST:-127.0.0.1}; PORT=3000; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Freeing port $PORT: terminating $pids\"; kill $pids 2>/dev/null || true; sleep 1; pids=$(lsof -tiTCP:$PORT -sTCP:LISTEN 2>/dev/null); if [ -n \"$pids\" ]; then echo \"Port $PORT still busy: killing $pids\"; kill -9 $pids 2>/dev/null || true; fi; fi; bun pages/*.html pages/demos/*.html pages/demos/*/index.html --host=$HOST:$PORT", "start:lan": "HOST=0.0.0.0 bun run start", diff --git a/scripts/performance-trends.ts 
b/scripts/performance-trends.ts new file mode 100644 index 00000000..2330dfe6 --- /dev/null +++ b/scripts/performance-trends.ts @@ -0,0 +1,180 @@ +#!/usr/bin/env bun +// Performance trends CLI script. +// +// Reads the performance baseline file and generates a report of how current +// performance compares to the baseline. Optionally updates the baseline and +// sends a Slack notification. +// +// Usage: +// bun run scripts/performance-trends.ts [options] +// +// Options: +// --baseline= Path to baseline JSON (default: .measurement-baseline.json) +// --output= Write JSON report to file +// --format= Output format (default: text) +// --days= Number of days for trend window (default: 30) +// --update-baseline Update the baseline with current metrics +// --slack-notify Send Slack notification +// --branch= Branch name for notifications +// --commit= Commit SHA for notifications + +import { parseArgs } from 'node:util' +import { writeFileSync } from 'node:fs' +import { + loadBaseline, + saveBaseline, + createBaseline, + updateBaselineEntry, + baselineKey, + detectRegressions, + formatRegressionReport, + buildBaselineEntry, + createTrackingSession, + recordSample, + type TrackingSession, +} from '../src/measurement-validator/performance-tracker.js' +import { createSlackNotifierFromEnv } from '../src/measurement-validator/slack-notifier.js' +import type { PerformanceMetrics } from '../src/measurement-validator/types.js' + +const { values: args } = parseArgs({ + args: process.argv.slice(2), + options: { + baseline: { type: 'string', default: '.measurement-baseline.json' }, + output: { type: 'string' }, + format: { type: 'string', default: 'text' }, + days: { type: 'string', default: '30' }, + 'update-baseline': { type: 'boolean', default: false }, + 'slack-notify': { type: 'boolean', default: false }, + branch: { type: 'string' }, + commit: { type: 'string' }, + }, + strict: false, +}) + +const BASELINE_PATH = (args['baseline'] as string) ?? 
'.measurement-baseline.json' +const LANGUAGES = ['english', 'arabic', 'chinese', 'japanese', 'thai'] +const FONT = '16px Inter, sans-serif' + +// Synthetic timing ranges derived from observed pretext benchmark medians. +// prepare() typically runs 0.8–1.2ms; layout() typically runs 0.02–0.04ms. +// Replace with real canvas timing when integrating with browser automation. +const SIM_SAMPLE_COUNT = 20 +const SIM_PREPARE_BASE_MS = 0.8 +const SIM_PREPARE_JITTER_MS = 0.4 +const SIM_LAYOUT_BASE_MS = 0.02 +const SIM_LAYOUT_JITTER_MS = 0.02 + +/** + * Synthetic performance measurement stub for a language/font pair. + * In a real integration, replace this with actual pretext prepare()/layout() + * calls driven by representative test text under high-resolution timing. + */ +function measurePerformance(language: string, font: string): PerformanceMetrics { + const session: TrackingSession = createTrackingSession(language, font) + for (let i = 0; i < SIM_SAMPLE_COUNT; i++) { + const prepareMs = SIM_PREPARE_BASE_MS + Math.random() * SIM_PREPARE_JITTER_MS + const layoutMs = SIM_LAYOUT_BASE_MS + Math.random() * SIM_LAYOUT_JITTER_MS + recordSample(session, prepareMs, layoutMs) + } + const n = session.samples.length + const avgPrepare = session.samples.reduce((s, x) => s + x.prepareMs, 0) / n + const avgLayout = session.samples.reduce((s, x) => s + x.layoutMs, 0) / n + return { + language, + font, + prepareMs: avgPrepare, + layoutMs: avgLayout, + totalMs: avgPrepare + avgLayout, + measurementCount: n, + avgMsPerMeasurement: avgPrepare + avgLayout, + } +} + +async function main() { + const baseline = loadBaseline(BASELINE_PATH) ?? 
createBaseline(args['commit'] as string | undefined) + + // Measure current performance + const current: PerformanceMetrics[] = LANGUAGES.map((lang) => measurePerformance(lang, FONT)) + + // Detect regressions + const regressions = detectRegressions(current, baseline) + + const report = formatRegressionReport(regressions, current, baseline) + + // Build entry metrics for the JSON report + const entriesByKey: Record> = {} + for (const lang of LANGUAGES) { + const session: TrackingSession = createTrackingSession(lang, FONT) + for (let i = 0; i < SIM_SAMPLE_COUNT; i++) { + recordSample( + session, + SIM_PREPARE_BASE_MS + Math.random() * SIM_PREPARE_JITTER_MS, + SIM_LAYOUT_BASE_MS + Math.random() * SIM_LAYOUT_JITTER_MS, + ) + } + entriesByKey[baselineKey(lang, FONT)] = buildBaselineEntry(session) + } + + if (args['format'] === 'json' || args['output']) { + const jsonReport = { + timestamp: Date.now(), + branch: args['branch'], + commit: args['commit'], + regressions, + metrics: entriesByKey, + } + const json = JSON.stringify(jsonReport, null, 2) + if (args['output']) { + writeFileSync(args['output'] as string, json + '\n', 'utf-8') + console.log(`Report written to ${args['output']}`) + } else { + console.log(json) + } + } else { + console.log(report) + } + + if (args['update-baseline']) { + const updated = loadBaseline(BASELINE_PATH) ?? 
createBaseline(args['commit'] as string | undefined) + for (const [key, entry] of Object.entries(entriesByKey)) { + updateBaselineEntry(updated, key, entry) + } + if (args['commit']) updated.commitSha = args['commit'] as string + saveBaseline(updated, BASELINE_PATH) + console.log(`Baseline updated: ${BASELINE_PATH}`) + } + + if (args['slack-notify']) { + const notifier = createSlackNotifierFromEnv() + if (notifier) { + const summary = { + total: current.length, + passed: current.length - regressions.length, + warnings: regressions.filter((r) => r.severity === 'warning').length, + critical: regressions.filter((r) => r.severity === 'critical').length, + passRate: (current.length - regressions.length) / current.length, + avgDeltaPercent: 0, + maxDeltaPercent: 0, + } + await notifier.sendValidationSummary(summary, regressions, { + runId: `perf-${Date.now()}`, + branch: args['branch'] as string | undefined, + commitSha: args['commit'] as string | undefined, + }) + console.log('Slack notification sent.') + } else { + console.log('SLACK_WEBHOOK_URL not set β€” skipping notification.') + } + } + + // Exit non-zero if critical regressions + if (regressions.some((r) => r.severity === 'critical')) { + console.error('\nCritical regressions detected β€” failing.') + process.exit(1) + } +} + +main().catch((err) => { + console.error(err) + process.exit(1) +}) diff --git a/scripts/start-dashboard.ts b/scripts/start-dashboard.ts new file mode 100644 index 00000000..771b4d8a --- /dev/null +++ b/scripts/start-dashboard.ts @@ -0,0 +1,64 @@ +#!/usr/bin/env bun +// Dashboard server start script. +// +// Starts the measurement-validator HTTP + WebSocket dashboard server. +// Keeps the process alive and serves the web UI on the configured port. 
+// +// Usage: +// bun run scripts/start-dashboard.ts [options] +// +// Options: +// --port= HTTP port (default: 3000) +// --host= Bind address (default: 127.0.0.1) +// --db= SQLite database path (default: .measurement-results.db) +// --open Open dashboard in default browser after start + +import { parseArgs } from 'node:util' +import { execSync } from 'node:child_process' +import { ResultsDatabase } from '../src/measurement-validator/results-database.js' +import { DashboardServer } from '../src/measurement-validator/dashboard-server.js' + +const { values: args } = parseArgs({ + args: process.argv.slice(2), + options: { + port: { type: 'string', default: '3000' }, + host: { type: 'string', default: '127.0.0.1' }, + db: { type: 'string', default: '.measurement-results.db' }, + open: { type: 'boolean', default: false }, + }, + strict: false, +}) + +const port = Number(args['port'] ?? 3000) +const host = (args['host'] as string) ?? '127.0.0.1' +const dbPath = (args['db'] as string) ?? '.measurement-results.db' + +const db = new ResultsDatabase(dbPath) +const server = new DashboardServer({ port, host, db }) + +server.start() + +if (args['open']) { + const url = `http://${host}:${port}/dashboard` + try { + const platform = process.platform + if (platform === 'darwin') execSync(`open "${url}"`) + else if (platform === 'win32') execSync(`start "" "${url}"`) + else execSync(`xdg-open "${url}"`) + } catch { + // Browser open is best-effort + } +} + +process.on('SIGINT', () => { + console.log('\nStopping dashboard server…') + server.stop() + db.close() + process.exit(0) +}) + +process.on('SIGTERM', () => { + server.stop() + db.close() + process.exit(0) +}) diff --git a/scripts/stream-validator.ts b/scripts/stream-validator.ts new file mode 100644 index 00000000..454d8829 --- /dev/null +++ b/scripts/stream-validator.ts @@ -0,0 +1,122 @@ +#!/usr/bin/env bun +// Stream validator CLI. 
+// +// Runs validation and streams results to stdout in real time as each +// language measurement completes. Useful for CI pipelines where you want +// progressive output rather than a single batch report. +// +// Usage: +// bun run scripts/stream-validator.ts [options] +// +// Options: +// --language= Only validate a specific language +// --format= Output format (default: text) +// --baseline= Performance baseline path + +import { parseArgs } from 'node:util' +import { + loadBaseline, + detectRegressions, + createTrackingSession, + recordSample, + finalizeSession, + baselineKey, +} from '../src/measurement-validator/performance-tracker.js' +import type { PerformanceMetrics } from '../src/measurement-validator/types.js' + +const { values: args } = parseArgs({ + args: process.argv.slice(2), + options: { + language: { type: 'string' }, + format: { type: 'string', default: 'text' }, + baseline: { type: 'string', default: '.measurement-baseline.json' }, + }, + strict: false, +}) + +const LANGUAGES = args['language'] ? [args['language'] as string] : ['english', 'arabic', 'chinese', 'japanese', 'thai'] +const FONT = '16px Inter, sans-serif' +const FORMAT = (args['format'] as string) ?? 'text' + +// Synthetic timing ranges derived from observed pretext benchmark medians. +// Replace with real canvas timing when integrating with browser automation. +const SIM_SAMPLE_COUNT = 20 +const SIM_PREPARE_BASE_MS = 0.8 +const SIM_PREPARE_JITTER_MS = 0.4 +const SIM_LAYOUT_BASE_MS = 0.02 +const SIM_LAYOUT_JITTER_MS = 0.02 + +function emit(data: unknown) { + if (FORMAT === 'ndjson') { + console.log(JSON.stringify(data)) + } else { + if (typeof data === 'object' && data !== null && 'type' in data) { + const d = data as Record + if (d['type'] === 'start') { + console.log(`\nStreaming measurement validation`) + console.log(`Languages: ${LANGUAGES.join(', ')}`) + console.log(`Baseline: ${args['baseline'] ?? 
'none'}\n`) + } else if (d['type'] === 'result') { + const m = d['metrics'] as PerformanceMetrics + const change = d['change'] as string + const status = d['status'] as string + const icon = status === 'ok' ? 'βœ…' : status === 'warning' ? '🟑' : 'πŸ”΄' + console.log(`${icon} ${m.language.padEnd(12)} ${m.totalMs.toFixed(2).padStart(7)}ms avg ${change}`) + } else if (d['type'] === 'complete') { + const regressions = d['regressions'] as Array<{ severity: string }> + console.log(`\nDone. ${regressions.length} regressions detected.`) + if (regressions.some((r) => r.severity === 'critical')) { + console.error('Critical regressions found β€” build should fail.') + } + } + } + } +} + +async function main() { + const baseline = loadBaseline(args['baseline'] as string) + emit({ type: 'start', timestamp: Date.now(), languages: LANGUAGES }) + + const current: PerformanceMetrics[] = [] + + for (const lang of LANGUAGES) { + const session = createTrackingSession(lang, FONT) + for (let i = 0; i < SIM_SAMPLE_COUNT; i++) { + recordSample( + session, + SIM_PREPARE_BASE_MS + Math.random() * SIM_PREPARE_JITTER_MS, + SIM_LAYOUT_BASE_MS + Math.random() * SIM_LAYOUT_JITTER_MS, + ) + } + const metrics = finalizeSession(session) + current.push(metrics) + + const base = baseline?.metrics[baselineKey(lang, FONT)] + const changePercent = base && base.avgTotalMs > 0 + ? ((metrics.totalMs - base.avgTotalMs) / base.avgTotalMs * 100) + : null + const change = changePercent !== null + ? (changePercent > 0 ? `+${changePercent.toFixed(1)}% vs baseline` : `${changePercent.toFixed(1)}% vs baseline`) + : 'no baseline' + const status = changePercent === null ? 'ok' + : changePercent >= 50 ? 'critical' + : changePercent >= 20 ? 'warning' : 'ok' + + emit({ type: 'result', metrics, change, status, timestamp: Date.now() }) + + // Allow other async work between languages + await new Promise((resolve) => setTimeout(resolve, 0)) + } + + const regressions = baseline ? 
detectRegressions(current, baseline) : [] + emit({ type: 'complete', timestamp: Date.now(), regressions, total: current.length }) + + if (regressions.some((r) => r.severity === 'critical')) { + process.exit(1) + } +} + +main().catch((err) => { + console.error(err) + process.exit(1) +}) diff --git a/scripts/watch-validator.ts b/scripts/watch-validator.ts new file mode 100644 index 00000000..5f616f6a --- /dev/null +++ b/scripts/watch-validator.ts @@ -0,0 +1,121 @@ +#!/usr/bin/env bun +// Watch mode validator CLI. +// +// Watches source files for changes and re-runs measurement validation on +// every save. Useful during active development to get immediate feedback. +// +// Usage: +// bun run scripts/watch-validator.ts [options] +// +// Options: +// --language= Only validate a specific language (default: all) +// --baseline= Performance baseline path +// --debounce= Debounce delay in milliseconds (default: 500) + +import { watch } from 'node:fs' +import { resolve } from 'node:path' +import { parseArgs } from 'node:util' +import { + loadBaseline, + detectRegressions, + formatRegressionReport, + createTrackingSession, + recordSample, + finalizeSession, +} from '../src/measurement-validator/performance-tracker.js' +import type { PerformanceMetrics } from '../src/measurement-validator/types.js' + +const { values: args } = parseArgs({ + args: process.argv.slice(2), + options: { + language: { type: 'string' }, + baseline: { type: 'string', default: '.measurement-baseline.json' }, + debounce: { type: 'string', default: '500' }, + }, + strict: false, +}) + +const WATCH_PATHS = [resolve('src'), resolve('scripts')] +const LANGUAGES = args['language'] ? [args['language'] as string] : ['english', 'arabic', 'chinese', 'japanese', 'thai'] +const FONT = '16px Inter, sans-serif' +const DEBOUNCE_MS = Number(args['debounce'] ?? 500) + +// Synthetic timing ranges derived from observed pretext benchmark medians. 
+// Replace with real canvas timing when integrating with browser automation.
+const SIM_SAMPLE_COUNT = 10
+const SIM_PREPARE_BASE_MS = 0.8
+const SIM_PREPARE_JITTER_MS = 0.4
+const SIM_LAYOUT_BASE_MS = 0.02
+const SIM_LAYOUT_JITTER_MS = 0.02
+
+let debounceTimer: ReturnType<typeof setTimeout> | null = null
+let running = false
+
+function runValidation() {
+  if (running) return
+  running = true
+
+  const baseline = loadBaseline(args['baseline'] as string)
+  const current: PerformanceMetrics[] = []
+
+  for (const lang of LANGUAGES) {
+    const session = createTrackingSession(lang, FONT)
+    for (let i = 0; i < SIM_SAMPLE_COUNT; i++) {
+      recordSample(
+        session,
+        SIM_PREPARE_BASE_MS + Math.random() * SIM_PREPARE_JITTER_MS,
+        SIM_LAYOUT_BASE_MS + Math.random() * SIM_LAYOUT_JITTER_MS,
+      )
+    }
+    current.push(finalizeSession(session))
+  }
+
+  const regressions = baseline ? detectRegressions(current, baseline) : []
+  const report = formatRegressionReport(regressions, current, baseline)
+
+  console.clear()
+  console.log(`\x1b[36m[watch]\x1b[0m ${new Date().toLocaleTimeString()} β€” validating ${LANGUAGES.join(', ')}`)
+  console.log(report)
+
+  if (regressions.some((r) => r.severity === 'critical')) {
+    console.error('\x1b[31mβœ— Critical regressions detected\x1b[0m')
+  } else if (regressions.some((r) => r.severity === 'warning')) {
+    console.warn('\x1b[33m⚠ Warnings detected\x1b[0m')
+  } else {
+    console.log('\x1b[32mβœ“ All checks passed\x1b[0m')
+  }
+
+  running = false
+}
+
+function scheduleRun() {
+  if (debounceTimer) clearTimeout(debounceTimer)
+  debounceTimer = setTimeout(runValidation, DEBOUNCE_MS)
+}
+
+console.log(`\x1b[36m[watch]\x1b[0m Starting measurement validator in watch mode`)
+console.log(`Watching: ${WATCH_PATHS.join(', ')}`)
+console.log(`Languages: ${LANGUAGES.join(', ')}`)
+if (args['baseline']) console.log(`Baseline: ${args['baseline']}`)
+console.log('Press Ctrl+C to stop\n')
+
+// Initial run
+runValidation()
+
+// Watch source directories
+for (const dir of
WATCH_PATHS) {
+  try {
+    watch(dir, { recursive: true }, (event, filename) => {
+      if (filename && /\.(ts|json)$/.test(filename)) {
+        scheduleRun()
+      }
+    })
+  } catch {
+    // Directory may not exist; skip silently
+  }
+}
+
+process.on('SIGINT', () => {
+  console.log('\n\x1b[36m[watch]\x1b[0m Stopped.')
+  process.exit(0)
+})
diff --git a/src/measurement-validator/dashboard-server.test.ts b/src/measurement-validator/dashboard-server.test.ts
new file mode 100644
index 00000000..391b4607
--- /dev/null
+++ b/src/measurement-validator/dashboard-server.test.ts
@@ -0,0 +1,165 @@
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test'
+import { ResultsDatabase } from './results-database.js'
+import { DashboardServer } from './dashboard-server.js'
+import { mkdtempSync, unlinkSync, existsSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import type { MeasurementResult, ValidationRun, ValidationSummary } from './types.js'
+
+function makeResult(overrides: Partial<MeasurementResult> = {}): MeasurementResult {
+  return {
+    id: `r-${Math.random().toString(36).slice(2)}`,
+    timestamp: Date.now(),
+    language: 'english',
+    font: '16px Inter',
+    fontSize: 16,
+    text: 'Hello world',
+    pretextWidth: 100.0,
+    domWidth: 100.5,
+    delta: 0.5,
+    deltaPercent: 0.5,
+    severity: 'ok',
+    ...overrides,
+  }
+}
+
+function makeRun(
+  results: MeasurementResult[] = [],
+  overrides: Partial<ValidationRun> = {},
+): ValidationRun {
+  const summary: ValidationSummary = {
+    total: results.length,
+    passed: results.length,
+    warnings: 0,
+    critical: 0,
+    passRate: 1.0,
+    avgDeltaPercent: 0,
+    maxDeltaPercent: 0,
+  }
+  return {
+    id: `run-${Math.random().toString(36).slice(2)}`,
+    timestamp: Date.now(),
+    durationMs: 100,
+    results,
+    summary,
+    ...overrides,
+  }
+}
+
+describe('DashboardServer HTTP API', () => {
+  let db: ResultsDatabase
+  let server: DashboardServer
+  let dbPath: string
+  const port = 13200 + Math.floor(Math.random() * 100)
+
+  beforeEach(() => {
+    const dir =
mkdtempSync(join(tmpdir(), 'dash-test-')) + dbPath = join(dir, 'test.db') + db = new ResultsDatabase(dbPath) + server = new DashboardServer({ port, host: '127.0.0.1', db }) + server.start() + }) + + afterEach(() => { + server.stop() + db.close() + if (existsSync(dbPath)) unlinkSync(dbPath) + }) + + test('GET /api/summary returns statistics JSON', async () => { + const r = await fetch(`http://127.0.0.1:${port}/api/summary`) + expect(r.status).toBe(200) + const json = await r.json() + expect(typeof json.totalRuns).toBe('number') + expect(typeof json.totalResults).toBe('number') + expect(Array.isArray(json.languages)).toBe(true) + }) + + test('GET /api/results returns empty array initially', async () => { + const r = await fetch(`http://127.0.0.1:${port}/api/results`) + expect(r.status).toBe(200) + const json = await r.json() + expect(Array.isArray(json)).toBe(true) + expect(json).toHaveLength(0) + }) + + test('GET /api/results returns inserted results', async () => { + db.insertRun(makeRun([makeResult(), makeResult({ language: 'arabic' })])) + const r = await fetch(`http://127.0.0.1:${port}/api/results`) + const json = await r.json() + expect(json).toHaveLength(2) + }) + + test('GET /api/results filters by language', async () => { + db.insertRun(makeRun([makeResult({ language: 'english' }), makeResult({ language: 'arabic' })])) + const r = await fetch(`http://127.0.0.1:${port}/api/results?language=english`) + const json = await r.json() + expect(json).toHaveLength(1) + expect(json[0].language).toBe('english') + }) + + test('GET /api/runs returns run list', async () => { + db.insertRun(makeRun([makeResult()])) + const r = await fetch(`http://127.0.0.1:${port}/api/runs`) + expect(r.status).toBe(200) + const json = await r.json() + expect(Array.isArray(json)).toBe(true) + expect(json).toHaveLength(1) + }) + + test('GET /api/runs/:id returns specific run', async () => { + const run = makeRun([makeResult()]) + db.insertRun(run) + const r = await 
fetch(`http://127.0.0.1:${port}/api/runs/${run.id}`) + expect(r.status).toBe(200) + const json = await r.json() + expect(json.id).toBe(run.id) + }) + + test('GET /api/runs/:id returns 404 for unknown id', async () => { + const r = await fetch(`http://127.0.0.1:${port}/api/runs/nonexistent`) + expect(r.status).toBe(404) + }) + + test('GET /dashboard serves HTML', async () => { + const r = await fetch(`http://127.0.0.1:${port}/dashboard`) + expect(r.status).toBe(200) + const text = await r.text() + expect(text).toContain('') + expect(text).toContain('Measurement Validator Dashboard') + }) + + test('GET / also serves HTML', async () => { + const r = await fetch(`http://127.0.0.1:${port}/`) + expect(r.status).toBe(200) + const text = await r.text() + expect(text).toContain('') + }) + + test('GET /api/performance/trends returns trend data', async () => { + const r = await fetch(`http://127.0.0.1:${port}/api/performance/trends?language=english&days=7`) + expect(r.status).toBe(200) + const json = await r.json() + expect(Array.isArray(json)).toBe(true) + }) + + test('unknown path returns 404', async () => { + const r = await fetch(`http://127.0.0.1:${port}/api/unknown`) + expect(r.status).toBe(404) + }) + + test('OPTIONS request returns CORS headers', async () => { + const r = await fetch(`http://127.0.0.1:${port}/api/results`, { method: 'OPTIONS' }) + expect(r.status).toBe(200) + expect(r.headers.get('access-control-allow-origin')).toBe('*') + }) + + test('publishRun stores run in database', async () => { + const run = makeRun([makeResult()]) + server.publishRun(run) + const r = await fetch(`http://127.0.0.1:${port}/api/runs`) + const json = await r.json() + expect(json).toHaveLength(1) + expect(json[0].id).toBe(run.id) + }) +}) diff --git a/src/measurement-validator/dashboard-server.ts b/src/measurement-validator/dashboard-server.ts new file mode 100644 index 00000000..e6accc48 --- /dev/null +++ b/src/measurement-validator/dashboard-server.ts @@ -0,0 +1,311 @@ +// HTTP + 
WebSocket dashboard server for the measurement-validator. +// +// Exposes a REST API for validation results and a simple HTML web UI. +// WebSocket clients receive real-time push events when new runs are stored. + +import type { DatabaseQueryOptions, ValidationRun } from './types.js' +import type { ResultsDatabase } from './results-database.js' + +const DASHBOARD_HTML = ` + + + + + Measurement Validator Dashboard + + + +
+
+

Measurement Validator Dashboard

+ Connecting… +
+
+
+
Total Runs
β€”
+
Total Results
β€”
+
Avg Pass Rate
β€”
+
Critical Issues
β€”
+
+
+

Recent Results

+
+ + + +
+ + + +
LanguageFontTextPretextDOMDeltaSeverity
+ +
+
+
+
+
+`
+
+export type DashboardServerConfig = {
+  port?: number
+  host?: string
+  db: ResultsDatabase
+}
+
+type WsClient = {
+  send: (msg: string) => void
+  close: () => void
+}
+
+export class DashboardServer {
+  private config: DashboardServerConfig
+  private wsClients: Set<WsClient> = new Set()
+  private server: ReturnType<typeof Bun.serve> | null = null
+
+  constructor(config: DashboardServerConfig) {
+    this.config = config
+  }
+
+  start(): void {
+    const { db } = this.config
+    const port = this.config.port ?? 3000
+    const host = this.config.host ?? '127.0.0.1'
+    const clients = this.wsClients
+
+    this.server = Bun.serve({
+      port,
+      hostname: host,
+      fetch(req, server) {
+        const url = new URL(req.url)
+
+        // WebSocket upgrade
+        if (url.pathname === '/ws/results') {
+          const upgraded = server.upgrade(req)
+          if (upgraded) return undefined
+          return new Response('WebSocket upgrade failed', { status: 400 })
+        }
+
+        // CORS headers for API
+        const corsHeaders = {
+          'Access-Control-Allow-Origin': '*',
+          'Access-Control-Allow-Methods': 'GET, OPTIONS',
+          'Access-Control-Allow-Headers': 'Content-Type',
+          'Content-Type': 'application/json',
+        }
+
+        if (req.method === 'OPTIONS') return new Response(null, { headers: corsHeaders })
+
+        if (url.pathname === '/api/results') {
+          const opts: DatabaseQueryOptions = {}
+          const lang = url.searchParams.get('language')
+          const sev = url.searchParams.get('severity')
+          const since = url.searchParams.get('since')
+          const limit = url.searchParams.get('limit')
+          const offset = url.searchParams.get('offset')
+          if (lang) opts.language = lang
+          if (sev === 'ok' || sev === 'warning' || sev === 'critical') opts.severity = sev
+          if (since) opts.since = Number(since)
+          if (limit) opts.limit = Number(limit)
+          if (offset) opts.offset = Number(offset)
+          const results = db.queryResults(opts)
+          return new Response(JSON.stringify(results), { headers: corsHeaders })
+        }
+
+        if (url.pathname === '/api/runs') {
+          const limit = Number(url.searchParams.get('limit') ??
50) + const runs = db.queryRuns(limit) + return new Response(JSON.stringify(runs), { headers: corsHeaders }) + } + + if (url.pathname.startsWith('/api/runs/')) { + const id = url.pathname.slice('/api/runs/'.length) + const run = db.getRunById(id) + if (!run) return new Response('Not found', { status: 404 }) + return new Response(JSON.stringify(run), { headers: corsHeaders }) + } + + if (url.pathname === '/api/summary') { + const stats = db.getStatistics() + return new Response(JSON.stringify(stats), { headers: corsHeaders }) + } + + if (url.pathname === '/api/performance/trends') { + const lang = url.searchParams.get('language') ?? 'english' + const days = Number(url.searchParams.get('days') ?? 30) + const trends = db.getLanguageTrends(lang, days) + return new Response(JSON.stringify(trends), { headers: corsHeaders }) + } + + // Dashboard UI + if (url.pathname === '/' || url.pathname === '/dashboard') { + return new Response(DASHBOARD_HTML, { + headers: { 'Content-Type': 'text/html; charset=utf-8' }, + }) + } + + return new Response('Not found', { status: 404 }) + }, + websocket: { + open(ws) { + clients.add(ws) + }, + close(ws) { + clients.delete(ws) + }, + message() { + // Clients don't send messages in this protocol + }, + }, + }) + + console.log(`Dashboard server running at http://${host}:${port}`) + console.log(` API: http://${host}:${port}/api/results`) + console.log(` UI: http://${host}:${port}/dashboard`) + console.log(` WS: ws://${host}:${port}/ws/results`) + } + + /** Notify all connected WebSocket clients that a new run is available. */ + broadcast(event: { type: string; runId?: string }): void { + const msg = JSON.stringify(event) + for (const client of this.wsClients) { + try { + client.send(msg) + } catch { + this.wsClients.delete(client) + } + } + } + + /** Store a new validation run and notify WebSocket clients. 
*/ + publishRun(run: ValidationRun): void { + this.config.db.insertRun(run) + this.broadcast({ type: 'run_complete', runId: run.id }) + } + + stop(): void { + if (this.server) { + this.server.stop() + this.server = null + } + } + + get url(): string { + const port = this.config.port ?? 3000 + const host = this.config.host ?? '127.0.0.1' + return `http://${host}:${port}` + } +} diff --git a/src/measurement-validator/index.ts b/src/measurement-validator/index.ts new file mode 100644 index 00000000..f06ccd0b --- /dev/null +++ b/src/measurement-validator/index.ts @@ -0,0 +1,46 @@ +// Public surface for the measurement-validator tools. +// +// Import specific modules directly for tree-shaking: +// import { ResultsDatabase } from './src/measurement-validator/results-database.js' +// import { DashboardServer } from './src/measurement-validator/dashboard-server.js' +// import { SlackNotifier } from './src/measurement-validator/slack-notifier.js' +// import { detectRegressions } from './src/measurement-validator/performance-tracker.js' + +export type { + DatabaseQueryOptions, + MeasurementResult, + PerformanceBaseline, + PerformanceMetrics, + PerformanceRegression, + Severity, + SlackAttachment, + SlackBlock, + SlackMessage, + ValidationRun, + ValidationSummary, +} from './types.js' + +export { + baselineKey, + buildBaselineEntry, + createBaseline, + createTrackingSession, + detectRegressions, + finalizeSession, + formatRegressionReport, + loadBaseline, + recordSample, + saveBaseline, + updateBaselineEntry, +} from './performance-tracker.js' + +export { ResultsDatabase } from './results-database.js' + +export { DashboardServer } from './dashboard-server.js' +export type { DashboardServerConfig } from './dashboard-server.js' + +export { + SlackNotifier, + createSlackNotifierFromEnv, +} from './slack-notifier.js' +export type { SlackNotifierConfig, SlackNotifierOptions } from './slack-notifier.js' diff --git a/src/measurement-validator/performance-tracker.test.ts 
b/src/measurement-validator/performance-tracker.test.ts new file mode 100644 index 00000000..1081a049 --- /dev/null +++ b/src/measurement-validator/performance-tracker.test.ts @@ -0,0 +1,197 @@ +import { describe, test, expect, beforeEach, afterEach } from 'bun:test' +import { + createTrackingSession, + recordSample, + finalizeSession, + buildBaselineEntry, + createBaseline, + updateBaselineEntry, + baselineKey, + detectRegressions, + formatRegressionReport, + loadBaseline, + saveBaseline, +} from './performance-tracker.js' +import { mkdtempSync, unlinkSync, existsSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import type { PerformanceBaseline, PerformanceMetrics } from './types.js' + +describe('createTrackingSession', () => { + test('creates a session with empty samples', () => { + const s = createTrackingSession('english', '16px Inter') + expect(s.language).toBe('english') + expect(s.font).toBe('16px Inter') + expect(s.samples).toHaveLength(0) + expect(s.startedAt).toBeGreaterThan(0) + }) +}) + +describe('recordSample / finalizeSession', () => { + test('finalizes empty session with zeros', () => { + const s = createTrackingSession('english', '16px Inter') + const m = finalizeSession(s) + expect(m.measurementCount).toBe(0) + expect(m.totalMs).toBe(0) + }) + + test('computes averages correctly', () => { + const s = createTrackingSession('english', '16px Inter') + recordSample(s, 1.0, 0.1) + recordSample(s, 2.0, 0.2) + recordSample(s, 3.0, 0.3) + const m = finalizeSession(s) + expect(m.prepareMs).toBeCloseTo(2.0) + expect(m.layoutMs).toBeCloseTo(0.2) + expect(m.totalMs).toBeCloseTo(2.2) + expect(m.measurementCount).toBe(3) + }) +}) + +describe('buildBaselineEntry', () => { + test('builds entry with correct stats', () => { + const s = createTrackingSession('english', '16px Inter') + for (let i = 1; i <= 10; i++) { + recordSample(s, i * 0.1, 0.01) + } + const entry = buildBaselineEntry(s) + expect(entry.sampleCount).toBe(10) 
+ expect(entry.avgTotalMs).toBeGreaterThan(0) + expect(entry.p95TotalMs).toBeGreaterThanOrEqual(entry.avgTotalMs) + }) + + test('empty session returns zeros', () => { + const s = createTrackingSession('arabic', '16px Inter') + const entry = buildBaselineEntry(s) + expect(entry.sampleCount).toBe(0) + expect(entry.avgTotalMs).toBe(0) + expect(entry.p95TotalMs).toBe(0) + }) +}) + +describe('detectRegressions', () => { + function makeBaseline(avgTotal: number): PerformanceBaseline { + const b = createBaseline() + updateBaselineEntry(b, baselineKey('english', '16px Inter'), { + avgPrepareMs: avgTotal * 0.9, + avgLayoutMs: avgTotal * 0.1, + avgTotalMs: avgTotal, + p95PrepareMs: avgTotal * 1.2, + p95LayoutMs: avgTotal * 0.15, + p95TotalMs: avgTotal * 1.3, + sampleCount: 20, + capturedAt: Date.now(), + }) + return b + } + + function makeMetrics(total: number): PerformanceMetrics { + return { + language: 'english', + font: '16px Inter', + prepareMs: total * 0.9, + layoutMs: total * 0.1, + totalMs: total, + measurementCount: 10, + avgMsPerMeasurement: total, + } + } + + test('no regressions when performance is similar', () => { + const baseline = makeBaseline(1.0) + const current = [makeMetrics(1.05)] // +5% β€” below 20% threshold + const r = detectRegressions(current, baseline) + expect(r).toHaveLength(0) + }) + + test('detects warning regression at 20%+', () => { + const baseline = makeBaseline(1.0) + const current = [makeMetrics(1.25)] // +25% + const r = detectRegressions(current, baseline) + const warnings = r.filter((x) => x.severity === 'warning') + expect(warnings.length).toBeGreaterThan(0) + expect(warnings[0]!.language).toBe('english') + }) + + test('detects critical regression at 50%+', () => { + const baseline = makeBaseline(1.0) + const current = [makeMetrics(1.6)] // +60% + const r = detectRegressions(current, baseline) + const critical = r.filter((x) => x.severity === 'critical') + expect(critical.length).toBeGreaterThan(0) + }) + + test('no regressions when 
baseline has zero values', () => { + const b = createBaseline() + updateBaselineEntry(b, baselineKey('english', '16px Inter'), { + avgPrepareMs: 0, avgLayoutMs: 0, avgTotalMs: 0, + p95PrepareMs: 0, p95LayoutMs: 0, p95TotalMs: 0, + sampleCount: 0, capturedAt: Date.now(), + }) + const r = detectRegressions([makeMetrics(2.0)], b) + expect(r).toHaveLength(0) + }) + + test('skips languages not in baseline', () => { + const baseline = makeBaseline(1.0) + const r = detectRegressions( + [{ ...makeMetrics(5.0), language: 'chinese', font: '16px Inter' }], + baseline, + ) + expect(r).toHaveLength(0) + }) +}) + +describe('formatRegressionReport', () => { + test('includes passed message when no regressions', () => { + const report = formatRegressionReport([], [], null) + expect(report).toContain('No performance regressions detected') + }) + + test('includes regression details', () => { + const regressions = [ + { + language: 'arabic', + metric: 'avgTotalMs' as const, + baselineMs: 1.0, + currentMs: 2.0, + changePercent: 100, + severity: 'critical' as const, + }, + ] + const report = formatRegressionReport(regressions, [], null) + expect(report).toContain('arabic') + expect(report).toContain('CRITICAL') + }) +}) + +describe('loadBaseline / saveBaseline', () => { + let tmpPath: string + + beforeEach(() => { + const dir = mkdtempSync(join(tmpdir(), 'baseline-test-')) + tmpPath = join(dir, 'baseline.json') + }) + + afterEach(() => { + if (existsSync(tmpPath)) unlinkSync(tmpPath) + }) + + test('returns null for missing file', () => { + expect(loadBaseline('/nonexistent/path.json')).toBeNull() + }) + + test('round-trips baseline data', () => { + const b = createBaseline('abc123') + updateBaselineEntry(b, 'english::16px Inter', { + avgPrepareMs: 1.0, avgLayoutMs: 0.1, avgTotalMs: 1.1, + p95PrepareMs: 1.5, p95LayoutMs: 0.15, p95TotalMs: 1.65, + sampleCount: 50, capturedAt: Date.now(), + }) + saveBaseline(b, tmpPath) + const loaded = loadBaseline(tmpPath) + expect(loaded).not.toBeNull() 
+ expect(loaded!.commitSha).toBe('abc123') + expect(loaded!.metrics['english::16px Inter']!.avgTotalMs).toBeCloseTo(1.1) + }) +}) diff --git a/src/measurement-validator/performance-tracker.ts b/src/measurement-validator/performance-tracker.ts new file mode 100644 index 00000000..517c1d91 --- /dev/null +++ b/src/measurement-validator/performance-tracker.ts @@ -0,0 +1,220 @@ +// Performance tracking for the measurement-validator. +// +// Tracks timing metrics per language/font combination and compares against +// a version-controlled baseline file. Regression detection flags changes +// beyond configurable thresholds. + +import { readFileSync, writeFileSync, existsSync } from 'node:fs' +import type { + BaselineEntry, + PerformanceBaseline, + PerformanceMetrics, + PerformanceRegression, +} from './types.js' + +const REGRESSION_WARNING_THRESHOLD = 0.20 // 20% slower = warning +const REGRESSION_CRITICAL_THRESHOLD = 0.50 // 50% slower = critical +const DEFAULT_BASELINE_PATH = '.measurement-baseline.json' + +export type TrackingSession = { + language: string + font: string + samples: Array<{ prepareMs: number; layoutMs: number }> + startedAt: number +} + +export function createTrackingSession(language: string, font: string): TrackingSession { + return { language, font, samples: [], startedAt: Date.now() } +} + +export function recordSample( + session: TrackingSession, + prepareMs: number, + layoutMs: number, +): void { + session.samples.push({ prepareMs, layoutMs }) +} + +export function finalizeSession(session: TrackingSession): PerformanceMetrics { + const { language, font, samples } = session + if (samples.length === 0) { + return { + language, + font, + prepareMs: 0, + layoutMs: 0, + totalMs: 0, + measurementCount: 0, + avgMsPerMeasurement: 0, + } + } + const avgPrepare = samples.reduce((s, x) => s + x.prepareMs, 0) / samples.length + const avgLayout = samples.reduce((s, x) => s + x.layoutMs, 0) / samples.length + const avgTotal = avgPrepare + avgLayout + return { + 
language, + font, + prepareMs: avgPrepare, + layoutMs: avgLayout, + totalMs: avgTotal, + measurementCount: samples.length, + avgMsPerMeasurement: avgTotal, + } +} + +function percentile(sorted: number[], p: number): number { + if (sorted.length === 0) return 0 + const idx = Math.ceil((p / 100) * sorted.length) - 1 + return sorted[Math.max(0, Math.min(idx, sorted.length - 1))]! +} + +export function buildBaselineEntry(session: TrackingSession): BaselineEntry { + const totals = session.samples.map((s) => s.prepareMs + s.layoutMs).sort((a, b) => a - b) + const prepares = session.samples.map((s) => s.prepareMs).sort((a, b) => a - b) + const layouts = session.samples.map((s) => s.layoutMs).sort((a, b) => a - b) + const n = session.samples.length + return { + avgPrepareMs: n > 0 ? prepares.reduce((s, x) => s + x, 0) / n : 0, + avgLayoutMs: n > 0 ? layouts.reduce((s, x) => s + x, 0) / n : 0, + avgTotalMs: n > 0 ? totals.reduce((s, x) => s + x, 0) / n : 0, + p95PrepareMs: percentile(prepares, 95), + p95LayoutMs: percentile(layouts, 95), + p95TotalMs: percentile(totals, 95), + sampleCount: n, + capturedAt: Date.now(), + } +} + +export function loadBaseline(path: string = DEFAULT_BASELINE_PATH): PerformanceBaseline | null { + try { + if (!existsSync(path)) return null + const raw = readFileSync(path, 'utf-8') + return JSON.parse(raw) as PerformanceBaseline + } catch { + return null + } +} + +export function saveBaseline( + baseline: PerformanceBaseline, + path: string = DEFAULT_BASELINE_PATH, +): void { + baseline.updatedAt = Date.now() + writeFileSync(path, JSON.stringify(baseline, null, 2) + '\n', 'utf-8') +} + +export function createBaseline(commitSha?: string): PerformanceBaseline { + return { + version: '1', + createdAt: Date.now(), + updatedAt: Date.now(), + commitSha, + metrics: {}, + } +} + +export function updateBaselineEntry( + baseline: PerformanceBaseline, + key: string, + entry: BaselineEntry, +): void { + baseline.metrics[key] = entry + baseline.updatedAt = 
Date.now() +} + +export function baselineKey(language: string, font: string): string { + return `${language}::${font}` +} + +export function detectRegressions( + current: PerformanceMetrics[], + baseline: PerformanceBaseline, + warningThreshold: number = REGRESSION_WARNING_THRESHOLD, + criticalThreshold: number = REGRESSION_CRITICAL_THRESHOLD, +): PerformanceRegression[] { + const regressions: PerformanceRegression[] = [] + for (const metrics of current) { + const key = baselineKey(metrics.language, metrics.font) + const base = baseline.metrics[key] + if (!base) continue + + const checks: Array<{ + metric: keyof Pick + baseline: number + current: number + }> = [ + { metric: 'avgTotalMs', baseline: base.avgTotalMs, current: metrics.totalMs }, + { metric: 'p95TotalMs', baseline: base.p95TotalMs, current: metrics.totalMs }, + ] + + for (const check of checks) { + if (check.baseline <= 0) continue + const change = (check.current - check.baseline) / check.baseline + if (change >= criticalThreshold) { + regressions.push({ + language: metrics.language, + metric: check.metric, + baselineMs: check.baseline, + currentMs: check.current, + changePercent: change * 100, + severity: 'critical', + }) + } else if (change >= warningThreshold) { + regressions.push({ + language: metrics.language, + metric: check.metric, + baselineMs: check.baseline, + currentMs: check.current, + changePercent: change * 100, + severity: 'warning', + }) + } + } + } + return regressions +} + +export function formatRegressionReport( + regressions: PerformanceRegression[], + current: PerformanceMetrics[], + baseline: PerformanceBaseline | null, +): string { + const lines: string[] = ['# Performance Report\n'] + + if (baseline) { + lines.push( + `Baseline captured: ${new Date(baseline.updatedAt).toISOString()}`, + baseline.commitSha ? 
`Baseline commit: ${baseline.commitSha}` : '', + '', + ) + } + + lines.push('## Current Metrics\n') + for (const m of current) { + const key = baselineKey(m.language, m.font) + const base = baseline?.metrics[key] + const change = base && base.avgTotalMs > 0 + ? ((m.totalMs - base.avgTotalMs) / base.avgTotalMs * 100).toFixed(1) + : null + const trend = change !== null + ? (Number(change) > 0 ? ` (+${change}%)` : ` (${change}%)`) + : ' (no baseline)' + lines.push(`- **${m.language}** ${m.font}: ${m.totalMs.toFixed(2)}ms avg${trend}`) + } + + if (regressions.length > 0) { + lines.push('\n## ⚠️ Regressions Detected\n') + for (const r of regressions) { + const icon = r.severity === 'critical' ? 'πŸ”΄' : '🟑' + lines.push( + `${icon} **${r.language}** (${r.metric}): ` + + `${r.baselineMs.toFixed(2)}ms β†’ ${r.currentMs.toFixed(2)}ms ` + + `(+${r.changePercent.toFixed(1)}%) [${r.severity.toUpperCase()}]`, + ) + } + } else { + lines.push('\nβœ… No performance regressions detected.') + } + + return lines.filter((l) => l !== '').join('\n') +} diff --git a/src/measurement-validator/results-database.test.ts b/src/measurement-validator/results-database.test.ts new file mode 100644 index 00000000..562a111f --- /dev/null +++ b/src/measurement-validator/results-database.test.ts @@ -0,0 +1,164 @@ +import { describe, test, expect, beforeEach, afterEach } from 'bun:test' +import { ResultsDatabase } from './results-database.js' +import { mkdtempSync, unlinkSync, existsSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import type { MeasurementResult, ValidationRun, ValidationSummary } from './types.js' + +function makeResult(overrides: Partial = {}): MeasurementResult { + return { + id: `r-${Math.random().toString(36).slice(2)}`, + timestamp: Date.now(), + language: 'english', + font: '16px Inter', + fontSize: 16, + text: 'Hello world', + pretextWidth: 100.0, + domWidth: 100.5, + delta: 0.5, + deltaPercent: 0.5, + severity: 'ok', + ...overrides, 
+  }
+}
+
+function makeSummary(overrides: Partial<ValidationSummary> = {}): ValidationSummary {
+  return {
+    total: 10,
+    passed: 9,
+    warnings: 1,
+    critical: 0,
+    passRate: 0.9,
+    avgDeltaPercent: 0.5,
+    maxDeltaPercent: 2.0,
+    ...overrides,
+  }
+}
+
+function makeRun(
+  results: MeasurementResult[] = [],
+  overrides: Partial<ValidationRun> = {},
+): ValidationRun {
+  return {
+    id: `run-${Math.random().toString(36).slice(2)}`,
+    timestamp: Date.now(),
+    durationMs: 1234,
+    results,
+    summary: makeSummary({ total: results.length, passed: results.length }),
+    ...overrides,
+  }
+}
+
+describe('ResultsDatabase', () => {
+  let db: ResultsDatabase
+  let dbPath: string
+
+  beforeEach(() => {
+    const dir = mkdtempSync(join(tmpdir(), 'mv-db-test-'))
+    dbPath = join(dir, 'test.db')
+    db = new ResultsDatabase(dbPath)
+  })
+
+  afterEach(() => {
+    db.close()
+    if (existsSync(dbPath)) unlinkSync(dbPath)
+  })
+
+  test('inserts and retrieves a run', () => {
+    const results = [
+      makeResult({ language: 'english' }),
+      makeResult({ language: 'arabic', severity: 'warning' }),
+    ]
+    const run = makeRun(results)
+    db.insertRun(run)
+
+    const loaded = db.getRunById(run.id)
+    expect(loaded).not.toBeNull()
+    expect(loaded!.id).toBe(run.id)
+    expect(loaded!.results).toHaveLength(2)
+  })
+
+  test('getLatestRun returns most recent', () => {
+    const run1 = makeRun([], { timestamp: Date.now() - 10000 })
+    const run2 = makeRun([], { timestamp: Date.now() })
+    db.insertRun(run1)
+    db.insertRun(run2)
+
+    const latest = db.getLatestRun()
+    expect(latest!.id).toBe(run2.id)
+  })
+
+  test('queryResults filters by language', () => {
+    const r1 = makeResult({ language: 'english' })
+    const r2 = makeResult({ language: 'arabic' })
+    db.insertRun(makeRun([r1, r2]))
+
+    const english = db.queryResults({ language: 'english' })
+    expect(english).toHaveLength(1)
+    expect(english[0]!.language).toBe('english')
+  })
+
+  test('queryResults filters by severity', () => {
+    const r1 = makeResult({ severity: 'ok' })
+    const r2 = makeResult({
severity: 'critical' }) + db.insertRun(makeRun([r1, r2])) + + const criticals = db.queryResults({ severity: 'critical' }) + expect(criticals).toHaveLength(1) + expect(criticals[0]!.severity).toBe('critical') + }) + + test('queryResults with limit', () => { + const results = Array.from({ length: 10 }, () => makeResult()) + db.insertRun(makeRun(results)) + const limited = db.queryResults({ limit: 3 }) + expect(limited).toHaveLength(3) + }) + + test('getStatistics returns correct counts', () => { + const results = [ + makeResult({ severity: 'ok' }), + makeResult({ severity: 'critical' }), + makeResult({ language: 'chinese' }), + ] + db.insertRun(makeRun(results)) + const stats = db.getStatistics() + expect(stats.totalRuns).toBe(1) + expect(stats.totalResults).toBe(3) + expect(stats.criticalCount).toBe(1) + expect(stats.languages).toContain('english') + expect(stats.languages).toContain('chinese') + }) + + test('stores and retrieves metadata', () => { + const r = makeResult({ metadata: { extra: 'data', count: 42 } }) + db.insertRun(makeRun([r])) + const [loaded] = db.queryResults() + expect(loaded!.metadata).toEqual({ extra: 'data', count: 42 }) + }) + + test('stores and retrieves commit sha and branch', () => { + const run = makeRun([], { commitSha: 'abc123', branch: 'main' }) + db.insertRun(run) + const loaded = db.getRunById(run.id) + expect(loaded!.commitSha).toBe('abc123') + expect(loaded!.branch).toBe('main') + }) + + test('queryRuns returns multiple runs in desc order', () => { + const run1 = makeRun([], { timestamp: 1000 }) + const run2 = makeRun([], { timestamp: 2000 }) + db.insertRun(run1) + db.insertRun(run2) + const runs = db.queryRuns() + expect(runs[0]!.timestamp).toBeGreaterThan(runs[1]!.timestamp) + }) + + test('returns null for unknown run id', () => { + expect(db.getRunById('nonexistent')).toBeNull() + }) + + test('returns null from getLatestRun on empty db', () => { + expect(db.getLatestRun()).toBeNull() + }) +}) diff --git 
a/src/measurement-validator/results-database.ts b/src/measurement-validator/results-database.ts new file mode 100644 index 00000000..e23aa727 --- /dev/null +++ b/src/measurement-validator/results-database.ts @@ -0,0 +1,306 @@ +// SQLite-backed results database for the measurement-validator. +// +// Stores all validation runs and measurement results for historical analysis, +// trend detection, and dashboard queries. Uses Bun's built-in SQLite support. + +import { Database } from 'bun:sqlite' +import type { + DatabaseQueryOptions, + MeasurementResult, + Severity, + ValidationRun, + ValidationSummary, +} from './types.js' + +const SCHEMA = ` +CREATE TABLE IF NOT EXISTS validation_runs ( + id TEXT PRIMARY KEY, + timestamp INTEGER NOT NULL, + commit_sha TEXT, + branch TEXT, + duration_ms INTEGER NOT NULL, + total INTEGER NOT NULL, + passed INTEGER NOT NULL, + warnings INTEGER NOT NULL, + critical INTEGER NOT NULL, + pass_rate REAL NOT NULL, + avg_delta_percent REAL NOT NULL, + max_delta_percent REAL NOT NULL +); + +CREATE TABLE IF NOT EXISTS measurement_results ( + id TEXT PRIMARY KEY, + run_id TEXT NOT NULL REFERENCES validation_runs(id) ON DELETE CASCADE, + timestamp INTEGER NOT NULL, + language TEXT NOT NULL, + font TEXT NOT NULL, + font_size REAL NOT NULL, + text TEXT NOT NULL, + pretext_width REAL NOT NULL, + dom_width REAL NOT NULL, + delta REAL NOT NULL, + delta_percent REAL NOT NULL, + severity TEXT NOT NULL, + root_cause TEXT, + metadata TEXT +); + +CREATE INDEX IF NOT EXISTS idx_results_run ON measurement_results(run_id); +CREATE INDEX IF NOT EXISTS idx_results_lang ON measurement_results(language); +CREATE INDEX IF NOT EXISTS idx_results_severity ON measurement_results(severity); +CREATE INDEX IF NOT EXISTS idx_results_ts ON measurement_results(timestamp); +CREATE INDEX IF NOT EXISTS idx_runs_ts ON validation_runs(timestamp); +` + +export class ResultsDatabase { + private db: Database + + constructor(path: string = '.measurement-results.db') { + this.db = 
new Database(path) + this.db.exec(SCHEMA) + } + + insertRun(run: ValidationRun): void { + const { summary } = run + this.db + .prepare( + `INSERT OR REPLACE INTO validation_runs + (id, timestamp, commit_sha, branch, duration_ms, + total, passed, warnings, critical, + pass_rate, avg_delta_percent, max_delta_percent) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + ) + .run( + run.id, + run.timestamp, + run.commitSha ?? null, + run.branch ?? null, + run.durationMs, + summary.total, + summary.passed, + summary.warnings, + summary.critical, + summary.passRate, + summary.avgDeltaPercent, + summary.maxDeltaPercent, + ) + + const stmt = this.db.prepare( + `INSERT OR REPLACE INTO measurement_results + (id, run_id, timestamp, language, font, font_size, + text, pretext_width, dom_width, delta, delta_percent, + severity, root_cause, metadata) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + ) + for (const r of run.results) { + stmt.run( + r.id, + run.id, + r.timestamp, + r.language, + r.font, + r.fontSize, + r.text, + r.pretextWidth, + r.domWidth, + r.delta, + r.deltaPercent, + r.severity, + r.rootCause ?? null, + r.metadata ? JSON.stringify(r.metadata) : null, + ) + } + } + + queryResults(opts: DatabaseQueryOptions = {}): MeasurementResult[] { + const conditions: string[] = [] + const params: (string | number | null)[] = [] + + if (opts.language) { + conditions.push('language = ?') + params.push(opts.language) + } + if (opts.font) { + conditions.push('font = ?') + params.push(opts.font) + } + if (opts.severity) { + conditions.push('severity = ?') + params.push(opts.severity) + } + if (opts.since != null) { + conditions.push('timestamp >= ?') + params.push(opts.since) + } + if (opts.until != null) { + conditions.push('timestamp <= ?') + params.push(opts.until) + } + + const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '' + const limit = opts.limit != null ? `LIMIT ${opts.limit}` : '' + const offset = opts.offset != null ? 
`OFFSET ${opts.offset}` : '' + + const rows = this.db + .prepare( + `SELECT * FROM measurement_results + ${where} + ORDER BY timestamp DESC + ${limit} ${offset}`, + ) + .all(...params) as Array> + + return rows.map(rowToResult) + } + + queryRuns(limit = 50): ValidationRun[] { + const runRows = this.db + .prepare( + `SELECT * FROM validation_runs ORDER BY timestamp DESC LIMIT ?`, + ) + .all(limit) as Array> + + return runRows.map((row) => { + const results = this.db + .prepare(`SELECT * FROM measurement_results WHERE run_id = ?`) + .all(row['id'] as string) as Array> + + return rowToRun(row, results.map(rowToResult)) + }) + } + + getRunById(id: string): ValidationRun | null { + const row = this.db + .prepare(`SELECT * FROM validation_runs WHERE id = ?`) + .get(id) as Record | null + if (!row) return null + + const results = this.db + .prepare(`SELECT * FROM measurement_results WHERE run_id = ?`) + .all(id) as Array> + + return rowToRun(row, results.map(rowToResult)) + } + + getLatestRun(): ValidationRun | null { + const row = this.db + .prepare(`SELECT * FROM validation_runs ORDER BY timestamp DESC LIMIT 1`) + .get() as Record | null + if (!row) return null + + const results = this.db + .prepare(`SELECT * FROM measurement_results WHERE run_id = ?`) + .all(row['id'] as string) as Array> + + return rowToRun(row, results.map(rowToResult)) + } + + getLanguageTrends( + language: string, + days = 30, + ): Array<{ date: string; passRate: number; avgDeltaPercent: number }> { + const since = Date.now() - days * 24 * 60 * 60 * 1000 + const rows = this.db + .prepare( + `SELECT + date(timestamp / 1000, 'unixepoch') as date, + AVG(pass_rate) as pass_rate, + AVG(avg_delta_percent) as avg_delta + FROM validation_runs + WHERE timestamp >= ? 
+ GROUP BY date + ORDER BY date ASC`, + ) + .all(since) as Array<{ date: string; pass_rate: number; avg_delta: number }> + + return rows.map((r) => ({ + date: r.date, + passRate: r.pass_rate, + avgDeltaPercent: r.avg_delta, + })) + } + + getStatistics(): { + totalRuns: number + totalResults: number + avgPassRate: number + criticalCount: number + languages: string[] + } { + const runs = this.db + .prepare(`SELECT COUNT(*) as count, AVG(pass_rate) as avg_pass FROM validation_runs`) + .get() as { count: number; avg_pass: number } + + const results = this.db + .prepare(`SELECT COUNT(*) as count FROM measurement_results`) + .get() as { count: number } + + const critical = this.db + .prepare(`SELECT COUNT(*) as count FROM measurement_results WHERE severity = 'critical'`) + .get() as { count: number } + + const langs = this.db + .prepare(`SELECT DISTINCT language FROM measurement_results ORDER BY language`) + .all() as Array<{ language: string }> + + return { + totalRuns: runs.count, + totalResults: results.count, + avgPassRate: runs.avg_pass ?? 
0, + criticalCount: critical.count, + languages: langs.map((l) => l.language), + } + } + + close(): void { + this.db.close() + } +} + +function rowToResult(row: Record): MeasurementResult { + const base: MeasurementResult = { + id: row['id'] as string, + timestamp: row['timestamp'] as number, + language: row['language'] as string, + font: row['font'] as string, + fontSize: row['font_size'] as number, + text: row['text'] as string, + pretextWidth: row['pretext_width'] as number, + domWidth: row['dom_width'] as number, + delta: row['delta'] as number, + deltaPercent: row['delta_percent'] as number, + severity: row['severity'] as Severity, + } + const rootCause = row['root_cause'] as string | null + if (rootCause != null) base.rootCause = rootCause + const metadata = row['metadata'] as string | null + if (metadata != null) base.metadata = JSON.parse(metadata) as Record + return base +} + +function rowToRun( + row: Record, + results: MeasurementResult[], +): ValidationRun { + const summary: ValidationSummary = { + total: row['total'] as number, + passed: row['passed'] as number, + warnings: row['warnings'] as number, + critical: row['critical'] as number, + passRate: row['pass_rate'] as number, + avgDeltaPercent: row['avg_delta_percent'] as number, + maxDeltaPercent: row['max_delta_percent'] as number, + } + const run: ValidationRun = { + id: row['id'] as string, + timestamp: row['timestamp'] as number, + durationMs: row['duration_ms'] as number, + results, + summary, + } + const commitSha = row['commit_sha'] as string | null + if (commitSha != null) run.commitSha = commitSha + const branch = row['branch'] as string | null + if (branch != null) run.branch = branch + return run +} diff --git a/src/measurement-validator/slack-notifier.test.ts b/src/measurement-validator/slack-notifier.test.ts new file mode 100644 index 00000000..fc21bbdd --- /dev/null +++ b/src/measurement-validator/slack-notifier.test.ts @@ -0,0 +1,164 @@ +import { describe, test, expect } from 
'bun:test' +import { SlackNotifier, createSlackNotifierFromEnv } from './slack-notifier.js' +import type { PerformanceRegression, ValidationSummary } from './types.js' + +// We test the message-building logic by monkey-patching the private post method. +function makeTestNotifier(captured: unknown[] = []): SlackNotifier { + const n = new SlackNotifier({ webhookUrl: 'https://hooks.slack.com/test' }) + // Override fetch to capture messages instead of actually posting + ;(n as unknown as Record)['post'] = async (msg: unknown) => { + captured.push(msg) + } + return n +} + +const okSummary: ValidationSummary = { + total: 100, + passed: 100, + warnings: 0, + critical: 0, + passRate: 1.0, + avgDeltaPercent: 0.1, + maxDeltaPercent: 0.5, +} + +const warnSummary: ValidationSummary = { + total: 100, + passed: 90, + warnings: 8, + critical: 2, + passRate: 0.9, + avgDeltaPercent: 1.5, + maxDeltaPercent: 5.0, +} + +const criticalRegression: PerformanceRegression = { + language: 'arabic', + metric: 'avgTotalMs', + baselineMs: 1.0, + currentMs: 2.5, + changePercent: 150, + severity: 'critical', +} + +const warningRegression: PerformanceRegression = { + language: 'chinese', + metric: 'avgTotalMs', + baselineMs: 1.0, + currentMs: 1.3, + changePercent: 30, + severity: 'warning', +} + +describe('SlackNotifier.sendValidationSummary', () => { + test('sends message for ok summary', async () => { + const captured: unknown[] = [] + const n = makeTestNotifier(captured) + await n.sendValidationSummary(okSummary, [], { runId: 'run-1' }) + expect(captured).toHaveLength(1) + const msg = captured[0] as Record + expect(msg['text']).toContain('PASSED') + }) + + test('sends message for critical summary', async () => { + const captured: unknown[] = [] + const n = makeTestNotifier(captured) + await n.sendValidationSummary(warnSummary, [criticalRegression], { runId: 'run-2' }) + expect(captured).toHaveLength(1) + const msg = captured[0] as Record + expect((msg['text'] as string)).toContain('CRITICAL') 
+ }) + + test('respects minSeverity=critical β€” skips ok summary', async () => { + const captured: unknown[] = [] + const n = new SlackNotifier({ + webhookUrl: 'https://hooks.slack.com/test', + minSeverity: 'critical', + }) + ;(n as unknown as Record)['post'] = async (msg: unknown) => captured.push(msg) + await n.sendValidationSummary(okSummary, [], { runId: 'run-3' }) + expect(captured).toHaveLength(0) + }) + + test('includes PR URL in message when provided', async () => { + const captured: unknown[] = [] + const n = makeTestNotifier(captured) + await n.sendValidationSummary(okSummary, [], { + runId: 'run-4', + prUrl: 'https://github.com/org/repo/pull/42', + }) + const attachment = (captured[0] as Record)['attachments']![0] as Record + expect(attachment['text']).toContain('https://github.com/org/repo/pull/42') + }) + + test('includes branch and commit in fields when provided', async () => { + const captured: unknown[] = [] + const n = makeTestNotifier(captured) + await n.sendValidationSummary(okSummary, [], { + runId: 'run-5', + branch: 'feature/foo', + commitSha: 'deadbeef1234', + }) + const attachment = (captured[0] as Record)['attachments']![0] as Record + const fields = attachment['fields'] as Array<{ title: string; value: string }> + expect(fields.some((f) => f.value === 'feature/foo')).toBe(true) + expect(fields.some((f) => f.value === 'deadbeef')).toBe(true) + }) +}) + +describe('SlackNotifier.sendRegressionAlert', () => { + test('sends nothing when no regressions', async () => { + const captured: unknown[] = [] + const n = makeTestNotifier(captured) + await n.sendRegressionAlert([], { runId: 'run-6' }) + expect(captured).toHaveLength(0) + }) + + test('sends blocks message with regression details', async () => { + const captured: unknown[] = [] + const n = makeTestNotifier(captured) + await n.sendRegressionAlert([criticalRegression, warningRegression], { runId: 'run-7' }) + expect(captured).toHaveLength(1) + const msg = captured[0] as Record + const blocks 
= msg['blocks'] as Array> + const allText = JSON.stringify(blocks) + expect(allText).toContain('arabic') + expect(allText).toContain('chinese') + expect(allText).toContain('run-7') + }) +}) + +describe('SlackNotifier.sendDailySummary', () => { + test('sends summary with pass rate', async () => { + const captured: unknown[] = [] + const n = makeTestNotifier(captured) + await n.sendDailySummary({ + totalRuns: 5, + avgPassRate: 0.97, + criticalCount: 0, + topLanguages: ['english', 'arabic'], + }) + expect(captured).toHaveLength(1) + const msg = captured[0] as Record + const blocksText = JSON.stringify(msg['blocks']) + expect(blocksText).toContain('97.0%') + expect(blocksText).toContain('english') + }) +}) + +describe('createSlackNotifierFromEnv', () => { + test('returns null when SLACK_WEBHOOK_URL is not set', () => { + const original = process.env['SLACK_WEBHOOK_URL'] + delete process.env['SLACK_WEBHOOK_URL'] + const n = createSlackNotifierFromEnv() + expect(n).toBeNull() + if (original) process.env['SLACK_WEBHOOK_URL'] = original + }) + + test('returns notifier when SLACK_WEBHOOK_URL is set', () => { + process.env['SLACK_WEBHOOK_URL'] = 'https://hooks.slack.com/services/test' + const n = createSlackNotifierFromEnv() + expect(n).not.toBeNull() + delete process.env['SLACK_WEBHOOK_URL'] + }) +}) diff --git a/src/measurement-validator/slack-notifier.ts b/src/measurement-validator/slack-notifier.ts new file mode 100644 index 00000000..5c9545e1 --- /dev/null +++ b/src/measurement-validator/slack-notifier.ts @@ -0,0 +1,258 @@ +// Slack webhook notifier for the measurement-validator. +// +// Sends structured notifications to a Slack channel when validation runs +// complete, regressions are detected, or critical issues are found. +// Uses incoming webhooks β€” no OAuth, no bot tokens required. 
+ +import type { + PerformanceRegression, + SlackAttachment, + SlackBlock, + SlackMessage, + ValidationSummary, +} from './types.js' + +export type SlackNotifierConfig = { + webhookUrl: string + channel?: string + username?: string + iconEmoji?: string + /** Minimum severity to post; defaults to 'warning' */ + minSeverity?: 'ok' | 'warning' | 'critical' +} + +export type SlackNotifierOptions = { + runId: string + branch?: string + commitSha?: string + prUrl?: string +} + +export class SlackNotifier { + private config: SlackNotifierConfig + + constructor(config: SlackNotifierConfig) { + this.config = config + } + + async sendValidationSummary( + summary: ValidationSummary, + regressions: PerformanceRegression[], + opts: SlackNotifierOptions, + ): Promise { + const hasCritical = summary.critical > 0 || regressions.some((r) => r.severity === 'critical') + const hasWarning = summary.warnings > 0 || regressions.some((r) => r.severity === 'warning') + + if (this.config.minSeverity === 'critical' && !hasCritical) return + if (this.config.minSeverity !== 'ok' && !hasCritical && !hasWarning) return + + const color = hasCritical ? '#dc3545' : hasWarning ? '#ffc107' : '#28a745' + const statusIcon = hasCritical ? 'πŸ”΄' : hasWarning ? '🟑' : 'βœ…' + const statusLabel = hasCritical ? 'CRITICAL' : hasWarning ? 
'WARNING' : 'PASSED' + + const fields: SlackAttachment['fields'] = [ + { title: 'Pass Rate', value: `${(summary.passRate * 100).toFixed(1)}%`, short: true }, + { title: 'Total Tests', value: String(summary.total), short: true }, + { title: 'Critical', value: String(summary.critical), short: true }, + { title: 'Warnings', value: String(summary.warnings), short: true }, + { title: 'Avg Delta', value: `${summary.avgDeltaPercent.toFixed(2)}%`, short: true }, + { title: 'Max Delta', value: `${summary.maxDeltaPercent.toFixed(2)}%`, short: true }, + ] + + if (opts.branch) { + fields.push({ title: 'Branch', value: opts.branch, short: true }) + } + if (opts.commitSha) { + fields.push({ title: 'Commit', value: opts.commitSha.slice(0, 8), short: true }) + } + + let regressionText = '' + if (regressions.length > 0) { + regressionText = '\n*Performance Regressions:*\n' + + regressions + .map( + (r) => + `${r.severity === 'critical' ? 'πŸ”΄' : '🟑'} ${r.language}: ` + + `${r.baselineMs.toFixed(2)}ms β†’ ${r.currentMs.toFixed(2)}ms ` + + `(+${r.changePercent.toFixed(1)}%)`, + ) + .join('\n') + } + + const prLink = opts.prUrl ? 
` | <${opts.prUrl}|View PR>` : '' + const text = + `${statusIcon} Measurement Validation *${statusLabel}*${prLink}\n` + + `Run: \`${opts.runId}\`` + + regressionText + + const message: SlackMessage = { + text: `Measurement Validation ${statusLabel}`, + attachments: [ + { + color, + title: `Measurement Validation ${statusLabel}`, + text, + fields, + footer: 'pretext measurement-validator', + ts: Math.floor(Date.now() / 1000), + }, + ], + } + + if (this.config.channel) { + Object.assign(message, { channel: this.config.channel }) + } + if (this.config.username) { + Object.assign(message, { username: this.config.username }) + } + if (this.config.iconEmoji) { + Object.assign(message, { icon_emoji: this.config.iconEmoji }) + } + + await this.post(message) + } + + async sendRegressionAlert( + regressions: PerformanceRegression[], + opts: SlackNotifierOptions, + ): Promise { + if (regressions.length === 0) return + + const critical = regressions.filter((r) => r.severity === 'critical') + const warnings = regressions.filter((r) => r.severity === 'warning') + + const blocks: SlackBlock[] = [ + { + type: 'header', + text: { type: 'plain_text', text: '⚠️ Performance Regression Alert' }, + }, + { + type: 'section', + text: { + type: 'mrkdwn', + text: [ + `Run: \`${opts.runId}\``, + opts.branch ? `Branch: \`${opts.branch}\`` : null, + opts.commitSha ? 
`Commit: \`${opts.commitSha.slice(0, 8)}\`` : null, + ] + .filter(Boolean) + .join(' | '), + }, + }, + { type: 'divider' }, + ] + + if (critical.length > 0) { + blocks.push({ + type: 'section', + text: { + type: 'mrkdwn', + text: + '*πŸ”΄ Critical Regressions:*\n' + + critical + .map( + (r) => + `β€’ ${r.language} (${r.metric}): ` + + `${r.baselineMs.toFixed(2)}ms β†’ ${r.currentMs.toFixed(2)}ms ` + + `(+${r.changePercent.toFixed(1)}%)`, + ) + .join('\n'), + }, + }) + } + + if (warnings.length > 0) { + blocks.push({ + type: 'section', + text: { + type: 'mrkdwn', + text: + '*🟑 Warnings:*\n' + + warnings + .map( + (r) => + `β€’ ${r.language} (${r.metric}): ` + + `${r.baselineMs.toFixed(2)}ms β†’ ${r.currentMs.toFixed(2)}ms ` + + `(+${r.changePercent.toFixed(1)}%)`, + ) + .join('\n'), + }, + }) + } + + await this.post({ text: '⚠️ Performance Regression Alert', blocks }) + } + + async sendDailySummary( + stats: { + totalRuns: number + avgPassRate: number + criticalCount: number + topLanguages: string[] + }, + ): Promise { + const statusIcon = stats.avgPassRate >= 0.99 ? 'βœ…' : stats.avgPassRate >= 0.95 ? 
'🟑' : 'πŸ”΄' + + const message: SlackMessage = { + text: `${statusIcon} Daily Measurement Validation Summary`, + blocks: [ + { + type: 'header', + text: { + type: 'plain_text', + text: `${statusIcon} Daily Measurement Validation Summary`, + }, + }, + { + type: 'section', + text: { + type: 'mrkdwn', + text: [ + `*Runs today:* ${stats.totalRuns}`, + `*Avg pass rate:* ${(stats.avgPassRate * 100).toFixed(1)}%`, + `*Critical issues:* ${stats.criticalCount}`, + `*Languages tested:* ${stats.topLanguages.join(', ')}`, + ].join('\n'), + }, + }, + ], + } + + await this.post(message) + } + + private async post(message: SlackMessage): Promise { + const res = await fetch(this.config.webhookUrl, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(message), + }) + + if (!res.ok) { + const body = await res.text() + throw new Error( + `Slack webhook request failed: ${res.status} ${res.statusText}\n${body}`, + ) + } + } +} + +/** Build a SlackNotifier from the SLACK_WEBHOOK_URL environment variable. */ +export function createSlackNotifierFromEnv(): SlackNotifier | null { + const url = process.env['SLACK_WEBHOOK_URL'] + if (!url) return null + + const rawSeverity = process.env['SLACK_MIN_SEVERITY'] + const minSeverity: 'ok' | 'warning' | 'critical' = + rawSeverity === 'ok' || rawSeverity === 'warning' || rawSeverity === 'critical' + ? rawSeverity + : 'warning' + + return new SlackNotifier({ + webhookUrl: url, + channel: process.env['SLACK_CHANNEL'], + username: process.env['SLACK_USERNAME'] ?? 'pretext-validator', + iconEmoji: process.env['SLACK_ICON_EMOJI'] ?? ':bar_chart:', + minSeverity, + }) +} diff --git a/src/measurement-validator/types.ts b/src/measurement-validator/types.ts new file mode 100644 index 00000000..92af0867 --- /dev/null +++ b/src/measurement-validator/types.ts @@ -0,0 +1,111 @@ +// Core types for the measurement-validator tool. 
//
// The measurement-validator compares Pretext's canvas-based width predictions
// against real browser DOM measurements to track accuracy, detect regressions,
// and monitor performance over time.

// Outcome classification for a single measurement or a whole run.
export type Severity = 'ok' | 'warning' | 'critical'

// One prediction-vs-DOM comparison for a (language, font, text) triple.
export type MeasurementResult = {
  id: string
  timestamp: number
  language: string
  font: string
  fontSize: number
  text: string
  pretextWidth: number   // predicted width (px)
  domWidth: number       // measured DOM width (px)
  delta: number          // domWidth - pretextWidth, per results-database columns
  deltaPercent: number
  severity: Severity
  rootCause?: string
  // NOTE(review): generic parameters were stripped in extraction; restored
  // as Record<string, unknown> to match the database round-trip (JSON blob).
  metadata?: Record<string, unknown>
}

// A complete validation run: its results plus an aggregate summary.
export type ValidationRun = {
  id: string
  timestamp: number
  commitSha?: string
  branch?: string
  results: MeasurementResult[]
  summary: ValidationSummary
  durationMs: number
}

// Aggregate counts for one run; persisted flat on the validation_runs row.
export type ValidationSummary = {
  total: number
  passed: number
  warnings: number
  critical: number
  passRate: number
  avgDeltaPercent: number
  maxDeltaPercent: number
}

// Raw timing sample for one (language, font) combination.
export type PerformanceMetrics = {
  language: string
  font: string
  prepareMs: number
  layoutMs: number
  totalMs: number
  measurementCount: number
  avgMsPerMeasurement: number
}

// Versioned performance baseline file (.measurement-baseline.json).
export type PerformanceBaseline = {
  version: string
  createdAt: number
  updatedAt: number
  commitSha?: string
  // Keyed by a language/font baseline key; values restored from extraction
  // as BaselineEntry β€” TODO confirm against baseline-file writer.
  metrics: Record<string, BaselineEntry>
}

// Aggregated timing statistics captured for one baseline key.
export type BaselineEntry = {
  avgPrepareMs: number
  avgLayoutMs: number
  avgTotalMs: number
  p95PrepareMs: number
  p95LayoutMs: number
  p95TotalMs: number
  sampleCount: number
  capturedAt: number
}

// A detected slowdown relative to the baseline.
export type PerformanceRegression = {
  language: string
  // NOTE(review): the Pick<...> type arguments were lost in extraction.
  // Reconstructed from test usage ('avgTotalMs' appears in fixtures) as the
  // average-timing keys of BaselineEntry β€” confirm against the original.
  metric: keyof Pick<BaselineEntry, 'avgPrepareMs' | 'avgLayoutMs' | 'avgTotalMs'>
  baselineMs: number
  currentMs: number
  changePercent: number
  severity: 'warning' | 'critical'
}

// Filters accepted by ResultsDatabase.queryResults; all optional.
export type DatabaseQueryOptions = {
  language?: string
  font?: string
  severity?: Severity
  since?: number   // inclusive lower bound (ms epoch)
  until?: number   // inclusive upper bound (ms epoch)
  limit?: number
  offset?: number
}

// Payload shape for a Slack incoming-webhook POST.
export type SlackMessage = {
  text: string
  attachments?: SlackAttachment[]
  blocks?: SlackBlock[]
}

// Legacy-style colored attachment used for validation summaries.
export type SlackAttachment = {
  color: string
  title: string
  text: string
  fields?: Array<{ title: string; value: string; short: boolean }>
  footer?: string
  ts?: number
}

// Minimal Block Kit subset used by the notifier.
export type SlackBlock =
  | { type: 'header'; text: { type: 'plain_text'; text: string } }
  | { type: 'section'; text: { type: 'mrkdwn'; text: string } }
  | { type: 'divider' }