Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 203 additions & 0 deletions .github/workflows/validate.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
# CI workflow: type-checks, unit tests, measurement validation, and
# performance tracking against a committed baseline.
name: Validate Measurements

on:
  # Validate every change to source, the baseline, or this workflow itself.
  push:
    branches: [main]
    paths:
      - 'src/**'
      - 'performance-baseline.json'
      - '.github/workflows/validate.yml'
  pull_request:
    paths:
      - 'src/**'
      - 'performance-baseline.json'
      - '.github/workflows/validate.yml'
  # Manual trigger; optionally forces a baseline refresh even when not all
  # validations pass (see the "Update performance baseline" step).
  workflow_dispatch:
    inputs:
      update_baseline:
        description: 'Update performance baseline after run'
        required: false
        # type: boolean inputs take a real boolean default, not the
        # string 'false'.
        default: false
        type: boolean

# contents: write      — commit/push the refreshed performance baseline.
# pull-requests: write — post/update the PR summary comment.
permissions:
  contents: write
  pull-requests: write

# One run per ref at a time; superseded runs are cancelled. This also
# serializes baseline commits on main, avoiding push races.
concurrency:
  group: validate-${{ github.ref }}
  cancel-in-progress: true

jobs:
  validate:
    name: Validate & Track Performance
    runs-on: ubuntu-latest
    # Exposed for downstream jobs and external status checks.
    outputs:
      exit_code: ${{ steps.validate.outputs.exit_code }}
      has_regressions: ${{ steps.regression.outputs.has_regressions }}
      pass_rate: ${{ steps.validate.outputs.pass_rate }}
      criticals: ${{ steps.validate.outputs.criticals }}

    steps:
      - uses: actions/checkout@v4

      # Bun is the project runtime/package manager (`bun run` / `bun test`).
      - uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest

      - name: Install dependencies
        # --frozen-lockfile: fail loudly instead of silently updating the
        # lockfile when package.json and the lockfile disagree.
        run: bun install --frozen-lockfile

      - name: Type-check
        run: bun run check

      - name: Run unit tests
        run: bun test

      # Runs the validator and captures its exit code plus summary metrics as
      # step outputs. Exit codes: 0 = pass, 1 = warnings, 2 = critical.
      # `set +e` keeps a non-zero exit from killing the job here; later steps
      # ("Fail on critical divergences") decide overall pass/fail.
      - name: Run measurement validation
        id: validate
        run: |
          set +e
          bun run scripts/validator-cli.ts validate \
            --report=json \
            --output=validation-results.json \
            --stream
          EXIT_CODE=$?
          set -e

          # Extract summary metrics from the JSON report.
          PASS_RATE=$(node -e "const d=require('./validation-results.json');console.log(d.summary?.passRate??1)")
          CRITICALS=$(node -e "const d=require('./validation-results.json');console.log(d.summary?.criticals??0)")

          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"
          echo "pass_rate=$PASS_RATE" >> "$GITHUB_OUTPUT"
          echo "criticals=$CRITICALS" >> "$GITHUB_OUTPUT"

          echo "Validation exit code: $EXIT_CODE"
          echo "Pass rate: $PASS_RATE"
          echo "Criticals: $CRITICALS"

      # Convert the JSON results into human-readable reports. Both are
      # uploaded as artifacts; the Markdown report also feeds the PR comment.
      - name: Generate HTML report
        run: |
          bun run scripts/validator-cli.ts report \
            --input=validation-results.json \
            --report=html \
            --output=validation-report.html

      - name: Generate Markdown report
        run: |
          bun run scripts/validator-cli.ts report \
            --input=validation-results.json \
            --report=markdown \
            --output=validation-report.md

- name: Check performance regressions
id: regression
run: |
set +e
bun run scripts/validator-cli.ts benchmark > benchmark-output.txt 2>&1
set -e

# Simple regression flag based on exit code.
if grep -q 'CRITICAL' benchmark-output.txt 2>/dev/null; then
echo "has_regressions=true" >> "$GITHUB_OUTPUT"
else
echo "has_regressions=false" >> "$GITHUB_OUTPUT"
fi
cat benchmark-output.txt

      # Refresh the committed baseline on main: always when manually requested
      # via workflow_dispatch, otherwise only when validation fully passed
      # (exit code 0). The concurrency group serializes runs per ref, which
      # avoids concurrent baseline pushes; "[skip ci]" prevents the commit
      # from retriggering this workflow.
      # NOTE(review): `git push` assumes main has not advanced since checkout —
      # presumably acceptable under the concurrency group; confirm.
      - name: Update performance baseline
        if: |
          github.ref == 'refs/heads/main' &&
          (github.event.inputs.update_baseline == 'true' || steps.validate.outputs.exit_code == '0')
        run: |
          bun run scripts/validator-cli.ts benchmark --update-baseline
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add performance-baseline.json
          git diff --cached --quiet || git commit -m "chore: update performance baseline [skip ci]"
          git push

      # Keep the reports available even when an earlier step failed
      # (`always()`), so failures can be diagnosed from the artifacts.
      - name: Upload validation artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: validation-report-${{ github.run_number }}
          path: |
            validation-results.json
            validation-report.html
            validation-report.md
          retention-days: 30

- name: Post PR comment
if: github.event_name == 'pull_request'
uses: actions/github-script@v7
with:
script: |
const fs = require('fs');
let body = '## 📊 Measurement Validation Results\n\n';

try {
const md = fs.readFileSync('validation-report.md', 'utf8');
// Include the summary section only to keep the comment compact.
const summaryMatch = md.match(/## Summary[\s\S]*?(?=##|$)/);
if (summaryMatch) body += summaryMatch[0] + '\n';
} catch {}

const exitCode = '${{ steps.validate.outputs.exit_code }}';
const passRate = '${{ steps.validate.outputs.pass_rate }}';
const criticals = '${{ steps.validate.outputs.criticals }}';

if (exitCode === '0') {
body += `\n✅ **All validations passed** — pass rate: ${(parseFloat(passRate) * 100).toFixed(1)}%\n`;
} else if (exitCode === '1') {
body += `\n⚠️ **Warnings detected** — pass rate: ${(parseFloat(passRate) * 100).toFixed(1)}%\n`;
} else {
body += `\n❌ **Critical divergences detected** — ${criticals} critical, pass rate: ${(parseFloat(passRate) * 100).toFixed(1)}%\n`;
}

body += `\n[View full HTML report](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})\n`;

// Find and update existing bot comment, or create new one.
const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
});

const botComment = comments.find(c =>
c.user.type === 'Bot' && c.body.includes('Measurement Validation Results')
);

if (botComment) {
await github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: botComment.id,
body,
});
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body,
});
}

- name: Fail on critical divergences
if: steps.validate.outputs.exit_code == '2'
run: |
echo "::error::Critical measurement divergences detected. Check validation-report.html for details."
exit 2

- name: Notify Slack on regression
if: steps.regression.outputs.has_regressions == 'true' && env.SLACK_WEBHOOK_URL != ''
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
run: |
curl -s -X POST "$SLACK_WEBHOOK_URL" \
-H 'Content-Type: application/json' \
-d '{
"text": "⚡ Performance regression detected in ${{ github.repository }} on branch ${{ github.ref_name }}. See: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
}'
186 changes: 186 additions & 0 deletions docs/measurement-validator/setup.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# Measurement Validator — Setup Guide

A developer tool for detecting divergences between Pretext's canvas-based text measurement and the browser DOM rendering. Supports 20+ languages, structured reports, a live dashboard, and CI/CD integration.

## Quick Start

```bash
# Install (bun required)
bun install

# Run validation on built-in sample texts
bun run scripts/validator-cli.ts validate

# Stream results in real time
bun run scripts/validator-cli.ts validate --stream

# Export an HTML report
bun run scripts/validator-cli.ts validate --report=html --output=report.html
```

## Installation

No extra dependencies are needed for the core validator. The CLI and server use the packages already in `package.json`.

The SQLite database module uses Bun's built-in `bun:sqlite` driver. A pure-JS in-memory fallback is used automatically in environments without SQLite support.

## CI / GitHub Actions

The workflow is already included in this repository at `.github/workflows/validate.yml` — no additional setup is required.

### What the workflow does

| Step | Description |
|------|-------------|
| Type-check | `bun run check` — TypeScript + oxlint |
| Unit tests | `bun test` |
| Validation | Runs all sample texts, produces JSON/HTML/Markdown reports |
| Performance check | Compares against `performance-baseline.json` |
| Baseline update | Auto-commits updated baseline on `main` when all pass |
| PR comment | Posts a summary comment with pass rate |
| Artifacts | Uploads `validation-report.html` and `validation-report.md` for 30 days |
| Slack notify | Posts to `SLACK_WEBHOOK_URL` secret on regression (optional) |
| Build failure | Exits non-zero on critical divergences |

### Secrets

| Secret | Purpose |
|--------|---------|
| `SLACK_WEBHOOK_URL` | Optional incoming webhook URL for regression alerts |

## CLI Reference

```
bun run scripts/validator-cli.ts <command> [options]
```

### Commands

| Command | Description |
|---------|-------------|
| `validate` | Run validation on sample texts (default) |
| `report` | Convert existing JSON results to another format |
| `watch` | Re-validate whenever a file changes |
| `stream` | Continuously stream real-time results |
| `trends` | Show historical performance trends |
| `dashboard` | Start the HTTP dashboard server |
| `benchmark` | Run benchmarks; `--update-baseline` to persist |

### Options

| Option | Default | Description |
|--------|---------|-------------|
| `--language=<lang>` | all | Filter to one language (`en`, `ar`, `zh`, …) |
| `--severity=<sev>` | all | Filter: `pass` \| `warning` \| `critical` |
| `--report=<fmt>` | `json` | Output format: `json` \| `csv` \| `markdown` \| `html` |
| `--output=<path>` | stdout | Write report to file |
| `--input=<path>` | — | Input JSON file (for `report` / `watch`) |
| `--db=<path>` | `measurements.db` | SQLite database path |
| `--baseline=<path>` | `performance-baseline.json` | Baseline JSON |
| `--port=<n>` | `3000` | Dashboard server port |
| `--stream` | off | Print each result live |
| `--limit=<n>` | `1000` | Max results to process |

### Exit codes

| Code | Meaning |
|------|---------|
| `0` | All pass |
| `1` | Warnings present |
| `2` | Critical divergences detected |

## Dashboard

```bash
bun run scripts/validator-cli.ts dashboard --port=3000
```

Opens at <http://localhost:3000>. The dashboard provides:

- **Live statistics cards** — total, passed, warnings, criticals, pass rate
- **Performance trends grid** — per-language avg/median/p95/p99/min/max
- **Filterable results table** — search by text, filter by language or severity
- **WebSocket live updates** — results pushed in real time with <50 ms latency

### REST API

| Endpoint | Description |
|----------|-------------|
| `GET /api/results` | All stored results (supports `?language=ar&severity=critical&limit=100`) |
| `GET /api/summary` | Aggregated statistics |
| `GET /api/performance/trends` | Per-language performance metrics |
| `WS /ws` | WebSocket stream for real-time result events |

## Performance Tracking

```bash
# View historical trends from the database
bun run scripts/validator-cli.ts trends

# Benchmark and update baseline
bun run scripts/validator-cli.ts benchmark --update-baseline
```

The baseline file `performance-baseline.json` is version-controlled and updated automatically by the CI workflow on `main`.

Regression thresholds:

| Severity | Threshold |
|----------|-----------|
| Minor | 10–20% slowdown |
| Major | 20–40% slowdown |
| Critical | >40% slowdown |

## Programmatic API

```ts
import {
validateSamples,
buildSummary,
  exportToHtml,
  computeMetrics,
  compareToBaseline,
  detectRegressions,
MeasurementDatabase,
SlackNotifier,
DashboardServer,
} from './src/measurement-validator/index.js'

// Validate samples
const results = await validateSamples([
{ text: 'Hello', language: 'en', canvasLineCount: 1, domLineCount: 1 },
])

// Build summary
const summary = buildSummary(results, 0)

// Export HTML report
const html = exportToHtml(results, summary)

// Persist to SQLite
const db = new MeasurementDatabase({ path: 'measurements.db' })
db.insertResults(results)
db.close()

// Check for regressions
const metrics = computeMetrics(results)
const regressions = detectRegressions(compareToBaseline(metrics, baselineEntries))

// Slack notifications
const slack = new SlackNotifier({ webhookUrl: process.env.SLACK_WEBHOOK_URL! })
await slack.notifyValidation(summary)

// Dashboard server
const server = new DashboardServer({ port: 3000 })
server.start()
server.push(results) // broadcast to WebSocket clients
```

## Troubleshooting

**`bun:sqlite` not available** — the database module uses a pure-JS in-memory fallback automatically. All operations work; data is not persisted to disk.

**Dashboard not loading** — ensure port 3000 is free. Use `--port=8080` to change it.

**WebSocket disconnects** — the dashboard auto-reconnects every 3 seconds.

**Baseline out of date** — run `bun run scripts/validator-cli.ts benchmark --update-baseline` locally and commit `performance-baseline.json`, or merge a PR that triggers the CI baseline update.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
"gatsby-sweep:safari": "GATSBY_CHECK_BROWSER=safari bun run scripts/gatsby-sweep.ts",
"generate:bidi-data": "bun run scripts/generate-bidi-data.ts",
"keep-all-check": "bun run scripts/keep-all-check.ts",
"validator": "bun run scripts/validator-cli.ts",
"package-smoke-test": "bun run scripts/package-smoke-test.ts",
"prepack": "rm -rf dist && tsc -p tsconfig.build.json",
"pre-wrap-check": "bun run scripts/pre-wrap-check.ts",
Expand Down
Loading