Himaan1998Y · Copilot · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026
diff --git a/docs/measurement-validator.md b/docs/measurement-validator.md
@@ -0,0 +1,173 @@
+# Measurement Validator
+
+Validates Pretext canvas-based text measurements against actual browser DOM
+rendering, surfaces per-line divergences, and classifies root causes.
+
+## Background
+
+Pretext uses `canvas.measureText` to measure text without forcing DOM reflow.
+While this is fast, subtle differences between canvas and DOM rendering can
+appear due to:
+
+- **Font fallback** — the requested font isn't loaded, so canvas and DOM may
+  each pick a different system fallback.
+- **Emoji correction** — Chrome/Firefox canvas measures emoji wider than DOM at
+  small font sizes; Pretext auto-corrects, but the correction may over- or
+  under-compensate.
+- **Browser kerning differences** — Safari applies kerning that differs from
+  other browsers.
+- **Bidi/RTL shaping** — complex Arabic or Hebrew shaping can change effective
+  glyph widths compared to the simple sum the canvas reports.
+
+The Measurement Validator runs both pipelines on the same input and reports
+exactly how large those differences are.
+
+## Quick Start
+
+```typescript
+import { compare, buildReport, printReport } from '@chenglou/pretext/measurement-validator'
+
+// Compare a single sample (requires browser environment)
+const result = await compare({
+  text: 'The quick brown fox jumps over the lazy dog.',
+  font: '16px Arial',
+  maxWidth: 300,
+  lineHeight: 20,
+})
+
+console.log(result.metrics.severity) // 'exact' | 'minor' | 'major' | 'critical'
+console.log(result.metrics.maxLineDelta) // e.g. 0.042
+
+// Build and print a human-readable report
+const report = buildReport([result])
+printReport(report)
+```
+
+## API Reference
+
+### `compare(sample: MeasurementSample): Promise<ComparisonResult>`
+
+Compare Pretext against DOM for a single sample. **Requires a browser
+environment** — `document` must be available.
+
+### `compareAll(samples: MeasurementSample[]): Promise<ComparisonResult[]>`
+
+Run `compare` over an array of samples sequentially and return all results.
+
+### `measureDOM(sample: MeasurementSample): Promise<DOMMeasurement>`
+
+Low-level DOM measurement. Creates a hidden container, waits for fonts, then
+uses the Range API to extract per-line widths.
+
+### `buildReport(results: ComparisonResult[]): ValidationReport`
+
+Aggregate comparison results into a structured report with summary counts and
+pass rate.
+
+### `printReport(report: ValidationReport): void`
+
+Print a human-readable summary to the console via `console.log`.
+
+### `toJSON(report: ValidationReport, pretty?: boolean): string`
+
+Serialize a report to JSON.
+
+### `toConsoleText(report: ValidationReport): string`
+
+Return the console summary as a plain string (useful for CI logs).
+
+### `classifySeverity(delta: number): Severity`
+
+Classify an absolute pixel delta into one of four buckets:
+
+| Severity | Delta |
+|----------|-------|
+| `exact`    | < 0.1 px |
+| `minor`    | 0.1–0.5 px |
+| `major`    | 0.5–2.0 px |
+| `critical` | ≥ 2.0 px |
+
+## Types
+
+```typescript
+type MeasurementSample = {
+  text: string
+  font: string
+  maxWidth?: number       // default 300
+  lineHeight?: number     // default 1.2 × font-size
+  whiteSpace?: 'normal' | 'pre-wrap'  // default 'normal'
+  wordBreak?: 'normal' | 'keep-all'   // default 'normal'
+  direction?: 'ltr' | 'rtl'          // default 'ltr'
+  label?: string
+}
+
+type ComparisonResult = {
+  sample: MeasurementSample
+  metrics: DivergenceMetrics
+  lines: LineComparison[]
+  rootCause?: string
+  timestamp: string
+  userAgent: string
+}
+
+type DivergenceMetrics = {
+  lineCountMatch: boolean
+  pretextLineCount: number
+  domLineCount: number
+  maxLineDelta: number
+  averageDelta: number
+  severity: Severity
+}
+
+type LineComparison = {
+  lineNumber: number
+  text: string
+  pretextWidth: number
+  domWidth: number
+  delta: number
+  relativeError: number
+  severity: Severity
+}
+```
+
+## Running the Test Suite
+
+The unit tests for the validator live in `test/measurement-validator.test.ts`
+and can be run with:
+
+```sh
+bun test test/measurement-validator.test.ts
+```
+
+The full suite (including all invariant tests) runs with:
+
+```sh
+bun test
+```
+
+## Test Fixtures
+
+`test/fixtures/english-samples.json` contains 10 English-language samples
+covering a range of font sizes, container widths, wrapping scenarios, and
+`pre-wrap` mode.  These are used by the accuracy browser pages to validate
+Phase 1 coverage.
+
+## Limitations (Phase 1)
+
+- **Browser-only** — DOM measurement requires `document` and `document.fonts`.
+- **English-first** — Phase 1 focuses on LTR English text; RTL and CJK scripts
+  are tracked in Phase 2.
+- **No per-glyph shaping** — measurement is line-level; sub-pixel kerning
+  variations between canvas and DOM are expected for some fonts.
+- **JSDOM** — because JSDOM has no real layout engine, line widths extracted
+  in a JSDOM environment will be zero/unreliable.  Run in a real browser or
+  Playwright for meaningful results.
+
+## Severity Thresholds
+
+| Level    | Max delta | Interpretation |
+|----------|-----------|----------------|
+| exact    | < 0.1 px  | Within sub-pixel rounding tolerance |
+| minor    | 0.1–0.5 px | Acceptable for most uses; investigate for tight layouts |
+| major    | 0.5–2.0 px | Visible in dense text; root cause should be identified |
+| critical | ≥ 2.0 px  | Likely a font or shaping issue; line breaks will diverge |
diff --git a/src/measurement-validator/comparator.ts b/src/measurement-validator/comparator.ts
@@ -0,0 +1,175 @@
+// Comparison engine: runs Pretext layout and DOM measurement on the same
+// sample, then produces a structured ComparisonResult.
+
+import { prepareWithSegments, layoutWithLines } from '../layout.js'
+import { measureDOM } from './dom-adapter.js'
+import {
+  classifySeverity,
+  type ComparisonResult,
+  type DivergenceMetrics,
+  type LineComparison,
+  type MeasurementSample,
+} from './types.js'
+
+/** Parse the first px font size in a CSS font string ('16px Arial', '.5PX Arial'); 16 when absent. */
+function parseFontSizePx(font: string): number {
+  const sizeMatch = /(\d*\.?\d+)\s*px/i.exec(font) // CSS units are case-insensitive; integer part is optional
+  return sizeMatch === null ? 16 : Number.parseFloat(sizeMatch[1]!)
+}
+
+/**
+ * Compare Pretext measurements against DOM measurements for a single sample.
+ *
+ * Lays the sample out with Pretext, measures it through `measureDOM`, and pairs
+ * the two line lists by index. A line present on only one side is reported with
+ * width 0 for the missing side and severity 'critical'; a line-count mismatch
+ * also forces the aggregate severity to 'critical'.
+ *
+ * Works in browser environments only (DOM measurement requires `document`).
+ *
+ * @param sample Text, font and layout options; `maxWidth` defaults to 300 and
+ *   `lineHeight` to 1.2 × the px font size parsed from `sample.font`.
+ * @returns Per-line comparisons, aggregate metrics, a heuristic root cause,
+ *   an ISO timestamp and the user agent ('' when `navigator` is undefined).
+ */
+export async function compare(sample: MeasurementSample): Promise<ComparisonResult> {
+  const maxWidth = sample.maxWidth ?? 300
+  const fontSize = parseFontSizePx(sample.font)
+  const lineHeight = sample.lineHeight ?? fontSize * 1.2
+
+  // --- Pretext layout ---
+  const prepared = prepareWithSegments(sample.text, sample.font, {
+    whiteSpace: sample.whiteSpace,
+    wordBreak: sample.wordBreak,
+  })
+  const pretextLines = layoutWithLines(prepared, maxWidth, lineHeight).lines
+
+  // --- DOM measurement ---
+  const domLines = (await measureDOM(sample)).lines
+
+  // --- Per-line comparison ---
+  const lineComparisons: LineComparison[] = []
+  const comparableCount = Math.min(pretextLines.length, domLines.length)
+
+  for (let i = 0; i < comparableCount; i++) {
+    const pLine = pretextLines[i]!
+    const dLine = domLines[i]!
+    const delta = Math.abs(pLine.width - dLine.width)
+    lineComparisons.push({
+      lineNumber: i + 1,
+      text: pLine.text,
+      pretextWidth: pLine.width,
+      domWidth: dLine.width,
+      delta,
+      // A zero DOM width leaves no usable denominator, so the ratio is 0.
+      relativeError: dLine.width > 0 ? delta / dLine.width : 0,
+      severity: classifySeverity(delta),
+    })
+  }
+
+  // Lines present on one side only (at most one side has extras, because
+  // comparableCount is the shorter length); the missing side gets width 0.
+  const totalCount = Math.max(pretextLines.length, domLines.length)
+  for (let i = comparableCount; i < totalCount; i++) {
+    const fromPretext = i < pretextLines.length
+    const extra = fromPretext ? pretextLines[i]! : domLines[i]!
+    lineComparisons.push({
+      lineNumber: i + 1,
+      text: extra.text,
+      pretextWidth: fromPretext ? extra.width : 0,
+      domWidth: fromPretext ? 0 : extra.width,
+      delta: extra.width,
+      relativeError: 1,
+      severity: 'critical',
+    })
+  }
+
+  // --- Aggregate metrics ---
+  const maxDelta = lineComparisons.reduce((acc, l) => Math.max(acc, l.delta), 0)
+  const avgDelta =
+    lineComparisons.length > 0
+      ? lineComparisons.reduce((acc, l) => acc + l.delta, 0) / lineComparisons.length
+      : 0
+  const lineCountMatch = pretextLines.length === domLines.length
+
+  const metrics: DivergenceMetrics = {
+    lineCountMatch,
+    pretextLineCount: pretextLines.length,
+    domLineCount: domLines.length,
+    maxLineDelta: maxDelta,
+    averageDelta: avgDelta,
+    // An unmatched line is always a critical divergence, even when its width
+    // (and therefore its delta) is 0, e.g. an empty trailing line.
+    severity: lineCountMatch ? classifySeverity(maxDelta) : 'critical',
+  }
+
+  const userAgent = typeof navigator !== 'undefined' ? navigator.userAgent : ''
+
+  return {
+    sample,
+    metrics,
+    lines: lineComparisons,
+    rootCause: detectRootCause(metrics, sample, userAgent),
+    timestamp: new Date().toISOString(),
+    userAgent,
+  }
+}
+
+/**
+ * Run compare() on each sample in order, one at a time; results keep input order.
+ */
+export async function compareAll(samples: MeasurementSample[]): Promise<ComparisonResult[]> {
+  const collected: ComparisonResult[] = []
+  for (let index = 0; index < samples.length; index++) {
+    collected[index] = await compare(samples[index]!)
+  }
+  return collected
+}
+
+// Very lightweight root-cause heuristics — kept intentionally narrow for Phase 1.
+
+// Characters in RTL scripts: the contiguous Hebrew..Arabic Extended-A blocks
+// (U+0590–U+08FF), Hebrew and Arabic Presentation Forms-A (U+FB1D–U+FDFF), and
+// Arabic Presentation Forms-B letters (U+FE70–U+FEFC; U+FEFF is the BOM).
+const RTL_CHAR_RE = /[\u0590-\u08FF\uFB1D-\uFDFF\uFE70-\uFEFC]/
+
+// Emoji glyphs that render as pictures (as opposed to text presentation).
+const EMOJI_PRESENTATION_RE = /\p{Emoji_Presentation}/u
+
+/**
+ * Pick a heuristic explanation for a non-exact divergence; the first matching
+ * rule wins: RTL text, emoji, line-count mismatch, Safari, then unknown.
+ * Returns undefined for 'exact' results and for unexplained 'minor' ones.
+ */
+function detectRootCause(
+  metrics: DivergenceMetrics,
+  sample: MeasurementSample,
+  userAgent: string,
+): string | undefined {
+  const { severity } = metrics
+  if (severity === 'exact') return undefined
+
+  // The RTL, Safari and unknown rules only fire for 'major' or 'critical'.
+  const significant = severity === 'major' || severity === 'critical'
+
+  const hasRtl = sample.direction === 'rtl' || RTL_CHAR_RE.test(sample.text)
+  if (hasRtl && significant) {
+    return 'Possible bidi/RTL shaping divergence: RTL characters detected with significant delta'
+  }
+
+  if (EMOJI_PRESENTATION_RE.test(sample.text)) {
+    return 'Possible emoji correction divergence: emoji glyphs detected'
+  }
+
+  if (!metrics.lineCountMatch) {
+    return `Line count mismatch: Pretext produced ${metrics.pretextLineCount} lines, DOM produced ${metrics.domLineCount}`
+  }
+
+  if (!significant) return undefined
+
+  // A UA counts as Safari only when it mentions 'Safari' but not 'Chrome'.
+  const isSafari = userAgent.includes('Safari') && !userAgent.includes('Chrome')
+  return isSafari
+    ? 'Possible Safari kerning/hinting divergence'
+    : 'Unknown measurement divergence — check font loading or canvas calibration'
+}