From 9b3bce86f09dae7c41eee691e172516865861d40 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 4 Apr 2026 19:53:00 +0000
Subject: [PATCH 1/3] Initial plan


From be6abe681f4b5057e670bf8f218a42b2a55475ac Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 4 Apr 2026 20:01:27 +0000
Subject: [PATCH 2/3] feat: add measurement validator module (Phase 1)

Agent-Logs-Url: https://github.com/Himaan1998Y/pretext/sessions/64d3a3ab-a278-4221-abf3-57b5769d9c26

Co-authored-by: Himaan1998Y <210527591+Himaan1998Y@users.noreply.github.com>
---
 docs/measurement-validator.md                 | 173 ++++++++++++
 src/measurement-validator/comparator.ts       | 166 ++++++++++++
 src/measurement-validator/dom-adapter.ts      | 145 ++++++++++
 src/measurement-validator/index.ts            |  33 +++
 src/measurement-validator/report-generator.ts | 107 ++++++++
 src/measurement-validator/types.ts            | 131 +++++++++
 test/fixtures/english-samples.json            |  86 ++++++
 test/measurement-validator.test.ts            | 251 ++++++++++++++++++
 8 files changed, 1092 insertions(+)
 create mode 100644 docs/measurement-validator.md
 create mode 100644 src/measurement-validator/comparator.ts
 create mode 100644 src/measurement-validator/dom-adapter.ts
 create mode 100644 src/measurement-validator/index.ts
 create mode 100644 src/measurement-validator/report-generator.ts
 create mode 100644 src/measurement-validator/types.ts
 create mode 100644 test/fixtures/english-samples.json
 create mode 100644 test/measurement-validator.test.ts
diff --git a/docs/measurement-validator.md b/docs/measurement-validator.md
new file mode 100644
index 00000000..719fe094
--- /dev/null
+++ b/docs/measurement-validator.md
@@ -0,0 +1,173 @@
+# Measurement Validator
+
+Validates Pretext canvas-based text measurements against actual browser DOM
+rendering, surfaces per-line divergences, and classifies root causes.
+
+## Background
+
+Pretext uses `canvas.measureText` to measure text without forcing DOM reflow.
+While this is fast, subtle differences between canvas and DOM rendering can
+appear due to:
+
+- **Font fallback** — the requested font isn't loaded; canvas and DOM each
+  pick different system fallbacks.
+- **Emoji correction** — Chrome/Firefox canvas measures emoji wider than DOM at
+  small font sizes; Pretext auto-corrects, but the correction may over- or
+  under-compensate.
+- **Browser kerning differences** — Safari applies kerning that differs from
+  other browsers.
+- **Bidi/RTL shaping** — complex Arabic or Hebrew shaping can change effective
+  glyph widths compared to the simple sum the canvas reports.
+
+The Measurement Validator runs both pipelines on the same input and reports
+exactly how large those differences are.
+
+## Quick Start
+
+```typescript
+import { compare, buildReport, printReport } from '@chenglou/pretext/measurement-validator'
+
+// Compare a single sample (requires browser environment)
+const result = await compare({
+  text: 'The quick brown fox jumps over the lazy dog.',
+  font: '16px Arial',
+  maxWidth: 300,
+  lineHeight: 20,
+})
+
+console.log(result.metrics.severity) // 'exact' | 'minor' | 'major' | 'critical'
+console.log(result.metrics.maxLineDelta) // e.g. 0.042
+
+// Build and print a human-readable report
+const report = buildReport([result])
+printReport(report)
+```
+
+## API Reference
+
+### `compare(sample: MeasurementSample): Promise<ComparisonResult>`
+
+Compare Pretext against DOM for a single sample. **Requires a browser
+environment** — `document` must be available.
+
+### `compareAll(samples: MeasurementSample[]): Promise<ComparisonResult[]>`
+
+Run `compare` over an array of samples sequentially and return all results.
+
+### `measureDOM(sample: MeasurementSample): Promise<DOMMeasurement>`
+
+Low-level DOM measurement. Creates a hidden container, waits for fonts, then
+uses the Range API to extract per-line widths.
+
+### `buildReport(results: ComparisonResult[]): ValidationReport`
+
+Aggregate comparison results into a structured report with summary counts and
+pass rate.
+
+### `printReport(report: ValidationReport): void`
+
+Print a human-readable summary to `console.log`.
+
+### `toJSON(report: ValidationReport, pretty?: boolean): string`
+
+Serialize a report to JSON.
+
+### `toConsoleText(report: ValidationReport): string`
+
+Return the console summary as a plain string (useful for CI logs).
+
+### `classifySeverity(delta: number): Severity`
+
+Classify an absolute pixel delta into one of four buckets:
+
+| Severity | Delta |
+|----------|-------|
+| `exact`    | < 0.1 px |
+| `minor`    | ≥ 0.1 px and < 0.5 px |
+| `major`    | ≥ 0.5 px and < 2.0 px |
+| `critical` | ≥ 2.0 px |
+
+## Types
+
+```typescript
+type MeasurementSample = {
+  text: string
+  font: string
+  maxWidth?: number       // default 300
+  lineHeight?: number     // default 1.2 × font-size
+  whiteSpace?: 'normal' | 'pre-wrap'  // default 'normal'
+  wordBreak?: 'normal' | 'keep-all'   // default 'normal'
+  direction?: 'ltr' | 'rtl'          // default 'ltr'
+  label?: string
+}
+
+type ComparisonResult = {
+  sample: MeasurementSample
+  metrics: DivergenceMetrics
+  lines: LineComparison[]
+  rootCause?: string
+  timestamp: string
+  userAgent: string
+}
+
+type DivergenceMetrics = {
+  lineCountMatch: boolean
+  pretextLineCount: number
+  domLineCount: number
+  maxLineDelta: number
+  averageDelta: number
+  severity: Severity
+}
+
+type LineComparison = {
+  lineNumber: number
+  text: string
+  pretextWidth: number
+  domWidth: number
+  delta: number
+  relativeError: number
+  severity: Severity
+}
+```
+
+## Running the Test Suite
+
+The unit tests for the validator live in `test/measurement-validator.test.ts`
+and can be run with:
+
+```sh
+bun test test/measurement-validator.test.ts
+```
+
+The full suite (including all invariant tests) runs with:
+
+```sh
+bun test
+```
+
+## Test Fixtures
+
+`test/fixtures/english-samples.json` contains 10 English-language samples
+covering a range of font sizes, container widths, wrapping scenarios, and
+`pre-wrap` mode.  These are used by the accuracy browser pages to validate
+Phase 1 coverage.
+
+## Limitations (Phase 1)
+
+- **Browser-only** — DOM measurement requires `document`, the Range API, and `Intl.Segmenter`; `document.fonts.ready` is awaited when available.
+- **English-first** — Phase 1 focuses on LTR English text; RTL and CJK scripts
+  are tracked in Phase 2.
+- **No per-glyph shaping** — measurement is line-level; sub-pixel kerning
+  variations between canvas and DOM are expected for some fonts.
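+- **JSDOM** — JSDOM has no real layout engine, so per-line widths cannot be measured there. When every
+  measured width is zero, `measureDOM` returns a single placeholder line as wide as the container (or
+  `maxWidth`) rather than a real measurement. Run in a real browser or Playwright for meaningful results.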
+
+## Severity Thresholds
+
+| Level    | Max delta | Interpretation |
+|----------|-----------|----------------|
+| exact    | < 0.1 px  | Within sub-pixel rounding tolerance |
+| minor    | ≥ 0.1 px, < 0.5 px | Acceptable for most uses; investigate for tight layouts |
+| major    | ≥ 0.5 px, < 2.0 px | Visible in dense text; root cause should be identified |
+| critical | ≥ 2.0 px  | Likely a font or shaping issue; line breaks will diverge |
diff --git a/src/measurement-validator/comparator.ts b/src/measurement-validator/comparator.ts
new file mode 100644
index 00000000..d227ca83
--- /dev/null
+++ b/src/measurement-validator/comparator.ts
@@ -0,0 +1,166 @@
+// Comparison engine: runs Pretext layout and DOM measurement on the same
+// sample, then produces a structured ComparisonResult.
+
+import { prepareWithSegments, layoutWithLines } from '../layout.js'
+import { measureDOM } from './dom-adapter.js'
+import {
+  classifySeverity,
+  type ComparisonResult,
+  type DivergenceMetrics,
+  type LineComparison,
+  type MeasurementSample,
+} from './types.js'
+
+/** Parse the numeric font-size (px) from a CSS font string like '16px Arial'. */
+function parseFontSizePx(font: string): number {
+  const m = font.match(/(\d+(?:\.\d+)?)\s*px/)
+  return m !== null ? Number.parseFloat(m[1]!) : 16
+}
+
+/**
+ * Compare Pretext measurements against DOM measurements for a single sample.
+ *
+ * Works in browser environments only (DOM measurement requires `document`).
+ */
+export async function compare(sample: MeasurementSample): Promise<ComparisonResult> {
+  const maxWidth = sample.maxWidth ?? 300
+  const fontSize = parseFontSizePx(sample.font)
+  const lineHeight = sample.lineHeight ?? fontSize * 1.2
+
+  // --- Pretext layout ---
+  const prepared = prepareWithSegments(sample.text, sample.font, {
+    whiteSpace: sample.whiteSpace,
+    wordBreak: sample.wordBreak,
+  })
+  const pretextResult = layoutWithLines(prepared, maxWidth, lineHeight)
+  const pretextLines = pretextResult.lines
+
+  // --- DOM measurement ---
+  const domMeasurement = await measureDOM(sample)
+  const domLines = domMeasurement.lines
+
+  // --- Per-line comparison ---
+  const lineComparisons: LineComparison[] = []
+  const comparableCount = Math.min(pretextLines.length, domLines.length)
+
+  for (let i = 0; i < comparableCount; i++) {
+    const pLine = pretextLines[i]!
+    const dLine = domLines[i]!
+    const delta = Math.abs(pLine.width - dLine.width)
+    const relativeError = dLine.width > 0 ? delta / dLine.width : 0
+    lineComparisons.push({
+      lineNumber: i + 1,
+      text: pLine.text,
+      pretextWidth: pLine.width,
+      domWidth: dLine.width,
+      delta,
+      relativeError,
+      severity: classifySeverity(delta),
+    })
+  }
+
+  // Lines only in Pretext (extra lines).
+  for (let i = comparableCount; i < pretextLines.length; i++) {
+    const pLine = pretextLines[i]!
+    lineComparisons.push({
+      lineNumber: i + 1,
+      text: pLine.text,
+      pretextWidth: pLine.width,
+      domWidth: 0,
+      delta: pLine.width,
+      relativeError: 1,
+      severity: 'critical',
+    })
+  }
+
+  // Lines only in DOM (extra lines).
+  for (let i = comparableCount; i < domLines.length; i++) {
+    const dLine = domLines[i]!
+    lineComparisons.push({
+      lineNumber: i + 1,
+      text: dLine.text,
+      pretextWidth: 0,
+      domWidth: dLine.width,
+      delta: dLine.width,
+      relativeError: 1,
+      severity: 'critical',
+    })
+  }
+
+  // --- Aggregate metrics ---
+  const maxDelta = lineComparisons.reduce((acc, l) => Math.max(acc, l.delta), 0)
+  const avgDelta =
+    lineComparisons.length > 0
+      ? lineComparisons.reduce((acc, l) => acc + l.delta, 0) / lineComparisons.length
+      : 0
+
+  const metrics: DivergenceMetrics = {
+    lineCountMatch: pretextLines.length === domLines.length,
+    pretextLineCount: pretextLines.length,
+    domLineCount: domLines.length,
+    maxLineDelta: maxDelta,
+    averageDelta: avgDelta,
+    severity: classifySeverity(maxDelta),
+  }
+
+  const userAgent = typeof navigator !== 'undefined' ? navigator.userAgent : ''
+
+  return {
+    sample,
+    metrics,
+    lines: lineComparisons,
+    rootCause: detectRootCause(metrics, sample, userAgent),
+    timestamp: new Date().toISOString(),
+    userAgent,
+  }
+}
+
+/**
+ * Run compare() on an array of samples and return all results.
+ */
+export async function compareAll(samples: MeasurementSample[]): Promise<ComparisonResult[]> {
+  const results: ComparisonResult[] = []
+  for (const sample of samples) {
+    results.push(await compare(sample))
+  }
+  return results
+}
+
+// Very lightweight root-cause heuristics — kept intentionally narrow for Phase 1.
+function detectRootCause(
+  metrics: DivergenceMetrics,
+  sample: MeasurementSample,
+  userAgent: string,
+): string | undefined {
+  if (metrics.severity === 'exact') return undefined
+
+  // Bidi / RTL text.
+  if (sample.direction === 'rtl' || /[\u0590-\u08FF\uFB1D-\uFB4F]/.test(sample.text)) {
+    if (metrics.severity === 'major' || metrics.severity === 'critical') {
+      return 'Possible bidi/RTL shaping divergence: RTL characters detected with significant delta'
+    }
+  }
+
+  // Emoji.
+  if (/\p{Emoji_Presentation}/u.test(sample.text)) {
+    return 'Possible emoji correction divergence: emoji glyphs detected'
+  }
+
+  // Line count mismatch is its own class.
+  if (!metrics.lineCountMatch) {
+    return `Line count mismatch: Pretext produced ${metrics.pretextLineCount} lines, DOM produced ${metrics.domLineCount}`
+  }
+
+  // Safari kerning.
+  if (userAgent.includes('Safari') && !userAgent.includes('Chrome')) {
+    if (metrics.severity === 'major' || metrics.severity === 'critical') {
+      return 'Possible Safari kerning/hinting divergence'
+    }
+  }
+
+  if (metrics.severity === 'major' || metrics.severity === 'critical') {
+    return 'Unknown measurement divergence — check font loading or canvas calibration'
+  }
+
+  return undefined
+}
diff --git a/src/measurement-validator/dom-adapter.ts b/src/measurement-validator/dom-adapter.ts
new file mode 100644
index 00000000..916a12c3
--- /dev/null
+++ b/src/measurement-validator/dom-adapter.ts
@@ -0,0 +1,145 @@
+// DOM adapter for measuring text using actual browser rendering.
+//
+// Uses the Range API to measure line widths after laying text out in a hidden
+// DOM container. This is ground-truth for Pretext's canvas-based measurements.
+//
+// Only works in a browser environment (document / document.fonts must exist).
+
+import type { DOMLine, DOMMeasurement, MeasurementSample } from './types.js'
+
+/**
+ * Measure text in the DOM and return per-line widths.
+ *
+ * The function creates a temporary hidden container, sets font/width/direction
+ * styling on it, appends it to `document.body`, waits for fonts, walks the
+ * layout with Range selection to extract line rects, then removes the element.
+ */
+export async function measureDOM(sample: MeasurementSample): Promise<DOMMeasurement> {
+  if (typeof document === 'undefined') {
+    throw new Error('measureDOM requires a browser environment (document not available)')
+  }
+
+  const container = document.createElement('div')
+  applyStyles(container, sample)
+  container.textContent = sample.text
+  document.body.appendChild(container)
+
+  try {
+    // Wait for fonts to be ready so metrics are correct.
+    if (typeof document.fonts !== 'undefined') {
+      await document.fonts.ready
+    }
+
+    const lines = extractLines(container, sample)
+    const totalHeight = container.getBoundingClientRect().height
+    return { lines, totalHeight }
+  } finally {
+    document.body.removeChild(container)
+  }
+}
+
+// Style the off-screen probe: invisible, no box decoration, fixed content
+// width, and the sample's font / wrapping / direction.
+function applyStyles(el: HTMLDivElement, sample: MeasurementSample): void {
+  const style = el.style
+  style.position = 'absolute'
+  style.top = '-9999px'
+  style.left = '-9999px'
+  style.visibility = 'hidden'
+  style.pointerEvents = 'none'
+  style.font = sample.font
+  style.whiteSpace = sample.whiteSpace === 'pre-wrap' ? 'pre-wrap' : 'normal'
+  style.wordBreak = sample.wordBreak === 'keep-all' ? 'keep-all' : 'normal'
+  style.direction = sample.direction === 'rtl' ? 'rtl' : 'ltr'
+  style.overflowWrap = 'break-word'
+  style.lineBreak = 'auto'
+  style.margin = '0'
+  style.padding = '0'
+  style.border = 'none'
+  style.boxSizing = 'content-box'
+  style.width = `${sample.maxWidth ?? 300}px`
+
+  // The `font` shorthand above resets line-height, so set it afterwards.
+  // Unitless 1.2 reproduces the documented default of 1.2 × font-size.
+  style.lineHeight = sample.lineHeight !== undefined ? `${sample.lineHeight}px` : '1.2'
+}
+
+/**
+ * Probe every grapheme cluster of the text node with a Range, group the rects
+ * into lines by their top offset, and return each line's text and width.
+ */
+function extractLines(container: HTMLDivElement, sample: MeasurementSample): DOMLine[] {
+  const textNode = container.firstChild
+  if (textNode === null || textNode.nodeType !== Node.TEXT_NODE) {
+    return []
+  }
+
+  const text = sample.text
+  if (text.length === 0) return []
+
+  const range = document.createRange()
+  const lines: DOMLine[] = []
+
+  // Group characters by their top-offset (y-position) to detect line breaks.
+  // We probe every grapheme cluster boundary using a Range around each character.
+  const segmenter = new Intl.Segmenter(undefined, { granularity: 'grapheme' })
+  const graphemes: Array<{ segment: string; index: number }> = []
+  for (const seg of segmenter.segment(text)) {
+    graphemes.push({ segment: seg.segment, index: seg.index })
+  }
+
+  if (graphemes.length === 0) return []
+
+  // Measure each grapheme rect.
+  type GraphemeRect = { top: number; right: number; width: number; segment: string; index: number }
+  const rects: GraphemeRect[] = []
+  for (const g of graphemes) {
+    const start = g.index
+    const end = start + g.segment.length
+    range.setStart(textNode, start)
+    range.setEnd(textNode, end)
+    const rect = range.getBoundingClientRect()
+    rects.push({ top: rect.top, right: rect.right, width: rect.width, segment: g.segment, index: g.index })
+  }
+
+  // Group by line (same top ± tolerance). The probe sits at left: -9999px, so
+  // rect edges are negative: seed both extents from the first rect, never from 0.
+  const LINE_TOP_TOLERANCE = 1
+  let currentLineTop = rects[0]!.top
+  let currentLineText = ''
+  let currentLineMaxRight = rects[0]!.right
+  let currentLineMinLeft = rects[0]!.right - rects[0]!.width
+  for (const r of rects) {
+    if (Math.abs(r.top - currentLineTop) > LINE_TOP_TOLERANCE) {
+      // New line detected — push the previous one.
+      lines.push({
+        text: currentLineText,
+        width: currentLineMaxRight - currentLineMinLeft,
+      })
+      currentLineText = r.segment
+      currentLineTop = r.top
+      currentLineMinLeft = r.right - r.width
+      currentLineMaxRight = r.right
+    } else {
+      currentLineText += r.segment
+      // Track both extents: in RTL runs later graphemes sit LEFT of the first one.
+      currentLineMaxRight = Math.max(currentLineMaxRight, r.right)
+      currentLineMinLeft = Math.min(currentLineMinLeft, r.right - r.width)
+    }
+  }
+  // Push the last line.
+  lines.push({
+    text: currentLineText,
+    width: currentLineMaxRight - currentLineMinLeft,
+  })
+  // Sanity-check: if we got zero width on all lines (e.g. JSDOM has no layout
+  // engine), fall back to a single line with the container width.
+  const allZeroWidths = lines.every(l => l.width === 0)
+  if (allZeroWidths) {
+    const containerWidth = container.getBoundingClientRect().width
+    return [{ text, width: containerWidth > 0 ? containerWidth : (sample.maxWidth ?? 300) }]
+  }
+
+  return lines
+}
+
diff --git a/src/measurement-validator/index.ts b/src/measurement-validator/index.ts
new file mode 100644
index 00000000..0770cfca
--- /dev/null
+++ b/src/measurement-validator/index.ts
@@ -0,0 +1,33 @@
+// Public API for the Measurement Validator module.
+//
+// Usage example (browser environment):
+//
+//   import { compare, compareAll, buildReport, printReport } from '@chenglou/pretext/measurement-validator'
+//
+//   const result = await compare({ text: 'Hello world', font: '16px Arial', maxWidth: 200 })
+//   console.log(result.metrics.severity) // 'exact' | 'minor' | 'major' | 'critical'
+//
+//   const report = buildReport([result])
+//   printReport(report)
+
+export { compare, compareAll } from './comparator.js'
+export { measureDOM } from './dom-adapter.js'
+export { buildReport, printReport, toConsoleText, toJSON } from './report-generator.js'
+export {
+  classifySeverity,
+  THRESHOLD_EXACT,
+  THRESHOLD_MAJOR,
+  THRESHOLD_MINOR,
+  type ComparisonResult,
+  type DivergenceMetrics,
+  type DOMMeasurement,
+  type DOMLine,
+  type LineComparison,
+  type MeasurementSample,
+  type ReportSummary,
+  type Severity,
+  type TextDirection,
+  type ValidationReport,
+  type WhiteSpaceMode,
+  type WordBreakMode,
+} from './types.js'
diff --git a/src/measurement-validator/report-generator.ts b/src/measurement-validator/report-generator.ts
new file mode 100644
index 00000000..6d4fe08b
--- /dev/null
+++ b/src/measurement-validator/report-generator.ts
@@ -0,0 +1,107 @@
+// Report generator: formats ComparisonResult arrays into JSON, console
+// summaries, and human-readable text reports.
+
+import type {
+  ComparisonResult,
+  ReportSummary,
+  Severity,
+  ValidationReport,
+} from './types.js'
+
+// --- Summary helpers ---
+
+function buildSummary(results: ComparisonResult[]): ReportSummary {
+  let exact = 0
+  let minor = 0
+  let major = 0
+  let critical = 0
+  let lineCountMismatches = 0
+
+  for (const r of results) {
+    const s: Severity = r.metrics.severity
+    if (s === 'exact') exact++
+    else if (s === 'minor') minor++
+    else if (s === 'major') major++
+    else critical++
+
+    if (!r.metrics.lineCountMatch) lineCountMismatches++
+  }
+
+  const total = results.length
+  const passRate = total > 0 ? ((exact + minor) / total) * 100 : 100
+
+  return { total, exact, minor, major, critical, lineCountMismatches, passRate }
+}
+
+// --- JSON report ---
+
+/**
+ * Build a structured ValidationReport from an array of comparison results.
+ */
+export function buildReport(results: ComparisonResult[]): ValidationReport {
+  const userAgent = results.length > 0 && results[0] !== undefined ? results[0].userAgent : ''
+  return {
+    summary: buildSummary(results),
+    results,
+    generatedAt: new Date().toISOString(),
+    userAgent,
+  }
+}
+
+/**
+ * Serialize a ValidationReport to a JSON string.
+ */
+export function toJSON(report: ValidationReport, pretty = false): string {
+  return JSON.stringify(report, null, pretty ? 2 : undefined)
+}
+
+// --- Console / text report ---
+
+const SEVERITY_ICON: Record<Severity, string> = {
+  exact: '✅',
+  minor: '🟡',
+  major: '🟠',
+  critical: '🔴',
+}
+
+/**
+ * Return a compact multiline console summary string.
+ */
+export function toConsoleText(report: ValidationReport): string {
+  const { summary } = report
+  const lines: string[] = [
+    '╔══════════════════════════════════════╗',
+    '║   Measurement Validator — Summary    ║',
+    '╚══════════════════════════════════════╝',
+    `  Total samples : ${summary.total}`,
+    `  ✅ Exact      : ${summary.exact}`,
+    `  🟡 Minor      : ${summary.minor}`,
+    `  🟠 Major      : ${summary.major}`,
+    `  🔴 Critical   : ${summary.critical}`,
+    `  Line mismatches: ${summary.lineCountMismatches}`,
+    `  Pass rate     : ${summary.passRate.toFixed(1)}%`,
+    '',
+  ]
+
+  for (const result of report.results) {
+    const icon = SEVERITY_ICON[result.metrics.severity]
+    const label = result.sample.label ?? result.sample.text.slice(0, 40)
+    lines.push(`${icon} [${result.metrics.severity.toUpperCase()}] ${label}`)
+    lines.push(
+      `     Pretext lines: ${result.metrics.pretextLineCount}  DOM lines: ${result.metrics.domLineCount}  maxΔ: ${result.metrics.maxLineDelta.toFixed(3)}px  avgΔ: ${result.metrics.averageDelta.toFixed(3)}px`,
+    )
+    if (result.rootCause !== undefined) {
+      lines.push(`     ⚠️  Root cause: ${result.rootCause}`)
+    }
+    lines.push('')
+  }
+
+  return lines.join('\n')
+}
+
+/**
+ * Print the report to console.log.
+ */
+export function printReport(report: ValidationReport): void {
+  console.log(toConsoleText(report))
+}
diff --git a/src/measurement-validator/types.ts b/src/measurement-validator/types.ts
new file mode 100644
index 00000000..f5ef54a7
--- /dev/null
+++ b/src/measurement-validator/types.ts
@@ -0,0 +1,131 @@
+// Type definitions for the Measurement Validator module.
+//
+// The validator compares Pretext canvas-based measurements against actual
+// browser DOM rendering (ground truth) to surface divergences and their
+// likely root causes.
+
+// --- Input types ---
+
+export type WhiteSpaceMode = 'normal' | 'pre-wrap'
+export type WordBreakMode = 'normal' | 'keep-all'
+export type TextDirection = 'ltr' | 'rtl'
+
+export type MeasurementSample = {
+  /** The text to measure and compare. */
+  text: string
+  /** CSS font string, e.g. '16px Arial'. */
+  font: string
+  /** Container width in pixels. Defaults to 300. */
+  maxWidth?: number
+  /** Line height in pixels. Defaults to 1.2 * parsed font size. */
+  lineHeight?: number
+  /** CSS white-space mode. Defaults to 'normal'. */
+  whiteSpace?: WhiteSpaceMode
+  /** CSS word-break mode. Defaults to 'normal'. */
+  wordBreak?: WordBreakMode
+  /** Text direction. Defaults to 'ltr'. */
+  direction?: TextDirection
+  /** Optional label for reporting. */
+  label?: string
+}
+
+// --- DOM measurement types ---
+
+export type DOMLine = {
+  /** Rendered line text extracted from DOM. */
+  text: string
+  /** Measured width of this line from the DOM (Range API). */
+  width: number
+}
+
+export type DOMMeasurement = {
+  lines: DOMLine[]
+  /** Total rendered height of the container. */
+  totalHeight: number
+}
+
+// --- Comparison types ---
+
+export type Severity = 'exact' | 'minor' | 'major' | 'critical'
+
+export type LineComparison = {
+  /** 1-based line number. */
+  lineNumber: number
+  /** Line text from Pretext, or from the DOM for lines that exist only in the DOM. */
+  text: string
+  /** Width reported by Pretext (compare() writes 0 for lines that exist only in the DOM). */
+  pretextWidth: number
+  /** Width measured from the DOM (compare() writes 0 for lines that exist only in Pretext). */
+  domWidth: number
+  /** Absolute delta: |pretextWidth - domWidth|. */
+  delta: number
+  /** delta / domWidth; compare() writes 0 when domWidth is not positive and 1 for unmatched lines. */
+  relativeError: number
+  /** Per-line severity classification (compare() marks unmatched lines 'critical'). */
+  severity: Severity
+}
+
+export type DivergenceMetrics = {
+  /** True when Pretext and DOM agree on the number of lines. */
+  lineCountMatch: boolean
+  /** Number of lines produced by Pretext. */
+  pretextLineCount: number
+  /** Number of lines measured from the DOM. */
+  domLineCount: number
+  /** Maximum absolute delta across all LineComparison entries, unmatched lines included. */
+  maxLineDelta: number
+  /** Average absolute delta across all LineComparison entries, unmatched lines included. */
+  averageDelta: number
+  /** Overall severity based on maxLineDelta. */
+  severity: Severity
+}
+
+export type ComparisonResult = {
+  /** Original sample that was compared. */
+  sample: MeasurementSample
+  /** Aggregate divergence metrics. */
+  metrics: DivergenceMetrics
+  /** Per-line comparison details. */
+  lines: LineComparison[]
+  /** Human-readable root cause if a known pattern was detected. */
+  rootCause?: string | undefined
+  /** ISO 8601 timestamp of when the comparison was performed. */
+  timestamp: string
+  /** navigator.userAgent at the time of comparison (empty in non-browser). */
+  userAgent: string
+}
+
+// --- Report types ---
+
+export type ReportSummary = {
+  total: number
+  exact: number
+  minor: number
+  major: number
+  critical: number
+  lineCountMismatches: number
+  passRate: number
+}
+
+export type ValidationReport = {
+  summary: ReportSummary
+  results: ComparisonResult[]
+  generatedAt: string
+  userAgent: string
+}
+
+// --- Severity thresholds ---
+
+/** < 0.1 px  → exact */
+export const THRESHOLD_EXACT = 0.1
+/** 0.1–0.5 px → minor */
+export const THRESHOLD_MINOR = 0.5
+/** 0.5–2 px  → major */
+export const THRESHOLD_MAJOR = 2.0
+
+export function classifySeverity(delta: number): Severity {
+  if (delta < THRESHOLD_EXACT) return 'exact'
+  if (delta < THRESHOLD_MINOR) return 'minor'
+  if (delta < THRESHOLD_MAJOR) return 'major'
+  return 'critical'
+}
diff --git a/test/fixtures/english-samples.json b/test/fixtures/english-samples.json
new file mode 100644
index 00000000..e4fd54d4
--- /dev/null
+++ b/test/fixtures/english-samples.json
@@ -0,0 +1,86 @@
+{
+  "description": "English-language test samples for the Measurement Validator Phase 1 corpus.",
+  "samples": [
+    {
+      "id": "en-short-word",
+      "label": "Short English word",
+      "text": "Hello",
+      "font": "16px Arial",
+      "maxWidth": 300,
+      "lineHeight": 20
+    },
+    {
+      "id": "en-short-sentence",
+      "label": "Short English sentence",
+      "text": "Hello, world!",
+      "font": "16px Arial",
+      "maxWidth": 300,
+      "lineHeight": 20
+    },
+    {
+      "id": "en-wrapping-sentence",
+      "label": "Sentence that wraps at narrow width",
+      "text": "The quick brown fox jumps over the lazy dog.",
+      "font": "16px Arial",
+      "maxWidth": 200,
+      "lineHeight": 20
+    },
+    {
+      "id": "en-long-paragraph",
+      "label": "Multi-line paragraph",
+      "text": "Text measurement is a fundamental challenge in web rendering. Accurate measurement enables pixel-perfect layouts without constant DOM reflow.",
+      "font": "16px Arial",
+      "maxWidth": 300,
+      "lineHeight": 20
+    },
+    {
+      "id": "en-narrow-container",
+      "label": "Narrow container forces many wraps",
+      "text": "Each word on its own line when the container is narrow enough.",
+      "font": "16px Arial",
+      "maxWidth": 80,
+      "lineHeight": 20
+    },
+    {
+      "id": "en-large-font",
+      "label": "Large font size",
+      "text": "Big text",
+      "font": "32px Arial",
+      "maxWidth": 300,
+      "lineHeight": 40
+    },
+    {
+      "id": "en-small-font",
+      "label": "Small font size",
+      "text": "Small text fits more words per line",
+      "font": "12px Arial",
+      "maxWidth": 200,
+      "lineHeight": 16
+    },
+    {
+      "id": "en-numbers-punctuation",
+      "label": "Numbers and punctuation",
+      "text": "Version 3.14.159 released on 2024-01-15.",
+      "font": "16px Arial",
+      "maxWidth": 300,
+      "lineHeight": 20
+    },
+    {
+      "id": "en-pre-wrap-newlines",
+      "label": "Hard line breaks in pre-wrap mode",
+      "text": "Line one\nLine two\nLine three",
+      "font": "16px Arial",
+      "maxWidth": 300,
+      "lineHeight": 20,
+      "whiteSpace": "pre-wrap"
+    },
+    {
+      "id": "en-long-word",
+      "label": "Word longer than container (forced grapheme break)",
+      "text": "supercalifragilisticexpialidocious",
+      "font": "16px Arial",
+      "maxWidth": 100,
+      "lineHeight": 20
+    }
+  ]
+}
diff --git a/test/measurement-validator.test.ts b/test/measurement-validator.test.ts
new file mode 100644
index 00000000..1cf2a1a3
--- /dev/null
+++ b/test/measurement-validator.test.ts
@@ -0,0 +1,251 @@
+import { beforeAll, beforeEach, describe, expect, test } from 'bun:test'
+
+// Measurement Validator integration tests.
+//
+// These tests exercise the pure-logic portions of the validator (types,
+// severity classification, report generation, and comparator arithmetic)
+// using the same fake canvas backend as layout.test.ts.
+//
+// DOM-dependent paths (measureDOM / compare / compareAll) require a real
+// browser and are covered by the browser accuracy pages instead of this suite.
+
+// ---------------------------------------------------------------------------
+// Re-use the same fake canvas from layout.test.ts so that prepareWithSegments
+// and layoutWithLines give deterministic measurements in the test environment.
+// ---------------------------------------------------------------------------
+
+function parseFontSize(font: string): number { // extracts the first "<number>px" size found in a font string
+  const m = font.match(/(\d+(?:\.\d+)?)\s*px/) // integer or decimal number, optional whitespace, then "px"
+  return m !== null ? Number.parseFloat(m[1]!) : 16 // no px size present: fall back to 16
+}
+
+const emojiPresentationRe = /\p{Emoji_Presentation}/u // measured as one full font size per match in measureWidth
+const punctuationRe = /[.,!?;:%)\]}'""'»›…—-]/u // NOTE(review): ' and " each appear twice in this class; possibly curly quotes lost in transcription — confirm against layout.test.ts
+const decimalDigitRe = /\p{Nd}/u // any Unicode decimal digit, not only ASCII 0-9
+
+function measureWidth(text: string, font: string): number { // deterministic fake width: sums a per-character multiple of the parsed font size
+  const fontSize = parseFontSize(font)
+  let width = 0
+  let previousWasDecimalDigit = false // a digit directly after another digit is measured narrower (0.48 instead of 0.52)
+
+  for (const ch of text) { // for...of walks the string by code point, so astral characters arrive whole
+    if (ch === ' ') {
+      width += fontSize * 0.33
+      previousWasDecimalDigit = false
+    } else if (ch === '\t') {
+      width += fontSize * 1.32 // four times the space width
+      previousWasDecimalDigit = false
+    } else if (emojiPresentationRe.test(ch) || ch === '\uFE0F') { // U+FE0F (variation selector-16) is also given a full font size
+      width += fontSize
+      previousWasDecimalDigit = false
+    } else if (decimalDigitRe.test(ch)) {
+      width += fontSize * (previousWasDecimalDigit ? 0.48 : 0.52)
+      previousWasDecimalDigit = true
+    } else if (punctuationRe.test(ch)) {
+      width += fontSize * 0.4
+      previousWasDecimalDigit = false
+    } else { // everything else, letters included
+      width += fontSize * 0.6
+      previousWasDecimalDigit = false
+    }
+  }
+
+  return width
+}
+
+class TestCanvasRenderingContext2D { // fake 2D context exposing only `font` and `measureText`
+  font = '' // read at call time by measureText; an empty string makes parseFontSize fall back to 16
+  measureText(text: string): { width: number } { // returns only `width`, computed by measureWidth
+    return { width: measureWidth(text, this.font) }
+  }
+}
+
+class TestOffscreenCanvas { // installed as globalThis.OffscreenCanvas in beforeAll
+  constructor(_w: number, _h: number) {} // dimensions are accepted and ignored
+  getContext(_kind: string): TestCanvasRenderingContext2D { // the requested kind is ignored; every call returns a new fake context
+    return new TestCanvasRenderingContext2D()
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Module-level references (loaded after canvas mock is installed).
+// ---------------------------------------------------------------------------
+
+type ValidatorModule = typeof import('../src/measurement-validator/index.ts') // type-only reference; the module itself is imported in beforeAll
+
+let classifySeverity: ValidatorModule['classifySeverity'] // this and the bindings below stay unassigned until beforeAll runs
+let THRESHOLD_EXACT: ValidatorModule['THRESHOLD_EXACT']
+let THRESHOLD_MINOR: ValidatorModule['THRESHOLD_MINOR']
+let THRESHOLD_MAJOR: ValidatorModule['THRESHOLD_MAJOR']
+let buildReport: ValidatorModule['buildReport']
+let toJSON: ValidatorModule['toJSON']
+let toConsoleText: ValidatorModule['toConsoleText']
+
+type LayoutModule = typeof import('../src/layout.ts') // type-only reference, same pattern as ValidatorModule
+let prepareWithSegments: LayoutModule['prepareWithSegments']
+let layoutWithLines: LayoutModule['layoutWithLines']
+let clearCache: LayoutModule['clearCache'] // called from beforeEach
+
+// ---------------------------------------------------------------------------
+
+beforeAll(async () => {
+  Reflect.set(globalThis, 'OffscreenCanvas', TestOffscreenCanvas) // install the fake canvas before the modules below are loaded
+
+  const [validatorMod, layoutMod] = await Promise.all([ // dynamic imports, so loading happens after the global is set
+    import('../src/measurement-validator/index.ts'),
+    import('../src/layout.ts'),
+  ])
+
+  ;({ classifySeverity, THRESHOLD_EXACT, THRESHOLD_MINOR, THRESHOLD_MAJOR, buildReport, toJSON, toConsoleText } = validatorMod) // leading ';' stops the parenthesised destructuring being parsed as a call on the previous expression
+  ;({ prepareWithSegments, layoutWithLines, clearCache } = layoutMod)
+})
+
+beforeEach(() => { // runs the layout module's clearCache before every test in this file
+  clearCache()
+})
+
+// ---------------------------------------------------------------------------
+
+describe('severity classification', () => { // pins each threshold as the inclusive lower bound of the next severity
+  test('delta below THRESHOLD_EXACT is exact', () => {
+    expect(classifySeverity(0)).toBe('exact')
+    expect(classifySeverity(THRESHOLD_EXACT - 0.001)).toBe('exact') // just under the boundary
+  })
+
+  test('delta at THRESHOLD_EXACT is minor', () => {
+    expect(classifySeverity(THRESHOLD_EXACT)).toBe('minor') // the boundary value itself is already 'minor'
+    expect(classifySeverity(THRESHOLD_MINOR - 0.001)).toBe('minor')
+  })
+
+  test('delta at THRESHOLD_MINOR is major', () => {
+    expect(classifySeverity(THRESHOLD_MINOR)).toBe('major') // the boundary value itself is already 'major'
+    expect(classifySeverity(THRESHOLD_MAJOR - 0.001)).toBe('major')
+  })
+
+  test('delta at or above THRESHOLD_MAJOR is critical', () => {
+    expect(classifySeverity(THRESHOLD_MAJOR)).toBe('critical')
+    expect(classifySeverity(100)).toBe('critical') // far above every threshold
+  })
+})
+
+// ---------------------------------------------------------------------------
+
+describe('report generator', () => { // covers buildReport, toJSON and toConsoleText using hand-built results (no DOM)
+  test('buildReport with no results produces zero counts and 100% pass rate', () => {
+    const report = buildReport([])
+    expect(report.summary.total).toBe(0)
+    expect(report.summary.exact).toBe(0)
+    expect(report.summary.passRate).toBe(100)
+    expect(report.results).toHaveLength(0)
+    expect(typeof report.generatedAt).toBe('string') // only the type is checked, not the format
+  })
+
+  test('buildReport counts severities correctly', () => {
+    type Sev = 'exact' | 'minor' | 'major' | 'critical'
+    const makeResult = (severity: Sev, lineCountMatch = true) => ({ // minimal hand-built result carrying the requested severity
+      sample: { text: 'hi', font: '16px Arial' },
+      metrics: {
+        lineCountMatch,
+        pretextLineCount: 1,
+        domLineCount: lineCountMatch ? 1 : 2, // a mismatch is modelled as one extra DOM line
+        maxLineDelta: severity === 'exact' ? 0 : severity === 'minor' ? 0.2 : severity === 'major' ? 1 : 3, // hand-picked delta per severity
+        averageDelta: 0,
+        severity,
+      },
+      lines: [],
+      timestamp: new Date().toISOString(),
+      userAgent: '',
+    })
+
+    const results = [ // 2 exact, 1 minor, 1 major, 1 critical (the critical one also has a line-count mismatch)
+      makeResult('exact'),
+      makeResult('exact'),
+      makeResult('minor'),
+      makeResult('major'),
+      makeResult('critical', false),
+    ]
+
+    const report = buildReport(results)
+    expect(report.summary.total).toBe(5)
+    expect(report.summary.exact).toBe(2)
+    expect(report.summary.minor).toBe(1)
+    expect(report.summary.major).toBe(1)
+    expect(report.summary.critical).toBe(1)
+    expect(report.summary.lineCountMismatches).toBe(1)
+    // exact + minor = 3, passRate = 3/5 = 60%
+    expect(report.summary.passRate).toBeCloseTo(60, 1)
+  })
+
+  test('toJSON serializes to valid JSON', () => {
+    const report = buildReport([])
+    const json = toJSON(report)
+    expect(() => JSON.parse(json)).not.toThrow()
+    const pretty = toJSON(report, true) // second argument presumably enables pretty-printing; only a newline is asserted
+    expect(pretty).toContain('\n')
+  })
+
+  test('toConsoleText includes all severity counts', () => { // NOTE(review): despite the name, only two labels are asserted below; per-severity counts are not checked
+    const report = buildReport([])
+    const text = toConsoleText(report)
+    expect(text).toContain('Total samples')
+    expect(text).toContain('Pass rate')
+  })
+})
+
+// ---------------------------------------------------------------------------
+
+describe('pretext layout integration', () => {
+  // These tests verify that the Pretext layout engine behaves consistently
+  // with the fake canvas, which is what the comparator will consume.
+
+  test('short text produces one line at wide width', () => {
+    const FONT = '16px Test Sans'
+    const prepared = prepareWithSegments('Hello world', FONT)
+    const result = layoutWithLines(prepared, 1000, 20) // 'Hello world' measures about 101px with the fake metrics, far below 1000
+    expect(result.lineCount).toBe(1)
+    expect(result.lines).toHaveLength(1)
+    expect(result.lines[0]!.text).toBe('Hello world')
+    expect(result.lines[0]!.width).toBeGreaterThan(0)
+  })
+
+  test('text wraps at narrow width', () => {
+    const FONT = '16px Test Sans'
+    const prepared = prepareWithSegments('Hello world test', FONT)
+    const result = layoutWithLines(prepared, 60, 20) // 'Hello' alone measures 48px with the fake metrics, so two words exceed 60
+    expect(result.lineCount).toBeGreaterThan(1) // only "more than one line" is asserted, not the exact count
+  })
+
+  test('empty text produces zero lines', () => {
+    const FONT = '16px Test Sans'
+    const prepared = prepareWithSegments('', FONT)
+    const result = layoutWithLines(prepared, 300, 20)
+    expect(result.lineCount).toBe(0)
+    expect(result.lines).toHaveLength(0)
+  })
+})
+
+// ---------------------------------------------------------------------------
+
+describe('comparator arithmetic (pure, no DOM)', () => { // NOTE(review): each test feeds a hand-computed |a - b| to classifySeverity; no comparator code is executed
+  // Simulate comparator logic manually to verify the arithmetic contract.
+
+  test('identical widths produce exact severity', () => {
+    const delta = Math.abs(100 - 100)
+    expect(classifySeverity(delta)).toBe('exact')
+  })
+
+  test('0.3px divergence is minor', () => {
+    const delta = Math.abs(100.3 - 100) // floating-point result is slightly below 0.3
+    expect(classifySeverity(delta)).toBe('minor')
+  })
+
+  test('1px divergence is major', () => {
+    const delta = Math.abs(101 - 100)
+    expect(classifySeverity(delta)).toBe('major')
+  })
+
+  test('3px divergence is critical', () => {
+    const delta = Math.abs(103 - 100)
+    expect(classifySeverity(delta)).toBe('critical')
+  })
+})

From 5a39bdf9af8b9f2c33fc8ca4b474efbe20126f80 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 4 Apr 2026 20:02:48 +0000
Subject: [PATCH 3/3] refactor: extract bidi/emoji regexes to named constants
 in comparator

Agent-Logs-Url: https://github.com/Himaan1998Y/pretext/sessions/64d3a3ab-a278-4221-abf3-57b5769d9c26

Co-authored-by: Himaan1998Y <210527591+Himaan1998Y@users.noreply.github.com>
---
 src/measurement-validator/comparator.ts | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/measurement-validator/comparator.ts b/src/measurement-validator/comparator.ts
index d227ca83..750efb51 100644
--- a/src/measurement-validator/comparator.ts
+++ b/src/measurement-validator/comparator.ts
@@ -127,6 +127,15 @@ export async function compareAll(samples: MeasurementSample[]): Promise<Comparis
 }
 
 // Very lightweight root-cause heuristics — kept intentionally narrow for Phase 1.
+
+// Characters in RTL scripts: the contiguous range U+0590–U+08FF (Hebrew, Arabic,
+// Syriac, Thaana, NKo, Samaritan, Mandaic, and the Arabic Supplement/Extended
+// blocks) plus the Hebrew presentation forms (U+FB1D–U+FB4F).
+const RTL_CHAR_RE = /[\u0590-\u08FF\uFB1D-\uFB4F]/
+
+// Emoji glyphs that render as pictures (as opposed to text presentation).
+const EMOJI_PRESENTATION_RE = /\p{Emoji_Presentation}/u
+
 function detectRootCause(
   metrics: DivergenceMetrics,
   sample: MeasurementSample,
@@ -135,14 +144,14 @@ function detectRootCause(
   if (metrics.severity === 'exact') return undefined
 
   // Bidi / RTL text.
-  if (sample.direction === 'rtl' || /[\u0590-\u08FF\uFB1D-\uFB4F]/.test(sample.text)) {
+  if (sample.direction === 'rtl' || RTL_CHAR_RE.test(sample.text)) {
     if (metrics.severity === 'major' || metrics.severity === 'critical') {
       return 'Possible bidi/RTL shaping divergence: RTL characters detected with significant delta'
     }
   }
 
   // Emoji.
-  if (/\p{Emoji_Presentation}/u.test(sample.text)) {
+  if (EMOJI_PRESENTATION_RE.test(sample.text)) {
     return 'Possible emoji correction divergence: emoji glyphs detected'
   }