From a8e7c2c58df5818158375265b554729cd121745e Mon Sep 17 00:00:00 2001
From: somnai-dreams <maxingham@gmail.com>
Date: Fri, 3 Apr 2026 01:06:20 +1100
Subject: [PATCH 1/3] Add source span mapping to rich layout APIs

---
 README.md          |  16 ++++++
 src/analysis.ts    | 108 ++++++++++++++++++++++++++++++++++-------
 src/layout.test.ts |  30 ++++++++++++
 src/layout.ts      | 118 +++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 250 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index ee00d5cf..f266bef5 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,8 @@ const { lines } = layoutWithLines(prepared, 320, 26) // 320px max width, 26px li
 for (let i = 0; i < lines.length; i++) ctx.fillText(lines[i].text, 0, i * 26)
 ```
 
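+Each rich line also carries `sourceOffset` / `sourceLength` (string indices into the original input, so `input.slice(sourceOffset, sourceOffset + sourceLength)` returns the covered source text), letting callers map a rendered line back to the original input without redoing the whitespace normalization themselves.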
+
 - `walkLineRanges()` gives you line widths and cursors without building the text strings:
 
 ```ts
@@ -88,6 +90,16 @@ while (true) {
 }
 ```
 
+- `cursorToSourceOffset()` / `cursorRangeToSourceSpan()` convert rich cursors back into original-source offsets:
+
+```ts
+const line = layoutNextLine(prepared, cursor, width)
+if (line) {
+  const start = cursorToSourceOffset(prepared, line.start)
+  const { sourceOffset, sourceLength } = cursorRangeToSourceSpan(prepared, line.start, line.end)
+}
+```
+
 This usage allows rendering to canvas, SVG, WebGL and (eventually) server-side.
 
 ### API Glossary
@@ -109,6 +121,8 @@ type LayoutLine = {
   width: number // Measured width of this line, e.g. 87.5
   start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
   end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
+  sourceOffset: number // Start index in the original input string of the text this line covers, e.g. 2
+  sourceLength: number // Length of that span in the original input, counting whitespace that was collapsed inside the line, e.g. 9
 }
 type LayoutLineRange = {
   width: number // Measured width of this line, e.g. 87.5
@@ -125,6 +139,8 @@ Other helpers:
 ```ts
 clearCache(): void // clears Pretext's shared internal caches used by prepare() and prepareWithSegments(). Useful if your app cycles through many different fonts or text variants and you want to release the accumulated cache
 setLocale(locale?: string): void // optional (by default we use the current locale). Sets locale for future prepare() and prepareWithSegments(). Internally, it also calls clearCache(). Setting a new locale doesn't affect existing prepare() and prepareWithSegments() states (no mutations to them)
+cursorToSourceOffset(prepared: PreparedTextWithSegments, cursor: LayoutCursor): number // converts a rich cursor back into an offset in the original input text
+cursorRangeToSourceSpan(prepared: PreparedTextWithSegments, start: LayoutCursor, end: LayoutCursor): { sourceOffset: number, sourceLength: number } // converts a rich cursor range back into the original input span; if the cursors arrive in reverse order, the span is still reported with a non-negative length
 ```
 
 ## Caveats
diff --git a/src/analysis.ts b/src/analysis.ts
index a22d881e..ebde2d84 100644
--- a/src/analysis.ts
+++ b/src/analysis.ts
@@ -31,13 +31,17 @@ export type AnalysisChunk = {
   consumedEndSegmentIndex: number
 }
 
-export type TextAnalysis = { normalized: string, chunks: AnalysisChunk[] } & MergedSegmentation
+export type TextAnalysis = { normalized: string, chunks: AnalysisChunk[], sourceBoundaries?: number[] } & MergedSegmentation
+
+type NormalizedTextWithSourceBoundaries = {
+  text: string
+  sourceBoundaries: number[]
+}
 
 export type AnalysisProfile = {
   carryCJKAfterClosingQuote: boolean
 }
 
-const collapsibleWhitespaceRunRe = /[ \t\n\r\f]+/g
 const needsWhitespaceNormalizationRe = /[\t\n\r\f]| {2,}|^ | $/
 
 type WhiteSpaceProfile = {
@@ -54,23 +58,89 @@ function getWhiteSpaceProfile(whiteSpace?: WhiteSpaceMode): WhiteSpaceProfile {
 }
 
 export function normalizeWhitespaceNormal(text: string): string {
-  if (!needsWhitespaceNormalizationRe.test(text)) return text
+  return normalizeWhitespaceNormalWithSourceBoundaries(text).text
+}
+
+function normalizeWhitespaceNormalWithSourceBoundaries(text: string): NormalizedTextWithSourceBoundaries {
+  if (!needsWhitespaceNormalizationRe.test(text)) {
+    const sourceBoundaries = new Array<number>(text.length + 1)
+    for (let i = 0; i <= text.length; i++) sourceBoundaries[i] = i
+    return { text, sourceBoundaries }
+  }
+
+  const pieces: string[] = []
+  const sourceBoundaries: number[] = []
+  let i = 0
+
+  while (i < text.length) {
+    const ch = text[i]!
+    const isWhitespace = ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' || ch === '\f'
+    if (!isWhitespace) break
+    i++
+  }
+
+  sourceBoundaries.push(i)
+
+  while (i < text.length) {
+    const ch = text[i]!
+    const isWhitespace = ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' || ch === '\f'
+    if (isWhitespace) {
+      while (i < text.length) {
+        const next = text[i]!
+        if (next !== ' ' && next !== '\t' && next !== '\n' && next !== '\r' && next !== '\f') break
+        i++
+      }
+      if (i >= text.length) break
+      pieces.push(' ')
+      sourceBoundaries.push(i)
+      continue
+    }
 
-  let normalized = text.replace(collapsibleWhitespaceRunRe, ' ')
-  if (normalized.charCodeAt(0) === 0x20) {
-    normalized = normalized.slice(1)
+    pieces.push(ch)
+    i++
+    sourceBoundaries.push(i)
   }
-  if (normalized.length > 0 && normalized.charCodeAt(normalized.length - 1) === 0x20) {
-    normalized = normalized.slice(0, -1)
+
+  return { text: pieces.join(''), sourceBoundaries }
+}
+
+function normalizeWhitespacePreWrapWithSourceBoundaries(text: string): NormalizedTextWithSourceBoundaries {
+  if (!/[\r\f]/.test(text)) {
+    return {
+      text,
+      sourceBoundaries: buildPreWrapSourceBoundaries(text),
+    }
+  }
+
+  return {
+    text: text
+      .replace(/\r\n/g, '\n')
+      .replace(/[\r\f]/g, '\n'),
+    sourceBoundaries: buildPreWrapSourceBoundaries(text),
   }
-  return normalized
 }
 
-function normalizeWhitespacePreWrap(text: string): string {
-  if (!/[\r\f]/.test(text)) return text.replace(/\r\n/g, '\n')
-  return text
-    .replace(/\r\n/g, '\n')
-    .replace(/[\r\f]/g, '\n')
+function buildPreWrapSourceBoundaries(text: string): number[] {
+  const sourceBoundaries = [0]
+  let i = 0
+
+  while (i < text.length) {
+    const ch = text[i]!
+    if (ch === '\r' && i + 1 < text.length && text[i + 1] === '\n') {
+      i += 2
+      sourceBoundaries.push(i)
+      continue
+    }
+    if (ch === '\r' || ch === '\f') {
+      i += 1
+      sourceBoundaries.push(i)
+      continue
+    }
+    i += 1
+    sourceBoundaries.push(i)
+  }
+
+  return sourceBoundaries
 }
 
 let sharedWordSegmenter: Intl.Segmenter | null = null
@@ -982,11 +1052,13 @@ export function analyzeText(
   text: string,
   profile: AnalysisProfile,
   whiteSpace: WhiteSpaceMode = 'normal',
+  includeSourceBoundaries = false,
 ): TextAnalysis {
   const whiteSpaceProfile = getWhiteSpaceProfile(whiteSpace)
-  const normalized = whiteSpaceProfile.mode === 'pre-wrap'
-    ? normalizeWhitespacePreWrap(text)
-    : normalizeWhitespaceNormal(text)
+  const normalizedResult = whiteSpaceProfile.mode === 'pre-wrap'
+    ? normalizeWhitespacePreWrapWithSourceBoundaries(text)
+    : normalizeWhitespaceNormalWithSourceBoundaries(text)
+  const normalized = normalizedResult.text
   if (normalized.length === 0) {
     return {
       normalized,
@@ -996,12 +1068,14 @@ export function analyzeText(
       isWordLike: [],
       kinds: [],
       starts: [],
+      ...(includeSourceBoundaries ? { sourceBoundaries: normalizedResult.sourceBoundaries } : {}),
     }
   }
   const segmentation = buildMergedSegmentation(normalized, profile, whiteSpaceProfile)
   return {
     normalized,
     chunks: compileAnalysisChunks(segmentation, whiteSpaceProfile),
+    ...(includeSourceBoundaries ? { sourceBoundaries: normalizedResult.sourceBoundaries } : {}),
     ...segmentation,
   }
 }
diff --git a/src/layout.test.ts b/src/layout.test.ts
index 3b5d01bb..6bb02f14 100644
--- a/src/layout.test.ts
+++ b/src/layout.test.ts
@@ -17,6 +17,8 @@ let layout: LayoutModule['layout']
 let layoutWithLines: LayoutModule['layoutWithLines']
 let layoutNextLine: LayoutModule['layoutNextLine']
 let walkLineRanges: LayoutModule['walkLineRanges']
+let cursorToSourceOffset: LayoutModule['cursorToSourceOffset']
+let cursorRangeToSourceSpan: LayoutModule['cursorRangeToSourceSpan']
 let clearCache: LayoutModule['clearCache']
 let setLocale: LayoutModule['setLocale']
 let countPreparedLines: LineBreakModule['countPreparedLines']
@@ -107,6 +109,8 @@ beforeAll(async () => {
     layoutWithLines,
     layoutNextLine,
     walkLineRanges,
+    cursorToSourceOffset,
+    cursorRangeToSourceSpan,
     clearCache,
     setLocale,
   } = mod)
@@ -127,6 +131,8 @@ describe('prepare invariants', () => {
   test('collapses ordinary whitespace runs and trims the edges', () => {
     const prepared = prepareWithSegments('  Hello\t \n  World  ', FONT)
     expect(prepared.segments).toEqual(['Hello', ' ', 'World'])
+    expect(prepared.segmentSourceOffsets).toEqual([2, 7, 12])
+    expect(prepared.segmentSourceLengths).toEqual([5, 5, 5])
   })
 
   test('pre-wrap mode keeps ordinary spaces instead of collapsing them', () => {
@@ -413,6 +419,8 @@ describe('layout invariants', () => {
       width: widthOfHello,
       start: { segmentIndex: 0, graphemeIndex: 0 },
       end: { segmentIndex: 1, graphemeIndex: 0 },
+      sourceOffset: 0,
+      sourceLength: 5,
     }])
   })
 
@@ -430,6 +438,22 @@ describe('layout invariants', () => {
     expect(rich.lines.map(line => line.text).join('')).toBe('Superlongword')
     expect(rich.lines[0]!.start).toEqual({ segmentIndex: 0, graphemeIndex: 0 })
     expect(rich.lines.at(-1)!.end).toEqual({ segmentIndex: 1, graphemeIndex: 0 })
+    expect(cursorToSourceOffset(prepared, rich.lines[0]!.end)).toBe(rich.lines[0]!.sourceOffset + rich.lines[0]!.sourceLength)
+  })
+
+  test('rich lines preserve source spans through collapsed whitespace normalization', () => {
+    const source = '  foo   bar  '
+    const prepared = prepareWithSegments(source, FONT)
+    const lines = layoutWithLines(prepared, 200, LINE_HEIGHT)
+    expect(lines.lines).toHaveLength(1)
+    expect(lines.lines[0]!.text).toBe('foo bar')
+    expect(lines.lines[0]!.sourceOffset).toBe(2)
+    expect(lines.lines[0]!.sourceLength).toBe(9)
+    expect(source.slice(lines.lines[0]!.sourceOffset, lines.lines[0]!.sourceOffset + lines.lines[0]!.sourceLength)).toBe('foo   bar')
+    expect(cursorRangeToSourceSpan(prepared, lines.lines[0]!.start, lines.lines[0]!.end)).toEqual({
+      sourceOffset: 2,
+      sourceLength: 9,
+    })
   })
 
   test('mixed-direction text is a stable smoke test', () => {
@@ -587,6 +611,7 @@ describe('layout invariants', () => {
       start: { segmentIndex: number, graphemeIndex: number }
       end: { segmentIndex: number, graphemeIndex: number }
     }> = []
+    const sourceSpans: Array<{ sourceOffset: number, sourceLength: number }> = []
 
     const lineCount = walkLineRanges(prepared, width, line => {
       actual.push({
@@ -594,6 +619,7 @@ describe('layout invariants', () => {
         start: { ...line.start },
         end: { ...line.end },
       })
+      sourceSpans.push(cursorRangeToSourceSpan(prepared, line.start, line.end))
     })
 
     expect(lineCount).toBe(expected.lineCount)
@@ -602,6 +628,10 @@ describe('layout invariants', () => {
       start: line.start,
       end: line.end,
     })))
+    expect(sourceSpans).toEqual(expected.lines.map(line => ({
+      sourceOffset: line.sourceOffset,
+      sourceLength: line.sourceLength,
+    })))
   })
 
   test('countPreparedLines stays aligned with the walked line counter', () => {
diff --git a/src/layout.ts b/src/layout.ts
index 465a0673..dc8f7b79 100644
--- a/src/layout.ts
+++ b/src/layout.ts
@@ -106,6 +106,8 @@ type InternalPreparedText = PreparedText & PreparedCore
 // Treat this as the unstable escape hatch for experiments and custom rendering.
 export type PreparedTextWithSegments = InternalPreparedText & {
   segments: string[] // Segment text aligned with the parallel arrays, e.g. ['hello', ' ', 'world']
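+  segmentSourceOffsets: number[] // Start index of each segment in the original input string, aligned with `segments`, e.g. [0, 5, 6]
+  segmentSourceLengths: number[] // Length of each segment's span in the original input (can exceed the segment text length when whitespace was collapsed), e.g. [5, 1, 5]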
 }
 
 export type LayoutCursor = {
@@ -123,6 +125,8 @@ export type LayoutLine = {
   width: number // Measured width of this line, e.g. 87.5
   start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
   end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
+  sourceOffset: number // Start index of the covered text in the original input string
+  sourceLength: number // Length of the covered span in the original input string
 }
 
 export type LayoutLineRange = {
@@ -171,6 +175,8 @@ function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | P
       tabStopAdvance: 0,
       chunks: [],
       segments: [],
+      segmentSourceOffsets: [],
+      segmentSourceLengths: [],
     } as unknown as PreparedTextWithSegments
   }
   return {
@@ -214,6 +220,9 @@ function measureAnalysis(
   const breakableWidths: (number[] | null)[] = []
   const breakablePrefixWidths: (number[] | null)[] = []
   const segments = includeSegments ? [] as string[] : null
+  const segmentSourceOffsets = includeSegments ? [] as number[] : null
+  const segmentSourceLengths = includeSegments ? [] as number[] : null
+  const sourceBoundaries = includeSegments ? analysis.sourceBoundaries ?? null : null
   const preparedStartByAnalysisIndex = Array.from<number>({ length: analysis.len })
   const preparedEndByAnalysisIndex = Array.from<number>({ length: analysis.len })
 
@@ -224,6 +233,8 @@ function measureAnalysis(
     lineEndPaintAdvance: number,
     kind: SegmentBreakKind,
     start: number,
+    sourceOffset: number,
+    sourceLength: number,
     breakable: number[] | null,
     breakablePrefix: number[] | null,
   ): void {
@@ -238,6 +249,10 @@ function measureAnalysis(
     breakableWidths.push(breakable)
     breakablePrefixWidths.push(breakablePrefix)
     if (segments !== null) segments.push(text)
+    if (segmentSourceOffsets !== null && segmentSourceLengths !== null) {
+      segmentSourceOffsets.push(sourceOffset)
+      segmentSourceLengths.push(sourceLength)
+    }
   }
 
   for (let mi = 0; mi < analysis.len; mi++) {
@@ -246,6 +261,9 @@ function measureAnalysis(
     const segWordLike = analysis.isWordLike[mi]!
     const segKind = analysis.kinds[mi]!
     const segStart = analysis.starts[mi]!
+    const segSourceOffset = sourceBoundaries?.[segStart] ?? segStart
+    const segSourceEnd = sourceBoundaries?.[segStart + segText.length] ?? (segStart + segText.length)
+    const segSourceLength = segSourceEnd - segSourceOffset
 
     if (segKind === 'soft-hyphen') {
       pushMeasuredSegment(
@@ -255,6 +273,8 @@ function measureAnalysis(
         discretionaryHyphenWidth,
         segKind,
         segStart,
+        segSourceOffset,
+        segSourceLength,
         null,
         null,
       )
@@ -263,13 +283,13 @@ function measureAnalysis(
     }
 
     if (segKind === 'hard-break') {
-      pushMeasuredSegment(segText, 0, 0, 0, segKind, segStart, null, null)
+      pushMeasuredSegment(segText, 0, 0, 0, segKind, segStart, segSourceOffset, segSourceLength, null, null)
       preparedEndByAnalysisIndex[mi] = widths.length
       continue
     }
 
     if (segKind === 'tab') {
-      pushMeasuredSegment(segText, 0, 0, 0, segKind, segStart, null, null)
+      pushMeasuredSegment(segText, 0, 0, 0, segKind, segStart, segSourceOffset, segSourceLength, null, null)
       preparedEndByAnalysisIndex[mi] = widths.length
       continue
     }
@@ -303,7 +323,9 @@ function measureAnalysis(
 
         const unitMetrics = getSegmentMetrics(unitText, cache)
         const w = getCorrectedSegmentWidth(unitText, unitMetrics, emojiCorrection)
-        pushMeasuredSegment(unitText, w, w, w, 'text', segStart + unitStart, null, null)
+        const unitSourceOffset = sourceBoundaries?.[segStart + unitStart] ?? (segStart + unitStart)
+        const unitSourceEnd = sourceBoundaries?.[segStart + unitStart + unitText.length] ?? (segStart + unitStart + unitText.length)
+        pushMeasuredSegment(unitText, w, w, w, 'text', segStart + unitStart, unitSourceOffset, unitSourceEnd - unitSourceOffset, null, null)
 
         unitText = grapheme
         unitStart = gs.index
@@ -312,7 +334,9 @@ function measureAnalysis(
       if (unitText.length > 0) {
         const unitMetrics = getSegmentMetrics(unitText, cache)
         const w = getCorrectedSegmentWidth(unitText, unitMetrics, emojiCorrection)
-        pushMeasuredSegment(unitText, w, w, w, 'text', segStart + unitStart, null, null)
+        const unitSourceOffset = sourceBoundaries?.[segStart + unitStart] ?? (segStart + unitStart)
+        const unitSourceEnd = sourceBoundaries?.[segStart + unitStart + unitText.length] ?? (segStart + unitStart + unitText.length)
+        pushMeasuredSegment(unitText, w, w, w, 'text', segStart + unitStart, unitSourceOffset, unitSourceEnd - unitSourceOffset, null, null)
       }
       preparedEndByAnalysisIndex[mi] = widths.length
       continue
@@ -340,6 +364,8 @@ function measureAnalysis(
         lineEndPaintAdvance,
         segKind,
         segStart,
+        segSourceOffset,
+        segSourceLength,
         graphemeWidths,
         graphemePrefixWidths,
       )
@@ -351,6 +377,8 @@ function measureAnalysis(
         lineEndPaintAdvance,
         segKind,
         segStart,
+        segSourceOffset,
+        segSourceLength,
         null,
         null,
       )
@@ -374,6 +402,8 @@ function measureAnalysis(
       tabStopAdvance,
       chunks,
       segments,
+      segmentSourceOffsets,
+      segmentSourceLengths,
     } as unknown as PreparedTextWithSegments
   }
   return {
@@ -427,7 +457,7 @@ function prepareInternal(
   includeSegments: boolean,
   options?: PrepareOptions,
 ): InternalPreparedText | PreparedTextWithSegments {
-  const analysis = analyzeText(text, getEngineProfile(), options?.whiteSpace)
+  const analysis = analyzeText(text, getEngineProfile(), options?.whiteSpace, includeSegments)
   return measureAnalysis(analysis, font, includeSegments)
 }
 
@@ -526,6 +556,55 @@ function getLineTextCache(prepared: PreparedTextWithSegments): Map<number, strin
   return cache
 }
 
+function getPreparedSourceEnd(prepared: PreparedTextWithSegments): number {
+  const lastIndex = prepared.segmentSourceOffsets.length - 1
+  if (lastIndex < 0) return 0
+  return prepared.segmentSourceOffsets[lastIndex]! + prepared.segmentSourceLengths[lastIndex]!
+}
+
+function cursorToSourceOffsetWithCache(
+  prepared: PreparedTextWithSegments,
+  cache: Map<number, string[]>,
+  cursor: LayoutCursor,
+): number {
+  if (prepared.segments.length === 0) return 0
+  if (cursor.segmentIndex <= 0 && cursor.graphemeIndex <= 0) {
+    return prepared.segmentSourceOffsets[0]!
+  }
+  if (cursor.segmentIndex >= prepared.segments.length) {
+    return getPreparedSourceEnd(prepared)
+  }
+
+  const segmentIndex = cursor.segmentIndex
+  const segmentSourceOffset = prepared.segmentSourceOffsets[segmentIndex]!
+  const segmentSourceLength = prepared.segmentSourceLengths[segmentIndex]!
+  if (cursor.graphemeIndex <= 0) return segmentSourceOffset
+
+  const graphemes = getSegmentGraphemes(segmentIndex, prepared.segments, cache)
+  if (cursor.graphemeIndex >= graphemes.length) {
+    return segmentSourceOffset + segmentSourceLength
+  }
+
+  let sourceDelta = 0
+  for (let i = 0; i < cursor.graphemeIndex; i++) {
+    sourceDelta += graphemes[i]!.length
+  }
+  return segmentSourceOffset + Math.min(sourceDelta, segmentSourceLength)
+}
+
+function cursorRangeToSourceSpanWithCache(
+  prepared: PreparedTextWithSegments,
+  cache: Map<number, string[]>,
+  start: LayoutCursor,
+  end: LayoutCursor,
+): { sourceOffset: number; sourceLength: number } {
+  const sourceStart = cursorToSourceOffsetWithCache(prepared, cache, start)
+  const sourceEnd = cursorToSourceOffsetWithCache(prepared, cache, end)
+  return sourceStart <= sourceEnd
+    ? { sourceOffset: sourceStart, sourceLength: sourceEnd - sourceStart }
+    : { sourceOffset: sourceEnd, sourceLength: sourceStart - sourceEnd }
+}
+
 function lineHasDiscretionaryHyphen(
   kinds: SegmentBreakKind[],
   startSegmentIndex: number,
@@ -587,6 +666,18 @@ function createLayoutLine(
   endSegmentIndex: number,
   endGraphemeIndex: number,
 ): LayoutLine {
+  const sourceSpan = cursorRangeToSourceSpanWithCache(
+    prepared,
+    cache,
+    {
+      segmentIndex: startSegmentIndex,
+      graphemeIndex: startGraphemeIndex,
+    },
+    {
+      segmentIndex: endSegmentIndex,
+      graphemeIndex: endGraphemeIndex,
+    },
+  )
   return {
     text: buildLineTextFromRange(
       prepared.segments,
@@ -606,6 +697,8 @@ function createLayoutLine(
       segmentIndex: endSegmentIndex,
       graphemeIndex: endGraphemeIndex,
     },
+    sourceOffset: sourceSpan.sourceOffset,
+    sourceLength: sourceSpan.sourceLength,
   }
 }
 
@@ -678,6 +771,21 @@ export function walkLineRanges(
   })
 }
 
+export function cursorToSourceOffset(
+  prepared: PreparedTextWithSegments,
+  cursor: LayoutCursor,
+): number {
+  return cursorToSourceOffsetWithCache(prepared, getLineTextCache(prepared), cursor)
+}
+
+export function cursorRangeToSourceSpan(
+  prepared: PreparedTextWithSegments,
+  start: LayoutCursor,
+  end: LayoutCursor,
+): { sourceOffset: number; sourceLength: number } {
+  return cursorRangeToSourceSpanWithCache(prepared, getLineTextCache(prepared), start, end)
+}
+
 export function layoutNextLine(
   prepared: PreparedTextWithSegments,
   start: LayoutCursor,

From 4fa13fe9a3765be348ac418f1d6685e5d9e1fd47 Mon Sep 17 00:00:00 2001
From: somnai-dreams <maxingham@gmail.com>
Date: Fri, 3 Apr 2026 01:26:19 +1100
Subject: [PATCH 2/3] Avoid source-boundary work on the simple path

---
 src/analysis.ts | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/src/analysis.ts b/src/analysis.ts
index ebde2d84..3eb4dea1 100644
--- a/src/analysis.ts
+++ b/src/analysis.ts
@@ -42,6 +42,7 @@ export type AnalysisProfile = {
   carryCJKAfterClosingQuote: boolean
 }
 
+const collapsibleWhitespaceRunRe = /[ \t\n\r\f]+/g
 const needsWhitespaceNormalizationRe = /[\t\n\r\f]| {2,}|^ | $/
 
 type WhiteSpaceProfile = {
@@ -58,7 +59,16 @@ function getWhiteSpaceProfile(whiteSpace?: WhiteSpaceMode): WhiteSpaceProfile {
 }
 
 export function normalizeWhitespaceNormal(text: string): string {
-  return normalizeWhitespaceNormalWithSourceBoundaries(text).text
+  if (!needsWhitespaceNormalizationRe.test(text)) return text
+
+  let normalized = text.replace(collapsibleWhitespaceRunRe, ' ')
+  if (normalized.charCodeAt(0) === 0x20) {
+    normalized = normalized.slice(1)
+  }
+  if (normalized.length > 0 && normalized.charCodeAt(normalized.length - 1) === 0x20) {
+    normalized = normalized.slice(0, -1)
+  }
+  return normalized
 }
 
 function normalizeWhitespaceNormalWithSourceBoundaries(text: string): NormalizedTextWithSourceBoundaries {
@@ -120,6 +130,13 @@ function normalizeWhitespacePreWrapWithSourceBoundaries(text: string): Normalize
   }
 }
 
+function normalizeWhitespacePreWrap(text: string): string {
+  if (!/[\r\f]/.test(text)) return text
+  return text
+    .replace(/\r\n/g, '\n')
+    .replace(/[\r\f]/g, '\n')
+}
+
 function buildPreWrapSourceBoundaries(text: string): number[] {
   const sourceBoundaries = [0]
   let i = 0
@@ -1055,10 +1072,19 @@ export function analyzeText(
   includeSourceBoundaries = false,
 ): TextAnalysis {
   const whiteSpaceProfile = getWhiteSpaceProfile(whiteSpace)
-  const normalizedResult = whiteSpaceProfile.mode === 'pre-wrap'
-    ? normalizeWhitespacePreWrapWithSourceBoundaries(text)
-    : normalizeWhitespaceNormalWithSourceBoundaries(text)
-  const normalized = normalizedResult.text
+  const normalizedResult = includeSourceBoundaries
+    ? (whiteSpaceProfile.mode === 'pre-wrap'
+        ? normalizeWhitespacePreWrapWithSourceBoundaries(text)
+        : normalizeWhitespaceNormalWithSourceBoundaries(text))
+    : null
+  const normalized = normalizedResult
+    ? normalizedResult.text
+    : (whiteSpaceProfile.mode === 'pre-wrap'
+        ? normalizeWhitespacePreWrap(text)
+        : normalizeWhitespaceNormal(text))
+  const sourceBoundariesPayload = normalizedResult
+    ? { sourceBoundaries: normalizedResult.sourceBoundaries }
+    : {}
   if (normalized.length === 0) {
     return {
       normalized,
@@ -1068,14 +1094,14 @@ export function analyzeText(
       isWordLike: [],
       kinds: [],
       starts: [],
-      ...(includeSourceBoundaries ? { sourceBoundaries: normalizedResult.sourceBoundaries } : {}),
+      ...sourceBoundariesPayload,
     }
   }
   const segmentation = buildMergedSegmentation(normalized, profile, whiteSpaceProfile)
   return {
     normalized,
     chunks: compileAnalysisChunks(segmentation, whiteSpaceProfile),
-    ...(includeSourceBoundaries ? { sourceBoundaries: normalizedResult.sourceBoundaries } : {}),
+    ...sourceBoundariesPayload,
     ...segmentation,
   }
 }

From 566cadaff54a7a035c5de41d5eda123c965f0eac Mon Sep 17 00:00:00 2001
From: somnai-dreams <maxingham@gmail.com>
Date: Fri, 3 Apr 2026 01:29:24 +1100
Subject: [PATCH 3/3] Add simple pre-wrap CRLF regression test

---
 src/layout.test.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/layout.test.ts b/src/layout.test.ts
index 6bb02f14..2129a2eb 100644
--- a/src/layout.test.ts
+++ b/src/layout.test.ts
@@ -153,6 +153,11 @@ describe('prepare invariants', () => {
     expect(prepared.kinds).toEqual(['text', 'hard-break', 'text'])
   })
 
+  test('pre-wrap mode also normalizes CRLF on the simple prepare path', () => {
+    const prepared = prepare('Hello\r\nWorld', FONT, { whiteSpace: 'pre-wrap' })
+    expect(layout(prepared, 200, LINE_HEIGHT).lineCount).toBe(2)
+  })
+
   test('pre-wrap mode keeps tabs as explicit segments', () => {
     const prepared = prepareWithSegments('Hello\tWorld', FONT, { whiteSpace: 'pre-wrap' })
     expect(prepared.segments).toEqual(['Hello', '\t', 'World'])