Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ const { lines } = layoutWithLines(prepared, 320, 26) // 320px max width, 26px li
for (let i = 0; i < lines.length; i++) ctx.fillText(lines[i].text, 0, i * 26)
```

Each rich line also carries `sourceOffset` / `sourceLength`, so callers can map a rendered line back to the original input without reconstructing whitespace normalization themselves.

- `walkLineRanges()` gives you line widths and cursors without building the text strings:

```ts
Expand All @@ -88,6 +90,16 @@ while (true) {
}
```

- `cursorToSourceOffset()` / `cursorRangeToSourceSpan()` convert rich cursors back into original-source offsets:

```ts
const line = layoutNextLine(prepared, cursor, width)
if (line) {
const start = cursorToSourceOffset(prepared, line.start)
const { sourceOffset, sourceLength } = cursorRangeToSourceSpan(prepared, line.start, line.end)
}
```

This usage allows rendering to canvas, SVG, WebGL and (eventually) server-side.

### API Glossary
Expand All @@ -109,6 +121,8 @@ type LayoutLine = {
width: number // Measured width of this line, e.g. 87.5
start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
sourceOffset: number // Offset into the original (un-normalized) input where this line's source span starts
sourceLength: number // Length of that span in the original input, i.e. the line covers [sourceOffset, sourceOffset + sourceLength)
}
type LayoutLineRange = {
width: number // Measured width of this line, e.g. 87.5
Expand All @@ -125,6 +139,8 @@ Other helpers:
```ts
clearCache(): void // clears Pretext's shared internal caches used by prepare() and prepareWithSegments(). Useful if your app cycles through many different fonts or text variants and you want to release the accumulated cache
setLocale(locale?: string): void // optional (by default we use the current locale). Sets locale for future prepare() and prepareWithSegments(). Internally, it also calls clearCache(). Setting a new locale doesn't affect existing prepare() and prepareWithSegments() states (no mutations to them)
cursorToSourceOffset(prepared: PreparedTextWithSegments, cursor: LayoutCursor): number // converts a rich cursor back into an offset in the original input text
cursorRangeToSourceSpan(prepared: PreparedTextWithSegments, start: LayoutCursor, end: LayoutCursor): { sourceOffset: number, sourceLength: number } // converts a rich cursor range back into the original input span
```

## Caveats
Expand Down
110 changes: 105 additions & 5 deletions src/analysis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@ export type AnalysisChunk = {
consumedEndSegmentIndex: number
}

export type TextAnalysis = { normalized: string, chunks: AnalysisChunk[] } & MergedSegmentation
export type TextAnalysis = { normalized: string, chunks: AnalysisChunk[], sourceBoundaries?: number[] } & MergedSegmentation

type NormalizedTextWithSourceBoundaries = {
text: string
sourceBoundaries: number[]
}

export type AnalysisProfile = {
carryCJKAfterClosingQuote: boolean
Expand Down Expand Up @@ -66,13 +71,95 @@ export function normalizeWhitespaceNormal(text: string): string {
return normalized
}

/**
 * Collapses whitespace the way the default (`normal`) mode does, while also
 * recording where every character of the result came from in `text`.
 *
 * The returned `sourceBoundaries` has one more entry than the normalized text
 * has characters: entry `k` is the source offset at which normalized character
 * `k` starts, and entry `k + 1` is where it ends. A collapsed whitespace run
 * is represented by a single `' '` whose span covers the whole run. Leading
 * and trailing whitespace runs produce no output, so the first boundary sits
 * after the leading run and the last boundary sits before the trailing run.
 *
 * @param text Raw input text.
 * @returns The normalized text together with its source boundary table.
 */
function normalizeWhitespaceNormalWithSourceBoundaries(text: string): NormalizedTextWithSourceBoundaries {
  const sourceLength = text.length

  // Fast path: nothing to normalize, so every boundary maps onto itself.
  if (!needsWhitespaceNormalizationRe.test(text)) {
    const identity: number[] = []
    for (let offset = 0; offset <= sourceLength; offset++) identity.push(offset)
    return { text, sourceBoundaries: identity }
  }

  // The characters that take part in collapsing: space, tab, LF, CR, FF.
  const isCollapsible = (ch: string): boolean =>
    ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' || ch === '\f'

  // Drop the leading whitespace run; the first boundary points just past it.
  let cursor = 0
  while (cursor < sourceLength && isCollapsible(text[cursor]!)) cursor++

  let normalized = ''
  const sourceBoundaries: number[] = [cursor]

  while (cursor < sourceLength) {
    const ch = text[cursor]!

    if (!isCollapsible(ch)) {
      // Ordinary character: copied through, spanning exactly one source unit.
      normalized += ch
      cursor++
      sourceBoundaries.push(cursor)
      continue
    }

    // Consume the entire whitespace run starting at `cursor`.
    do {
      cursor++
    } while (cursor < sourceLength && isCollapsible(text[cursor]!))

    // A run that reaches the end of the input is trimmed: emit nothing for it.
    if (cursor >= sourceLength) break

    // An interior run becomes one space whose source span ends where the run ends.
    normalized += ' '
    sourceBoundaries.push(cursor)
  }

  return { text: normalized, sourceBoundaries }
}

/**
 * `pre-wrap` normalization with source tracking: CRLF pairs, lone carriage
 * returns and form feeds each become a single `'\n'`; all other characters are
 * kept untouched.
 *
 * The boundary table is always derived from the raw `text`, whether or not any
 * replacement was necessary.
 *
 * @param text Raw input text.
 * @returns The normalized text together with its source boundary table.
 */
function normalizeWhitespacePreWrapWithSourceBoundaries(text: string): NormalizedTextWithSourceBoundaries {
  const sourceBoundaries = buildPreWrapSourceBoundaries(text)

  // Only rewrite the string when it actually contains a '\r' or '\f'.
  const hasLegacyBreaks = /[\r\f]/.test(text)
  const normalizedText = hasLegacyBreaks
    ? text.replace(/\r\n/g, '\n').replace(/[\r\f]/g, '\n')
    : text

  return { text: normalizedText, sourceBoundaries }
}

/**
 * Normalizes line-break characters for `pre-wrap` mode without touching any
 * other whitespace: `\r\n`, a lone `\r` and `\f` each become a single `\n`.
 *
 * @param text Raw input text.
 * @returns `text` itself when it contains no `\r` or `\f`, otherwise a copy
 *   with those break characters rewritten to `\n`.
 */
function normalizeWhitespacePreWrap(text: string): string {
  // No '\r' or '\f' means there is nothing to rewrite (a CRLF pair cannot
  // exist without a '\r'), so return the input as-is instead of running a
  // replace that can never match.
  if (!/[\r\f]/.test(text)) return text

  // CRLF must be handled first so the pair collapses to one '\n' rather than two.
  return text
    .replace(/\r\n/g, '\n')
    .replace(/[\r\f]/g, '\n')
}

/**
 * Builds the source boundary table for `pre-wrap` normalization.
 *
 * The table starts with `0` and gains one entry per emitted character: a
 * `\r\n` pair advances the source position by two (it yields one character),
 * while every other character, including a lone `\r` or `\f`, advances it by
 * one.
 *
 * @param text Raw input text.
 * @returns Ascending source offsets; the last entry equals `text.length`.
 */
function buildPreWrapSourceBoundaries(text: string): number[] {
  const boundaries = [0]

  for (let pos = 0; pos < text.length; ) {
    // Reading one past the end yields undefined, which never equals '\n',
    // so a trailing '\r' is treated as a single-unit character.
    const isCrlfPair = text[pos] === '\r' && text[pos + 1] === '\n'
    pos += isCrlfPair ? 2 : 1
    boundaries.push(pos)
  }

  return boundaries
}

let sharedWordSegmenter: Intl.Segmenter | null = null
let segmenterLocale: string | undefined

Expand Down Expand Up @@ -982,11 +1069,22 @@ export function analyzeText(
text: string,
profile: AnalysisProfile,
whiteSpace: WhiteSpaceMode = 'normal',
includeSourceBoundaries = false,
): TextAnalysis {
const whiteSpaceProfile = getWhiteSpaceProfile(whiteSpace)
const normalized = whiteSpaceProfile.mode === 'pre-wrap'
? normalizeWhitespacePreWrap(text)
: normalizeWhitespaceNormal(text)
const normalizedResult = includeSourceBoundaries
? (whiteSpaceProfile.mode === 'pre-wrap'
? normalizeWhitespacePreWrapWithSourceBoundaries(text)
: normalizeWhitespaceNormalWithSourceBoundaries(text))
: null
const normalized = normalizedResult
? normalizedResult.text
: (whiteSpaceProfile.mode === 'pre-wrap'
? normalizeWhitespacePreWrap(text)
: normalizeWhitespaceNormal(text))
const sourceBoundariesPayload = normalizedResult
? { sourceBoundaries: normalizedResult.sourceBoundaries }
: {}
if (normalized.length === 0) {
return {
normalized,
Expand All @@ -996,12 +1094,14 @@ export function analyzeText(
isWordLike: [],
kinds: [],
starts: [],
...sourceBoundariesPayload,
}
}
const segmentation = buildMergedSegmentation(normalized, profile, whiteSpaceProfile)
return {
normalized,
chunks: compileAnalysisChunks(segmentation, whiteSpaceProfile),
...sourceBoundariesPayload,
...segmentation,
}
}
35 changes: 35 additions & 0 deletions src/layout.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ let layout: LayoutModule['layout']
let layoutWithLines: LayoutModule['layoutWithLines']
let layoutNextLine: LayoutModule['layoutNextLine']
let walkLineRanges: LayoutModule['walkLineRanges']
let cursorToSourceOffset: LayoutModule['cursorToSourceOffset']
let cursorRangeToSourceSpan: LayoutModule['cursorRangeToSourceSpan']
let clearCache: LayoutModule['clearCache']
let setLocale: LayoutModule['setLocale']
let countPreparedLines: LineBreakModule['countPreparedLines']
Expand Down Expand Up @@ -107,6 +109,8 @@ beforeAll(async () => {
layoutWithLines,
layoutNextLine,
walkLineRanges,
cursorToSourceOffset,
cursorRangeToSourceSpan,
clearCache,
setLocale,
} = mod)
Expand All @@ -127,6 +131,8 @@ describe('prepare invariants', () => {
// Default-mode preparation: `segments` holds the collapsed text, and the
// per-segment source offsets/lengths are checked alongside it.
test('collapses ordinary whitespace runs and trims the edges', () => {
// NOTE(review): offsets [2, 7, 12] with lengths [5, 5, 5] imply two leading
// spaces and a five-character whitespace run between the words; the literal
// as shown here has fewer. Presumably its whitespace was collapsed when this
// view was captured — TODO confirm against the repository.
const prepared = prepareWithSegments(' Hello\t \n World ', FONT)
expect(prepared.segments).toEqual(['Hello', ' ', 'World'])
expect(prepared.segmentSourceOffsets).toEqual([2, 7, 12])
expect(prepared.segmentSourceLengths).toEqual([5, 5, 5])
})

test('pre-wrap mode keeps ordinary spaces instead of collapsing them', () => {
Expand All @@ -147,6 +153,11 @@ describe('prepare invariants', () => {
expect(prepared.kinds).toEqual(['text', 'hard-break', 'text'])
})

// Exercises prepare() (rather than prepareWithSegments()) in pre-wrap mode:
// two words separated by one CRLF are expected to lay out as exactly two lines.
test('pre-wrap mode also normalizes CRLF on the simple prepare path', () => {
const prepared = prepare('Hello\r\nWorld', FONT, { whiteSpace: 'pre-wrap' })
expect(layout(prepared, 200, LINE_HEIGHT).lineCount).toBe(2)
})

test('pre-wrap mode keeps tabs as explicit segments', () => {
const prepared = prepareWithSegments('Hello\tWorld', FONT, { whiteSpace: 'pre-wrap' })
expect(prepared.segments).toEqual(['Hello', '\t', 'World'])
Expand Down Expand Up @@ -413,6 +424,8 @@ describe('layout invariants', () => {
width: widthOfHello,
start: { segmentIndex: 0, graphemeIndex: 0 },
end: { segmentIndex: 1, graphemeIndex: 0 },
sourceOffset: 0,
sourceLength: 5,
}])
})

Expand All @@ -430,6 +443,22 @@ describe('layout invariants', () => {
expect(rich.lines.map(line => line.text).join('')).toBe('Superlongword')
expect(rich.lines[0]!.start).toEqual({ segmentIndex: 0, graphemeIndex: 0 })
expect(rich.lines.at(-1)!.end).toEqual({ segmentIndex: 1, graphemeIndex: 0 })
expect(cursorToSourceOffset(prepared, rich.lines[0]!.end)).toBe(rich.lines[0]!.sourceOffset + rich.lines[0]!.sourceLength)
})

// Checks that a rich line's sourceOffset/sourceLength, and the equivalent
// cursorRangeToSourceSpan() result, index into the raw `source` string rather
// than into the normalized line text.
test('rich lines preserve source spans through collapsed whitespace normalization', () => {
// NOTE(review): the expectations below (offset 2, length 9 for the 7-character
// line 'foo bar') imply two leading spaces and a three-space gap in `source`,
// and a source slice of 'foo   bar'. The literals as shown here contain single
// spaces, presumably collapsed when this view was captured — TODO confirm
// against the repository before relying on these values.
const source = ' foo bar '
const prepared = prepareWithSegments(source, FONT)
const lines = layoutWithLines(prepared, 200, LINE_HEIGHT)
expect(lines.lines).toHaveLength(1)
expect(lines.lines[0]!.text).toBe('foo bar')
expect(lines.lines[0]!.sourceOffset).toBe(2)
expect(lines.lines[0]!.sourceLength).toBe(9)
// The span is half-open: [sourceOffset, sourceOffset + sourceLength).
expect(source.slice(lines.lines[0]!.sourceOffset, lines.lines[0]!.sourceOffset + lines.lines[0]!.sourceLength)).toBe('foo bar')
expect(cursorRangeToSourceSpan(prepared, lines.lines[0]!.start, lines.lines[0]!.end)).toEqual({
sourceOffset: 2,
sourceLength: 9,
})
})

test('mixed-direction text is a stable smoke test', () => {
Expand Down Expand Up @@ -587,13 +616,15 @@ describe('layout invariants', () => {
start: { segmentIndex: number, graphemeIndex: number }
end: { segmentIndex: number, graphemeIndex: number }
}> = []
const sourceSpans: Array<{ sourceOffset: number, sourceLength: number }> = []

const lineCount = walkLineRanges(prepared, width, line => {
actual.push({
width: line.width,
start: { ...line.start },
end: { ...line.end },
})
sourceSpans.push(cursorRangeToSourceSpan(prepared, line.start, line.end))
})

expect(lineCount).toBe(expected.lineCount)
Expand All @@ -602,6 +633,10 @@ describe('layout invariants', () => {
start: line.start,
end: line.end,
})))
expect(sourceSpans).toEqual(expected.lines.map(line => ({
sourceOffset: line.sourceOffset,
sourceLength: line.sourceLength,
})))
})

test('countPreparedLines stays aligned with the walked line counter', () => {
Expand Down
Loading