From 7376604fb3d5b0d39e8b8aa1e1f26a8afda6a254 Mon Sep 17 00:00:00 2001 From: yiming Date: Mon, 30 Mar 2026 18:36:15 +0800 Subject: [PATCH] perf: optimize line-break calculations with pre-computed effectiveMaxWidth - Replace canBreakAfter() positive check with negative check (3 comparisons vs 5) - Remove unused isSimpleCollapsibleSpace() helper - Pre-compute effectiveMaxWidth = maxWidth + lineFitEpsilon to avoid repeated arithmetic - Update fitSoftHyphenBreak() signature to accept effectiveMaxWidth directly - Replace all 'maxWidth + lineFitEpsilon' comparisons with effectiveMaxWidth variable - Inline isSimpleCollapsibleSpace() check as 'kind === "space"' These micro-optimizations reduce per-call overhead in hot paths (layout, walkPreparedLines). --- .claude/settings.local.json | 23 + package-lock.json | 548 ++++++++++++++ src/analysis.ts | 1116 +++++++++++++++++---------- src/bidi.ts | 367 +++++---- src/line-break.ts | 1418 +++++++++++++++++------------------ 5 files changed, 2182 insertions(+), 1290 deletions(-) create mode 100644 .claude/settings.local.json create mode 100644 package-lock.json diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 00000000..7c85b405 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,23 @@ +{ + "permissions": { + "allow": [ + "mcp__github__issue_read", + "Bash(git fetch:*)", + "Bash(git remote:*)", + "Bash(git checkout:*)", + "Bash(npm test:*)", + "Bash(npm run:*)", + "Bash(bun install:*)", + "Bash(npm install:*)", + "Bash(npx tsc:*)", + "Bash(git add:*)", + "Bash(git commit -m \"perf: optimize line-break calculations with pre-computed effectiveMaxWidth:*)", + "Bash(git push:*)", + "mcp__github__fork_repository", + "Bash(git config:*)", + "mcp__github__get_me", + "mcp__github__create_pull_request", + "Bash(git commit:*)" + ] + } +} diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 00000000..26125489 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,548 @@ +{ + "name": "@chenglou/pretext", + "version": "0.0.3", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "@chenglou/pretext", + "version": "0.0.3", + "license": "MIT", + "devDependencies": { + "@types/bun": "latest", + "oxlint": "^1.51.0", + "oxlint-tsgolint": "^0.15.0", + "tsgolint": "^0.0.1", + "typescript": "6.0.2" + } + }, + "node_modules/@oxlint-tsgolint/darwin-arm64": { + "version": "0.15.0", + "resolved": "https://registry.npmjs.org/@oxlint-tsgolint/darwin-arm64/-/darwin-arm64-0.15.0.tgz", + "integrity": "sha512-d7Ch+A6hic+RYrm32+Gh1o4lOrQqnFsHi721ORdHUDBiQPea+dssKUEMwIbA6MKmCy6TVJ02sQyi24OEfCiGzw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@oxlint-tsgolint/darwin-x64": { + "version": "0.15.0", + "resolved": "https://registry.npmjs.org/@oxlint-tsgolint/darwin-x64/-/darwin-x64-0.15.0.tgz", + "integrity": "sha512-Aoai2wAkaUJqp/uEs1gml6TbaPW4YmyO5Ai/vOSkiizgHqVctjhjKqmRiWTX2xuPY94VkwOLqp+Qr3y/0qSpWQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@oxlint-tsgolint/linux-arm64": { + "version": "0.15.0", + "resolved": "https://registry.npmjs.org/@oxlint-tsgolint/linux-arm64/-/linux-arm64-0.15.0.tgz", + "integrity": "sha512-4og13a7ec4Vku5t2Y7s3zx6YJP6IKadb1uA9fOoRH6lm/wHWoCnxjcfJmKHXRZJII81WmbdJMSPxaBfwN/S68Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oxlint-tsgolint/linux-x64": { + "version": "0.15.0", + "resolved": "https://registry.npmjs.org/@oxlint-tsgolint/linux-x64/-/linux-x64-0.15.0.tgz", + "integrity": "sha512-9b9xzh/1Harn3a+XiKTK/8LrWw3VcqLfYp/vhV5/zAVR2Mt0d63WSp4FL+wG7DKnI2T/CbMFUFHwc7kCQjDMzQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oxlint-tsgolint/win32-arm64": { + "version": "0.15.0", + "resolved": "https://registry.npmjs.org/@oxlint-tsgolint/win32-arm64/-/win32-arm64-0.15.0.tgz", + "integrity": "sha512-nNac5hewHdkk5mowOwTqB1ZD76zB/FsUiyUvdCyupq5cG54XyKqSLEp9QGbx7wFJkWCkeWmuwRed4sfpAlKaeA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@oxlint-tsgolint/win32-x64": { + "version": "0.15.0", + "resolved": "https://registry.npmjs.org/@oxlint-tsgolint/win32-x64/-/win32-x64-0.15.0.tgz", + "integrity": "sha512-ioAY2XLpy83E2EqOLH9p1cEgj0G2qB1lmAn0a3yFV1jHQB29LIPIKGNsu/tYCClpwmHN79pT5KZAHZOgWxxqNg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@oxlint/binding-android-arm-eabi": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-android-arm-eabi/-/binding-android-arm-eabi-1.57.0.tgz", + "integrity": "sha512-C7EiyfAJG4B70496eV543nKiq5cH0o/xIh/ufbjQz3SIvHhlDDsyn+mRFh+aW8KskTyUpyH2LGWL8p2oN6bl1A==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-android-arm64": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-android-arm64/-/binding-android-arm64-1.57.0.tgz", + "integrity": "sha512-9i80AresjZ/FZf5xK8tKFbhQnijD4s1eOZw6/FHUwD59HEZbVLRc2C88ADYJfLZrF5XofWDiRX/Ja9KefCLy7w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-darwin-arm64": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-darwin-arm64/-/binding-darwin-arm64-1.57.0.tgz", + "integrity": "sha512-0eUfhRz5L2yKa9I8k3qpyl37XK3oBS5BvrgdVIx599WZK63P8sMbg+0s4IuxmIiZuBK68Ek+Z+gcKgeYf0otsg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-darwin-x64": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-darwin-x64/-/binding-darwin-x64-1.57.0.tgz", + "integrity": "sha512-UvrSuzBaYOue+QMAcuDITe0k/Vhj6KZGjfnI6x+NkxBTke/VoM7ZisaxgNY0LWuBkTnd1OmeQfEQdQ48fRjkQg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-freebsd-x64": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-freebsd-x64/-/binding-freebsd-x64-1.57.0.tgz", + "integrity": "sha512-wtQq0dCoiw4bUwlsNVDJJ3pxJA218fOezpgtLKrbQqUtQJcM9yP8z+I9fu14aHg0uyAxIY+99toL6uBa2r7nxA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-linux-arm-gnueabihf": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.57.0.tgz", + "integrity": "sha512-qxFWl2BBBFcT4djKa+OtMdnLgoHEJXpqjyGwz8OhW35ImoCwR5qtAGqApNYce5260FQqoAHW8S8eZTjiX67Tsg==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-linux-arm-musleabihf": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-linux-arm-musleabihf/-/binding-linux-arm-musleabihf-1.57.0.tgz", + "integrity": "sha512-SQoIsBU7J0bDW15/f0/RvxHfY3Y0+eB/caKBQtNFbuerTiA6JCYx9P1MrrFTwY2dTm/lMgTSgskvCEYk2AtG/Q==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-linux-arm64-gnu": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.57.0.tgz", + "integrity": "sha512-jqxYd1W6WMeozsCmqe9Rzbu3SRrGTyGDAipRlRggetyYbUksJqJKvUNTQtZR/KFoJPb+grnSm5SHhdWrywv3RQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-linux-arm64-musl": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.57.0.tgz", + "integrity": "sha512-i66WyEPVEvq9bxRUCJ/MP5EBfnTDN3nhwEdFZFTO5MmLLvzngfWEG3NSdXQzTT3vk5B9i6C2XSIYBh+aG6uqyg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-linux-ppc64-gnu": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.57.0.tgz", + "integrity": "sha512-oMZDCwz4NobclZU3pH+V1/upVlJZiZvne4jQP+zhJwt+lmio4XXr4qG47CehvrW1Lx2YZiIHuxM2D4YpkG3KVA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-linux-riscv64-gnu": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-linux-riscv64-gnu/-/binding-linux-riscv64-gnu-1.57.0.tgz", + "integrity": "sha512-uoBnjJ3MMEBbfnWC1jSFr7/nSCkcQYa72NYoNtLl1imshDnWSolYCjzb8LVCwYCCfLJXD+0gBLD7fyC14c0+0g==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-linux-riscv64-musl": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-linux-riscv64-musl/-/binding-linux-riscv64-musl-1.57.0.tgz", + "integrity": "sha512-BdrwD7haPZ8a9KrZhKJRSj6jwCor+Z8tHFZ3PT89Y3Jq5v3LfMfEePeAmD0LOTWpiTmzSzdmyw9ijneapiVHKQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-linux-s390x-gnu": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.57.0.tgz", + "integrity": "sha512-BNs+7ZNsRstVg2tpNxAXfMX/Iv5oZh204dVyb8Z37+/gCh+yZqNTlg6YwCLIMPSk5wLWIGOaQjT0GUOahKYImw==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-linux-x64-gnu": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.57.0.tgz", + "integrity": "sha512-AghS18w+XcENcAX0+BQGLiqjpqpaxKJa4cWWP0OWNLacs27vHBxu7TYkv9LUSGe5w8lOJHeMxcYfZNOAPqw2bg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-linux-x64-musl": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-linux-x64-musl/-/binding-linux-x64-musl-1.57.0.tgz", + "integrity": "sha512-E/FV3GB8phu/Rpkhz5T96hAiJlGzn91qX5yj5gU754P5cmVGXY1Jw/VSjDSlZBCY3VHjsVLdzgdkJaomEmcNOg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-openharmony-arm64": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-openharmony-arm64/-/binding-openharmony-arm64-1.57.0.tgz", + "integrity": "sha512-xvZ2yZt0nUVfU14iuGv3V25jpr9pov5N0Wr28RXnHFxHCRxNDMtYPHV61gGLhN9IlXM96gI4pyYpLSJC5ClLCQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-win32-arm64-msvc": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.57.0.tgz", + "integrity": "sha512-Z4D8Pd0AyHBKeazhdIXeUUy5sIS3Mo0veOlzlDECg6PhRRKgEsBJCCV1n+keUZtQ04OP+i7+itS3kOykUyNhDg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-win32-ia32-msvc": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-win32-ia32-msvc/-/binding-win32-ia32-msvc-1.57.0.tgz", + "integrity": "sha512-StOZ9nFMVKvevicbQfql6Pouu9pgbeQnu60Fvhz2S6yfMaii+wnueLnqQ5I1JPgNF0Syew4voBlAaHD13wH6tw==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxlint/binding-win32-x64-msvc": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@oxlint/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.57.0.tgz", + "integrity": "sha512-6PuxhYgth8TuW0+ABPOIkGdBYw+qYGxgIdXPHSVpiCDm+hqTTWCmC739St1Xni0DJBt8HnSHTG67i1y6gr8qrA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@types/bun": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@types/bun/-/bun-1.3.11.tgz", + "integrity": "sha512-5vPne5QvtpjGpsGYXiFyycfpDF2ECyPcTSsFBMa0fraoxiQyMJ3SmuQIGhzPg2WJuWxVBoxWJ2kClYTcw/4fAg==", + "dev": true, + "license": "MIT", + "dependencies": { + "bun-types": "1.3.11" + } + }, + "node_modules/@types/node": { + "version": "25.5.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.0.tgz", + "integrity": "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~7.18.0" + } + }, + "node_modules/bun-types": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/bun-types/-/bun-types-1.3.11.tgz", + "integrity": "sha512-1KGPpoxQWl9f6wcZh57LvrPIInQMn2TQ7jsgxqpRzg+l0QPOFvJVH7HmvHo/AiPgwXy+/Thf6Ov3EdVn1vOabg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/oxlint": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/oxlint/-/oxlint-1.57.0.tgz", + "integrity": "sha512-DGFsuBX5MFZX9yiDdtKjTrYPq45CZ8Fft6qCltJITYZxfwYjVdGf/6wycGYTACloauwIPxUnYhBVeZbHvleGhw==", + "dev": true, + "license": "MIT", + "bin": { + "oxlint": "bin/oxlint" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/sponsors/Boshen" + }, + "optionalDependencies": { + "@oxlint/binding-android-arm-eabi": "1.57.0", + "@oxlint/binding-android-arm64": "1.57.0", + "@oxlint/binding-darwin-arm64": "1.57.0", + "@oxlint/binding-darwin-x64": "1.57.0", + "@oxlint/binding-freebsd-x64": "1.57.0", + "@oxlint/binding-linux-arm-gnueabihf": "1.57.0", + "@oxlint/binding-linux-arm-musleabihf": "1.57.0", + "@oxlint/binding-linux-arm64-gnu": "1.57.0", + "@oxlint/binding-linux-arm64-musl": "1.57.0", + "@oxlint/binding-linux-ppc64-gnu": "1.57.0", + "@oxlint/binding-linux-riscv64-gnu": "1.57.0", + "@oxlint/binding-linux-riscv64-musl": "1.57.0", + "@oxlint/binding-linux-s390x-gnu": "1.57.0", + "@oxlint/binding-linux-x64-gnu": "1.57.0", + "@oxlint/binding-linux-x64-musl": "1.57.0", + "@oxlint/binding-openharmony-arm64": "1.57.0", + "@oxlint/binding-win32-arm64-msvc": "1.57.0", + "@oxlint/binding-win32-ia32-msvc": "1.57.0", + "@oxlint/binding-win32-x64-msvc": "1.57.0" + }, + "peerDependencies": { + "oxlint-tsgolint": ">=0.15.0" + }, + "peerDependenciesMeta": { + "oxlint-tsgolint": { + "optional": true + } + } + }, + "node_modules/oxlint-tsgolint": { + "version": "0.15.0", + "resolved": "https://registry.npmjs.org/oxlint-tsgolint/-/oxlint-tsgolint-0.15.0.tgz", + "integrity": "sha512-iwvFmhKQVZzVTFygUVI4t2S/VKEm+Mqkw3jQRJwfDuTcUYI5LCIYzdO5Dbuv4mFOkXZCcXaRRh0m+uydB5xdqw==", + "dev": true, + "license": "MIT", + "bin": { + "tsgolint": "bin/tsgolint.js" + }, + "optionalDependencies": { + "@oxlint-tsgolint/darwin-arm64": "0.15.0", + "@oxlint-tsgolint/darwin-x64": "0.15.0", + "@oxlint-tsgolint/linux-arm64": "0.15.0", + "@oxlint-tsgolint/linux-x64": "0.15.0", + "@oxlint-tsgolint/win32-arm64": "0.15.0", + "@oxlint-tsgolint/win32-x64": "0.15.0" + } + }, + "node_modules/tsgolint": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/tsgolint/-/tsgolint-0.0.1.tgz", + "integrity": "sha512-cSh6jgqMsVrzaRipcTBDcfiUo3iTK92gukInY0eeFP14ICe1pZjBC+yL1rVfQSBR72ZaBizmwsqEI4g1eqx1Eg==", + "dev": true, + "license": "ISC" + }, + "node_modules/typescript": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.2.tgz", + "integrity": "sha512-bGdAIrZ0wiGDo5l8c++HWtbaNCWTS4UTv7RaTH/ThVIgjkveJt83m74bBHMJkuCbslY8ixgLBVZJIOiQlQTjfQ==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "7.18.2", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", + "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", + "dev": true, + "license": "MIT" + } + } +} diff --git a/src/analysis.ts b/src/analysis.ts index a22d881e..96d4417b 100644 --- a/src/analysis.ts +++ b/src/analysis.ts @@ -10,13 +10,6 @@ export type SegmentBreakKind = | 'soft-hyphen' | 'hard-break' -type SegmentationPiece = { - text: string - isWordLike: boolean - kind: SegmentBreakKind - start: number -} - export type MergedSegmentation = { len: number texts: string[] @@ -46,11 +39,12 @@ type WhiteSpaceProfile = { preserveHardBreaks: boolean } +// Pre-cached profiles — avoids allocating a new object per analyzeText() call. +const WS_PROFILE_NORMAL: WhiteSpaceProfile = { mode: 'normal', preserveOrdinarySpaces: false, preserveHardBreaks: false } +const WS_PROFILE_PRE_WRAP: WhiteSpaceProfile = { mode: 'pre-wrap', preserveOrdinarySpaces: true, preserveHardBreaks: true } + function getWhiteSpaceProfile(whiteSpace?: WhiteSpaceMode): WhiteSpaceProfile { - const mode = whiteSpace ?? 'normal' - return mode === 'pre-wrap' - ? { mode, preserveOrdinarySpaces: true, preserveHardBreaks: true } - : { mode, preserveOrdinarySpaces: false, preserveHardBreaks: false } + return (whiteSpace ?? 'normal') === 'pre-wrap' ? WS_PROFILE_PRE_WRAP : WS_PROFILE_NORMAL } export function normalizeWhitespaceNormal(text: string): string { @@ -94,34 +88,85 @@ export function setAnalysisLocale(locale?: string): void { sharedWordSegmenter = null } -const arabicScriptRe = /\p{Script=Arabic}/u const combiningMarkRe = /\p{M}/u const decimalDigitRe = /\p{Nd}/u +// Fast charCode-based test for Arabic script characters. +// Covers the main Arabic BMP blocks; avoids regex for common text. +function isArabicScriptCharCode(c: number): boolean { + return (c >= 0x0600 && c <= 0x06FF) || // Arabic + (c >= 0x0750 && c <= 0x077F) || // Arabic Supplement + (c >= 0x08A0 && c <= 0x08FF) || // Arabic Extended-A + (c >= 0xFB50 && c <= 0xFDFF) || // Arabic Presentation Forms-A + (c >= 0xFE70 && c <= 0xFEFF) // Arabic Presentation Forms-B +} + function containsArabicScript(text: string): boolean { - return arabicScriptRe.test(text) + for (let i = 0; i < text.length; i++) { + if (isArabicScriptCharCode(text.charCodeAt(i))) return true + } + return false +} + +// Fast charCode-based test for combining marks. +// Covers the most common BMP combining mark ranges used in +// Arabic, Devanagari, Thai, Myanmar, and Latin text. +function isCombiningMark(ch: string): boolean { + const c = ch.charCodeAt(0) + // Fast path: most common combining mark ranges + if ((c >= 0x0300 && c <= 0x036F) || // Combining Diacritical Marks + (c >= 0x0610 && c <= 0x061A) || // Arabic combining above + (c >= 0x064B && c <= 0x065F) || // Arabic tashkeel + c === 0x0670 || // Arabic superscript alef + (c >= 0x06D6 && c <= 0x06ED) || // Arabic extended combining + (c >= 0x0900 && c <= 0x0903) || // Devanagari combining + (c >= 0x093A && c <= 0x094F) || // Devanagari vowel signs + (c >= 0x0951 && c <= 0x0957) || // Devanagari stress marks + c === 0x0962 || c === 0x0963 || // Devanagari vowel sign vocalic + c === 0x0E31 || // Thai combining + (c >= 0x0E34 && c <= 0x0E3A) || // Thai combining vowels + (c >= 0x0E47 && c <= 0x0E4E) || // Thai combining marks + c === 0x1039 || c === 0x103A || // Myanmar virama + (c >= 0x103B && c <= 0x103E) || // Myanmar medials + (c >= 0xFE20 && c <= 0xFE2F)) { // Combining Half Marks + return true + } + // Fallback to regex for rare/extended combining marks + return combiningMarkRe.test(ch) } export function isCJK(s: string): boolean { - for (const ch of s) { - const c = ch.codePointAt(0)! + const len = s.length + for (let i = 0; i < len; i++) { + const c = s.charCodeAt(i) + // BMP CJK ranges (most common) if ((c >= 0x4E00 && c <= 0x9FFF) || (c >= 0x3400 && c <= 0x4DBF) || - (c >= 0x20000 && c <= 0x2A6DF) || - (c >= 0x2A700 && c <= 0x2B73F) || - (c >= 0x2B740 && c <= 0x2B81F) || - (c >= 0x2B820 && c <= 0x2CEAF) || - (c >= 0x2CEB0 && c <= 0x2EBEF) || - (c >= 0x30000 && c <= 0x3134F) || - (c >= 0xF900 && c <= 0xFAFF) || - (c >= 0x2F800 && c <= 0x2FA1F) || (c >= 0x3000 && c <= 0x303F) || (c >= 0x3040 && c <= 0x309F) || (c >= 0x30A0 && c <= 0x30FF) || (c >= 0xAC00 && c <= 0xD7AF) || - (c >= 0xFF00 && c <= 0xFFEF)) { + (c >= 0xFF00 && c <= 0xFFEF) || + (c >= 0xF900 && c <= 0xFAFF)) { return true } + // Surrogate pair → decode astral code point + if (c >= 0xD800 && c <= 0xDBFF && i + 1 < len) { + const lo = s.charCodeAt(i + 1) + if (lo >= 0xDC00 && lo <= 0xDFFF) { + const cp = ((c - 0xD800) << 10) + (lo - 0xDC00) + 0x10000 + if ((cp >= 0x20000 && cp <= 0x2A6DF) || + (cp >= 0x2A700 && cp <= 0x2B73F) || + (cp >= 0x2B740 && cp <= 0x2B81F) || + (cp >= 0x2B820 && cp <= 0x2CEAF) || + (cp >= 0x2CEB0 && cp <= 0x2EBEF) || + (cp >= 0x30000 && cp <= 0x3134F) || + (cp >= 0x2F800 && cp <= 0x2FA1F)) { + return true + } + i++ // skip low surrogate + } + } } return false } @@ -201,9 +246,7 @@ const arabicNoSpaceTrailingPunctuation = new Set([ '\u061B', ]) -const myanmarMedialGlue = new Set([ - '\u104F', -]) + const closingQuoteChars = new Set([ '”', '’', '»', '›', @@ -217,38 +260,58 @@ const closingQuoteChars = new Set([ ]) function isLeftStickyPunctuationSegment(segment: string): boolean { + // Fast path: single-char punctuation (most common case) + if (segment.length === 1) return leftStickyPunctuation.has(segment) + if (segment.length === 0) return false + // Check for escaped quote cluster pattern if (isEscapedQuoteClusterSegment(segment)) return true + // Multi-char: all chars must be punctuation or combining marks after punctuation let sawPunctuation = false - for (const ch of segment) { + for (let i = 0; i < segment.length; i++) { + const ch = segment[i]! if (leftStickyPunctuation.has(ch)) { sawPunctuation = true continue } - if (sawPunctuation && combiningMarkRe.test(ch)) continue + if (sawPunctuation && isCombiningMark(ch)) continue return false } return sawPunctuation } function isCJKLineStartProhibitedSegment(segment: string): boolean { - for (const ch of segment) { + for (let i = 0; i < segment.length; i++) { + const ch = segment[i]! if (!kinsokuStart.has(ch) && !leftStickyPunctuation.has(ch)) return false } return segment.length > 0 } function isForwardStickyClusterSegment(segment: string): boolean { + if (segment.length === 0) return false + // Fast path: single-char segments + if (segment.length === 1) { + return kinsokuEnd.has(segment) || forwardStickyGlue.has(segment) + } if (isEscapedQuoteClusterSegment(segment)) return true - for (const ch of segment) { - if (!kinsokuEnd.has(ch) && !forwardStickyGlue.has(ch) && !combiningMarkRe.test(ch)) return false + for (let i = 0; i < segment.length; i++) { + const ch = segment[i]! + if (!kinsokuEnd.has(ch) && !forwardStickyGlue.has(ch) && !isCombiningMark(ch)) return false } - return segment.length > 0 + return true } function isEscapedQuoteClusterSegment(segment: string): boolean { + if (segment.length === 0) return false + // Quick reject: first char must be \ or a quote/bracket character + const first = segment[0]! + if (first !== '\\' && !kinsokuEnd.has(first) && !leftStickyPunctuation.has(first) && !forwardStickyGlue.has(first)) { + return false + } let sawQuote = false - for (const ch of segment) { - if (ch === '\\' || combiningMarkRe.test(ch)) continue + for (let i = 0; i < segment.length; i++) { + const ch = segment[i]! + if (ch === '\\' || isCombiningMark(ch)) continue if (kinsokuEnd.has(ch) || leftStickyPunctuation.has(ch) || forwardStickyGlue.has(ch)) { sawQuote = true continue @@ -259,12 +322,20 @@ function isEscapedQuoteClusterSegment(segment: string): boolean { } function splitTrailingForwardStickyCluster(text: string): { head: string, tail: string } | null { - const chars = Array.from(text) - let splitIndex = chars.length + let splitIndex = text.length while (splitIndex > 0) { - const ch = chars[splitIndex - 1]! - if (combiningMarkRe.test(ch)) { + const code = text.charCodeAt(splitIndex - 1) + // Skip low surrogates — if previous char is a high surrogate, skip both + if (code >= 0xDC00 && code <= 0xDFFF && splitIndex >= 2) { + const hi = text.charCodeAt(splitIndex - 2) + if (hi >= 0xD800 && hi <= 0xDBFF) { + // This is an astral character — not a combining mark or kinsoku/glue char + break + } + } + const ch = text[splitIndex - 1]! + if (isCombiningMark(ch)) { splitIndex-- continue } @@ -275,35 +346,41 @@ function splitTrailingForwardStickyCluster(text: string): { head: string, tail: break } - if (splitIndex <= 0 || splitIndex === chars.length) return null + if (splitIndex <= 0 || splitIndex === text.length) return null return { - head: chars.slice(0, splitIndex).join(''), - tail: chars.slice(splitIndex).join(''), + head: text.slice(0, splitIndex), + tail: text.slice(splitIndex), } } function isRepeatedSingleCharRun(segment: string, ch: string): boolean { if (segment.length === 0) return false - for (const part of segment) { - if (part !== ch) return false + for (let i = 0; i < segment.length; i++) { + if (segment[i] !== ch) return false } return true } function endsWithArabicNoSpacePunctuation(segment: string): boolean { - if (!containsArabicScript(segment) || segment.length === 0) return false - return arabicNoSpaceTrailingPunctuation.has(segment[segment.length - 1]!) -} - -function endsWithMyanmarMedialGlue(segment: string): boolean { if (segment.length === 0) return false - return myanmarMedialGlue.has(segment[segment.length - 1]!) + // Check cheap trailing-char condition first, before the full Arabic scan + if (!arabicNoSpaceTrailingPunctuation.has(segment[segment.length - 1]!)) return false + return containsArabicScript(segment) } + function splitLeadingSpaceAndMarks(segment: string): { space: string, marks: string } | null { if (segment.length < 2 || segment[0] !== ' ') return null const marks = segment.slice(1) - if (/^\p{M}+$/u.test(marks)) { + // Check all characters are combining marks using fast isCombiningMark + let allMarks = true + for (let i = 0; i < marks.length; i++) { + if (!isCombiningMark(marks[i]!)) { + allMarks = false + break + } + } + if (allMarks) { return { space: ' ', marks } } return null @@ -318,70 +395,86 @@ export function endsWithClosingQuote(text: string): boolean { return false } -function classifySegmentBreakChar(ch: string, whiteSpaceProfile: WhiteSpaceProfile): SegmentBreakKind { - if (whiteSpaceProfile.preserveOrdinarySpaces || whiteSpaceProfile.preserveHardBreaks) { - if (ch === ' ') return 'preserved-space' - if (ch === '\t') return 'tab' - if (whiteSpaceProfile.preserveHardBreaks && ch === '\n') return 'hard-break' +function classifySegmentBreakCharCode(code: number, whiteSpaceProfile: WhiteSpaceProfile): SegmentBreakKind { + if (code === 0x20) { // space + return whiteSpaceProfile.preserveOrdinarySpaces ? 'preserved-space' : 'space' + } + if (code === 0x09) { // tab + return whiteSpaceProfile.preserveOrdinarySpaces ? 'tab' : 'text' } - if (ch === ' ') return 'space' - if (ch === '\u00A0' || ch === '\u202F' || ch === '\u2060' || ch === '\uFEFF') { + if (code === 0x0A) { // newline + return whiteSpaceProfile.preserveHardBreaks ? 'hard-break' : 'text' + } + if (code === 0x00A0 || code === 0x202F || code === 0x2060 || code === 0xFEFF) { return 'glue' } - if (ch === '\u200B') return 'zero-width-break' - if (ch === '\u00AD') return 'soft-hyphen' + if (code === 0x200B) return 'zero-width-break' + if (code === 0x00AD) return 'soft-hyphen' return 'text' } -function splitSegmentByBreakKind( +// Fast check: does this segment contain any break-kind special characters? +// If not, the whole segment is a single 'text' piece and forEachBreakKindPiece can be skipped. +function segmentNeedsSplitting(segment: string): boolean { + for (let i = 0; i < segment.length; i++) { + const c = segment.charCodeAt(i) + if (c === 0x20 || c === 0x09 || c === 0x0A || // space, tab, newline + c === 0x00A0 || c === 0x202F || c === 0x2060 || c === 0xFEFF || // NBSP, NNBSP, WJ, BOM + c === 0x200B || c === 0x00AD) { // ZWSP, SHY + return true + } + } + return false +} + +// Segment splitting — emits pieces directly to a MergeBuilder. +// Avoids allocating a pieces array + piece objects and function-call overhead for callbacks. +function forEachBreakKindPiece( segment: string, - isWordLike: boolean, start: number, whiteSpaceProfile: WhiteSpaceProfile, -): SegmentationPiece[] { - const pieces: SegmentationPiece[] = [] + builder: MergeBuilder, +): void { let currentKind: SegmentBreakKind | null = null - let currentText = '' + let runStart = 0 let currentStart = start let currentWordLike = false - let offset = 0 + const len = segment.length + + for (let i = 0; i < len; i++) { + let code = segment.charCodeAt(i) + let charLen = 1 + // Handle surrogate pairs — astral chars are always 'text' + if (code >= 0xD800 && code <= 0xDBFF && i + 1 < len) { + const lo = segment.charCodeAt(i + 1) + if (lo >= 0xDC00 && lo <= 0xDFFF) { + code = 0x10000 // sentinel: any astral char → 'text' + charLen = 2 + } + } - for (const ch of segment) { - const kind = classifySegmentBreakChar(ch, whiteSpaceProfile) - const wordLike = kind === 'text' && isWordLike + const kind = classifySegmentBreakCharCode(code, whiteSpaceProfile) + const wordLike = kind === 'text' && builder.lastWordLike if (currentKind !== null && kind === currentKind && wordLike === currentWordLike) { - currentText += ch - offset += ch.length + i += charLen - 1 // skip low surrogate if pair continue } if (currentKind !== null) { - pieces.push({ - text: currentText, - isWordLike: currentWordLike, - kind: currentKind, - start: currentStart, - }) + builder.addPiece(segment.slice(runStart, i), currentWordLike, currentKind, currentStart) } currentKind = kind - currentText = ch - currentStart = start + offset + runStart = i + currentStart = start + i currentWordLike = wordLike - offset += ch.length + i += charLen - 1 // skip low surrogate if pair } if (currentKind !== null) { - pieces.push({ - text: currentText, - isWordLike: currentWordLike, - kind: currentKind, - start: currentStart, - }) + builder.addPiece(segment.slice(runStart), currentWordLike, currentKind!, currentStart) } - - return pieces } function isTextRunBoundary(kind: SegmentBreakKind): boolean { @@ -410,29 +503,30 @@ function isUrlQueryBoundarySegment(text: string): boolean { return text.includes('?') && (text.includes('://') || text.startsWith('www.')) } -function mergeUrlLikeRuns(segmentation: MergedSegmentation): MergedSegmentation { - const texts = segmentation.texts.slice() - const isWordLike = segmentation.isWordLike.slice() - const kinds = segmentation.kinds.slice() - const starts = segmentation.starts.slice() +function mergeUrlLikeRunsInPlace(seg: MergedSegmentation): void { + const texts = seg.texts + const isWordLike = seg.isWordLike + const kinds = seg.kinds + const starts = seg.starts + let len = seg.len - for (let i = 0; i < segmentation.len; i++) { - if (kinds[i] !== 'text' || !isUrlLikeRunStart(segmentation, i)) continue + for (let i = 0; i < len; i++) { + if (kinds[i] !== 'text' || !isUrlLikeRunStart(seg, i)) continue let j = i + 1 - while (j < segmentation.len && !isTextRunBoundary(kinds[j]!)) { + while (j < len && !isTextRunBoundary(kinds[j]!)) { texts[i] += texts[j]! isWordLike[i] = true const endsQueryPrefix = texts[j]!.includes('?') - kinds[j] = 'text' texts[j] = '' j++ if (endsQueryPrefix) break } } + // Compact out empty entries let compactLen = 0 - for (let read = 0; read < texts.length; read++) { + for (let read = 0; read < len; read++) { const text = texts[read]! if (text.length === 0) continue if (compactLen !== read) { @@ -448,145 +542,172 @@ function mergeUrlLikeRuns(segmentation: MergedSegmentation): MergedSegmentation isWordLike.length = compactLen kinds.length = compactLen starts.length = compactLen - - return { - len: compactLen, - texts, - isWordLike, - kinds, - starts, - } + seg.len = compactLen } -function mergeUrlQueryRuns(segmentation: MergedSegmentation): MergedSegmentation { - const texts: string[] = [] - const isWordLike: boolean[] = [] - const kinds: SegmentBreakKind[] = [] - const starts: number[] = [] +function mergeUrlQueryRunsInPlace(seg: MergedSegmentation): void { + const texts = seg.texts + const isWordLike = seg.isWordLike + const kinds = seg.kinds + const starts = seg.starts + let len = seg.len + + // We scan for URL query boundaries and merge subsequent runs. + // Since merging can only reduce segments (or insert one merged query), + // we can use a read/write cursor on the same arrays. + // However, query merging could in theory need insertion. But actually + // each query merge consumes N input segments and produces 1 output segment, + // so write <= read always holds. We can do this in-place with a compacting pass. + + let write = 0 + for (let i = 0; i < len; i++) { + const text = texts[i]! + + if (isUrlQueryBoundarySegment(text)) { + // Copy current segment + if (write !== i) { + texts[write] = text + isWordLike[write] = isWordLike[i]! + kinds[write] = kinds[i]! + starts[write] = starts[i]! + } + write++ - for (let i = 0; i < segmentation.len; i++) { - const text = segmentation.texts[i]! - texts.push(text) - isWordLike.push(segmentation.isWordLike[i]!) - kinds.push(segmentation.kinds[i]!) - starts.push(segmentation.starts[i]!) - - if (!isUrlQueryBoundarySegment(text)) continue - - const nextIndex = i + 1 - if ( - nextIndex >= segmentation.len || - isTextRunBoundary(segmentation.kinds[nextIndex]!) - ) { - continue - } + const nextIndex = i + 1 + if ( + nextIndex >= len || + isTextRunBoundary(kinds[nextIndex]!) + ) { + continue + } - let queryText = '' - const queryStart = segmentation.starts[nextIndex]! - let j = nextIndex - while (j < segmentation.len && !isTextRunBoundary(segmentation.kinds[j]!)) { - queryText += segmentation.texts[j]! - j++ - } + // Merge subsequent non-boundary segments into one query segment + let queryText = '' + const queryStart = starts[nextIndex]! + let j = nextIndex + while (j < len && !isTextRunBoundary(kinds[j]!)) { + queryText += texts[j]! + j++ + } - if (queryText.length > 0) { - texts.push(queryText) - isWordLike.push(true) - kinds.push('text') - starts.push(queryStart) - i = j - 1 + if (queryText.length > 0) { + texts[write] = queryText + isWordLike[write] = true + kinds[write] = 'text' + starts[write] = queryStart + write++ + i = j - 1 + } + } else { + if (write !== i) { + texts[write] = text + isWordLike[write] = isWordLike[i]! + kinds[write] = kinds[i]! + starts[write] = starts[i]! + } + write++ } } - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, - } + texts.length = write + isWordLike.length = write + kinds.length = write + starts.length = write + seg.len = write } -const numericJoinerChars = new Set([ - ':', '-', '/', '×', ',', '.', '+', - '\u2013', - '\u2014', -]) - const asciiPunctuationChainSegmentRe = /^[A-Za-z0-9_]+[,:;]*$/ const asciiPunctuationChainTrailingJoinersRe = /[,:;]+$/ function segmentContainsDecimalDigit(text: string): boolean { - for (const ch of text) { - if (decimalDigitRe.test(ch)) return true + for (let i = 0; i < text.length; i++) { + const c = text.charCodeAt(i) + // ASCII digits 0-9 + if (c >= 0x30 && c <= 0x39) return true + // Common non-ASCII decimal digit ranges (Arabic-Indic, Devanagari, etc.) + if (c >= 0x0660 && decimalDigitRe.test(text[i]!)) return true } return false } function isNumericRunSegment(text: string): boolean { if (text.length === 0) return false - for (const ch of text) { - if (decimalDigitRe.test(ch) || numericJoinerChars.has(ch)) continue + for (let i = 0; i < text.length; i++) { + const c = text.charCodeAt(i) + // ASCII digits + if (c >= 0x30 && c <= 0x39) continue + // Numeric joiner chars by charCode + if (c === 0x3A || c === 0x2D || c === 0x2F || c === 0xD7 || // : - / × + c === 0x2C || c === 0x2E || c === 0x2B || // , . + + c === 0x2013 || c === 0x2014) continue // en-dash, em-dash + // Non-ASCII decimal digits + if (c >= 0x0660 && decimalDigitRe.test(text[i]!)) continue return false } return true } -function mergeNumericRuns(segmentation: MergedSegmentation): MergedSegmentation { - const texts: string[] = [] - const isWordLike: boolean[] = [] - const kinds: SegmentBreakKind[] = [] - const starts: number[] = [] +function mergeNumericRunsInPlace(seg: MergedSegmentation): void { + const texts = seg.texts + const isWordLike = seg.isWordLike + const kinds = seg.kinds + const starts = seg.starts + const len = seg.len - for (let i = 0; i < segmentation.len; i++) { - const text = segmentation.texts[i]! - const kind = segmentation.kinds[i]! + let write = 0 + for (let i = 0; i < len; i++) { + const text = texts[i]! + const kind = kinds[i]! if (kind === 'text' && isNumericRunSegment(text) && segmentContainsDecimalDigit(text)) { let mergedText = text let j = i + 1 while ( - j < segmentation.len && - segmentation.kinds[j] === 'text' && - isNumericRunSegment(segmentation.texts[j]!) + j < len && + kinds[j] === 'text' && + isNumericRunSegment(texts[j]!) ) { - mergedText += segmentation.texts[j]! + mergedText += texts[j]! j++ } - texts.push(mergedText) - isWordLike.push(true) - kinds.push('text') - starts.push(segmentation.starts[i]!) + texts[write] = mergedText + isWordLike[write] = true + kinds[write] = 'text' + starts[write] = starts[i]! + write++ i = j - 1 continue } - texts.push(text) - isWordLike.push(segmentation.isWordLike[i]!) - kinds.push(kind) - starts.push(segmentation.starts[i]!) + if (write !== i) { + texts[write] = text + isWordLike[write] = isWordLike[i]! + kinds[write] = kind + starts[write] = starts[i]! + } + write++ } - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, - } + texts.length = write + isWordLike.length = write + kinds.length = write + starts.length = write + seg.len = write } -function mergeAsciiPunctuationChains(segmentation: MergedSegmentation): MergedSegmentation { - const texts: string[] = [] - const isWordLike: boolean[] = [] - const kinds: SegmentBreakKind[] = [] - const starts: number[] = [] +function mergeAsciiPunctuationChainsInPlace(seg: MergedSegmentation): void { + const texts = seg.texts + const isWordLike = seg.isWordLike + const kinds = seg.kinds + const starts = seg.starts + const len = seg.len - for (let i = 0; i < segmentation.len; i++) { - const text = segmentation.texts[i]! - const kind = segmentation.kinds[i]! - const wordLike = segmentation.isWordLike[i]! + let write = 0 + for (let i = 0; i < len; i++) { + const text = texts[i]! + const kind = kinds[i]! + const wordLike = isWordLike[i]! if (kind === 'text' && wordLike && asciiPunctuationChainSegmentRe.test(text)) { let mergedText = text @@ -594,47 +715,81 @@ function mergeAsciiPunctuationChains(segmentation: MergedSegmentation): MergedSe while ( asciiPunctuationChainTrailingJoinersRe.test(mergedText) && - j < segmentation.len && - segmentation.kinds[j] === 'text' && - segmentation.isWordLike[j] && - asciiPunctuationChainSegmentRe.test(segmentation.texts[j]!) + j < len && + kinds[j] === 'text' && + isWordLike[j] && + asciiPunctuationChainSegmentRe.test(texts[j]!) ) { - mergedText += segmentation.texts[j]! + mergedText += texts[j]! j++ } - texts.push(mergedText) - isWordLike.push(true) - kinds.push('text') - starts.push(segmentation.starts[i]!) + texts[write] = mergedText + isWordLike[write] = true + kinds[write] = 'text' + starts[write] = starts[i]! + write++ i = j - 1 continue } - texts.push(text) - isWordLike.push(wordLike) - kinds.push(kind) - starts.push(segmentation.starts[i]!) + if (write !== i) { + texts[write] = text + isWordLike[write] = wordLike + kinds[write] = kind + starts[write] = starts[i]! + } + write++ } - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, - } + texts.length = write + isWordLike.length = write + kinds.length = write + starts.length = write + seg.len = write } -function splitHyphenatedNumericRuns(segmentation: MergedSegmentation): MergedSegmentation { - const texts: string[] = [] - const isWordLike: boolean[] = [] - const kinds: SegmentBreakKind[] = [] - const starts: number[] = [] +function splitHyphenatedNumericRunsInPlace(seg: MergedSegmentation): void { + const texts = seg.texts + const isWordLike = seg.isWordLike + const kinds = seg.kinds + const starts = seg.starts + const len = seg.len + + // First pass: check if any splits are needed at all + let needsSplit = false + for (let i = 0; i < len; i++) { + if (kinds[i] === 'text' && texts[i]!.includes('-')) { + const text = texts[i]! + const parts = text.split('-') + if (parts.length > 1) { + let allNumeric = true + for (let j = 0; j < parts.length; j++) { + const part = parts[j]! + if (part.length === 0 || !segmentContainsDecimalDigit(part) || !isNumericRunSegment(part)) { + allNumeric = false + break + } + } + if (allNumeric) { + needsSplit = true + break + } + } + } + } - for (let i = 0; i < segmentation.len; i++) { - const text = segmentation.texts[i]! - if (segmentation.kinds[i] === 'text' && text.includes('-')) { + if (!needsSplit) return + + // Slow path: allocate new arrays only when splits exist + const newTexts: string[] = [] + const newWordLike: boolean[] = [] + const newKinds: SegmentBreakKind[] = [] + const newStarts: number[] = [] + + for (let i = 0; i < len; i++) { + const text = texts[i]! + if (kinds[i] === 'text' && text.includes('-')) { const parts = text.split('-') let shouldSplit = parts.length > 1 for (let j = 0; j < parts.length; j++) { @@ -654,64 +809,66 @@ function splitHyphenatedNumericRuns(segmentation: MergedSegmentation): MergedSeg for (let j = 0; j < parts.length; j++) { const part = parts[j]! const splitText = j < parts.length - 1 ? `${part}-` : part - texts.push(splitText) - isWordLike.push(true) - kinds.push('text') - starts.push(segmentation.starts[i]! + offset) + newTexts.push(splitText) + newWordLike.push(true) + newKinds.push('text') + newStarts.push(starts[i]! + offset) offset += splitText.length } continue } } - texts.push(text) - isWordLike.push(segmentation.isWordLike[i]!) - kinds.push(segmentation.kinds[i]!) - starts.push(segmentation.starts[i]!) + newTexts.push(text) + newWordLike.push(isWordLike[i]!) + newKinds.push(kinds[i]!) + newStarts.push(starts[i]!) } - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, - } + // Replace the arrays in the segmentation + seg.texts = newTexts + seg.isWordLike = newWordLike + seg.kinds = newKinds + seg.starts = newStarts + seg.len = newTexts.length } -function mergeGlueConnectedTextRuns(segmentation: MergedSegmentation): MergedSegmentation { - const texts: string[] = [] - const isWordLike: boolean[] = [] - const kinds: SegmentBreakKind[] = [] - const starts: number[] = [] +function mergeGlueConnectedTextRunsInPlace(seg: MergedSegmentation): void { + const texts = seg.texts + const isWordLike = seg.isWordLike + const kinds = seg.kinds + const starts = seg.starts + const len = seg.len + let write = 0 let read = 0 - while (read < segmentation.len) { - let text = segmentation.texts[read]! - let wordLike = segmentation.isWordLike[read]! - let kind = segmentation.kinds[read]! - let start = segmentation.starts[read]! + while (read < len) { + let text = texts[read]! + let wordLike = isWordLike[read]! + let kind = kinds[read]! + let start = starts[read]! if (kind === 'glue') { let glueText = text const glueStart = start read++ - while (read < segmentation.len && segmentation.kinds[read] === 'glue') { - glueText += segmentation.texts[read]! + while (read < len && kinds[read] === 'glue') { + glueText += texts[read]! read++ } - if (read < segmentation.len && segmentation.kinds[read] === 'text') { - text = glueText + segmentation.texts[read]! - wordLike = segmentation.isWordLike[read]! + if (read < len && kinds[read] === 'text') { + text = glueText + texts[read]! + wordLike = isWordLike[read]! kind = 'text' start = glueStart read++ } else { - texts.push(glueText) - isWordLike.push(false) - kinds.push('glue') - starts.push(glueStart) + texts[write] = glueText + isWordLike[write] = false + kinds[write] = 'glue' + starts[write] = glueStart + write++ continue } } else { @@ -719,16 +876,16 @@ function mergeGlueConnectedTextRuns(segmentation: MergedSegmentation): MergedSeg } if (kind === 'text') { - while (read < segmentation.len && segmentation.kinds[read] === 'glue') { + while (read < len && kinds[read] === 'glue') { let glueText = '' - while (read < segmentation.len && segmentation.kinds[read] === 'glue') { - glueText += segmentation.texts[read]! + while (read < len && kinds[read] === 'glue') { + glueText += texts[read]! read++ } - if (read < segmentation.len && segmentation.kinds[read] === 'text') { - text += glueText + segmentation.texts[read]! - wordLike = wordLike || segmentation.isWordLike[read]! + if (read < len && kinds[read] === 'text') { + text += glueText + texts[read]! + wordLike = wordLike || isWordLike[read]! read++ continue } @@ -737,28 +894,26 @@ function mergeGlueConnectedTextRuns(segmentation: MergedSegmentation): MergedSeg } } - texts.push(text) - isWordLike.push(wordLike) - kinds.push(kind) - starts.push(start) + texts[write] = text + isWordLike[write] = wordLike + kinds[write] = kind + starts[write] = start + write++ } - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, - } + texts.length = write + isWordLike.length = write + kinds.length = write + starts.length = write + seg.len = write } -function carryTrailingForwardStickyAcrossCJKBoundary(segmentation: MergedSegmentation): MergedSegmentation { - const texts = segmentation.texts.slice() - const isWordLike = segmentation.isWordLike.slice() - const kinds = segmentation.kinds.slice() - const starts = segmentation.starts.slice() +function carryTrailingForwardStickyAcrossCJKBoundaryInPlace(seg: MergedSegmentation): void { + const texts = seg.texts + const kinds = seg.kinds + const starts = seg.starts - for (let i = 0; i < texts.length - 1; i++) { + for (let i = 0; i < seg.len - 1; i++) { if (kinds[i] !== 'text' || kinds[i + 1] !== 'text') continue if (!isCJK(texts[i]!) || !isCJK(texts[i + 1]!)) continue @@ -769,16 +924,135 @@ function carryTrailingForwardStickyAcrossCJKBoundary(segmentation: MergedSegment texts[i + 1] = split.tail + texts[i + 1]! starts[i + 1] = starts[i]! + split.head.length } +} - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, +// Reusable builder avoids closure allocation per buildMergedSegmentation call. +// V8 sees a stable hidden class for the method dispatch. +// Content-presence flags allow skipping post-merge passes that can't fire. +class MergeBuilder { + texts: string[] = [] + isWordLike: boolean[] = [] + kinds: SegmentBreakKind[] = [] + starts: number[] = [] + len = 0 + carryCJK = false + lastWordLike = false + // Content-presence flags — set during addPiece, used to skip post-merge passes + hasGlue = false + hasCJK = false + hasArabicSpace = false + // Set when a non-word text segment is pushed that could be an escaped-quote or forward-sticky cluster. + // When false, the escaped-quote/forward-sticky/compact post-passes can be skipped entirely. + hasNonWordTextSegment = false + + reset(carryCJK: boolean): void { + // Create fresh arrays — previous arrays are owned by the returned MergedSegmentation + this.texts = [] + this.isWordLike = [] + this.kinds = [] + this.starts = [] + this.len = 0 + this.carryCJK = carryCJK + this.hasGlue = false + this.hasCJK = false + this.hasArabicSpace = false + this.hasNonWordTextSegment = false + } + + addPiece(pieceText: string, pieceWordLike: boolean, pieceKind: SegmentBreakKind, pieceStart: number): void { + const len = this.len + + // Track content presence for post-merge pass skipping + if (pieceKind === 'glue') { + this.hasGlue = true + } else if (pieceKind === 'space' || pieceKind === 'preserved-space') { + // Track if we have space + following Arabic pattern + this.hasArabicSpace = true + } + + // Fast path: try to merge into previous text segment + if (pieceKind === 'text' && len > 0 && this.kinds[len - 1] === 'text') { + const prevText = this.texts[len - 1]! + + if (pieceWordLike) { + // Word-like text piece — check Arabic no-space punctuation merge + if ( + containsArabicScript(pieceText) && + endsWithArabicNoSpacePunctuation(prevText) + ) { + this.texts[len - 1] += pieceText + this.isWordLike[len - 1] = true + return + } + } else { + // Non-word-like text piece — check left-sticky punctuation, repeated chars + if ( + isLeftStickyPunctuationSegment(pieceText) || + (pieceText === '-' && this.isWordLike[len - 1]!) + ) { + this.texts[len - 1] += pieceText + return + } + if ( + pieceText.length === 1 && + pieceText !== '-' && + pieceText !== '\u2014' && + isRepeatedSingleCharRun(prevText, pieceText) + ) { + this.texts[len - 1] += pieceText + return + } + } + + // CJK kinsoku: line-start prohibited merge + if ( + isCJKLineStartProhibitedSegment(pieceText) && + isCJK(prevText) + ) { + this.texts[len - 1] += pieceText + this.isWordLike[len - 1] = this.isWordLike[len - 1]! || pieceWordLike + this.hasCJK = true + return + } + + // CJK after closing quote (Chromium profile only) + if ( + this.carryCJK && + isCJK(pieceText) && + isCJK(prevText) && + endsWithClosingQuote(prevText) + ) { + this.texts[len - 1] += pieceText + this.isWordLike[len - 1] = this.isWordLike[len - 1]! || pieceWordLike + this.hasCJK = true + return + } + + // Myanmar medial glue — check last char code directly + if (prevText.charCodeAt(prevText.length - 1) === 0x104F) { + this.texts[len - 1] += pieceText + this.isWordLike[len - 1] = this.isWordLike[len - 1]! || pieceWordLike + return + } + } + + // Track non-word text segments that survive the merge loop — + // only these can trigger the escaped-quote/forward-sticky post-passes + if (pieceKind === 'text' && !pieceWordLike) { + this.hasNonWordTextSegment = true + } + + // No merge — push new segment + this.texts[len] = pieceText + this.isWordLike[len] = pieceWordLike + this.kinds[len] = pieceKind + this.starts[len] = pieceStart + this.len = len + 1 } } +// Module-level singleton — reused across calls. +const mergeBuilder = new MergeBuilder() function buildMergedSegmentation( normalized: string, @@ -786,161 +1060,165 @@ function buildMergedSegmentation( whiteSpaceProfile: WhiteSpaceProfile, ): MergedSegmentation { const wordSegmenter = getSharedWordSegmenter() - let mergedLen = 0 - const mergedTexts: string[] = [] - const mergedWordLike: boolean[] = [] - const mergedKinds: SegmentBreakKind[] = [] - const mergedStarts: number[] = [] - - for (const s of wordSegmenter.segment(normalized)) { - for (const piece of splitSegmentByBreakKind(s.segment, s.isWordLike ?? false, s.index, whiteSpaceProfile)) { - const isText = piece.kind === 'text' - - if ( - profile.carryCJKAfterClosingQuote && - isText && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - isCJK(piece.text) && - isCJK(mergedTexts[mergedLen - 1]!) && - endsWithClosingQuote(mergedTexts[mergedLen - 1]!) - ) { - mergedTexts[mergedLen - 1] += piece.text - mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike - } else if ( - isText && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - isCJKLineStartProhibitedSegment(piece.text) && - isCJK(mergedTexts[mergedLen - 1]!) - ) { - mergedTexts[mergedLen - 1] += piece.text - mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike - } else if ( - isText && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - endsWithMyanmarMedialGlue(mergedTexts[mergedLen - 1]!) - ) { - mergedTexts[mergedLen - 1] += piece.text - mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike - } else if ( - isText && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - piece.isWordLike && - containsArabicScript(piece.text) && - endsWithArabicNoSpacePunctuation(mergedTexts[mergedLen - 1]!) - ) { - mergedTexts[mergedLen - 1] += piece.text - mergedWordLike[mergedLen - 1] = true - } else if ( - isText && - !piece.isWordLike && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - piece.text.length === 1 && - piece.text !== '-' && - piece.text !== '—' && - isRepeatedSingleCharRun(mergedTexts[mergedLen - 1]!, piece.text) - ) { - mergedTexts[mergedLen - 1] += piece.text - } else if ( - isText && - !piece.isWordLike && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - ( - isLeftStickyPunctuationSegment(piece.text) || - (piece.text === '-' && mergedWordLike[mergedLen - 1]!) - ) - ) { - mergedTexts[mergedLen - 1] += piece.text - } else { - mergedTexts[mergedLen] = piece.text - mergedWordLike[mergedLen] = piece.isWordLike - mergedKinds[mergedLen] = piece.kind - mergedStarts[mergedLen] = piece.start - mergedLen++ + const builder = mergeBuilder + builder.reset(profile.carryCJKAfterClosingQuote) + + const segments = wordSegmenter.segment(normalized) + const iter = segments[Symbol.iterator]() + + // Pre-scan normalized text for content-presence flags. + // These are cheap string searches that allow skipping entire post-merge passes. + let hasUrlLikeContent = false + let hasDigit = false + let hasAsciiChainJoiner = false + for (let si = 0; si < normalized.length; si++) { + const c = normalized.charCodeAt(si) + if (!hasDigit && c >= 0x30 && c <= 0x39) hasDigit = true + if (!hasAsciiChainJoiner && (c === 0x3B || c === 0x2C) && si > 0) hasAsciiChainJoiner = true // ; or , (not leading) + if (!hasUrlLikeContent && c === 0x3A && si + 2 < normalized.length && // ':' + normalized.charCodeAt(si + 1) === 0x2F && normalized.charCodeAt(si + 2) === 0x2F) { // '://' + hasUrlLikeContent = true + } + if (hasDigit && hasAsciiChainJoiner && hasUrlLikeContent) break + } + // Also check for 'www.' prefix which doesn't need '://' + if (!hasUrlLikeContent && normalized.length >= 4) { + // Check if 'www.' appears at start or after a space + if (normalized.charCodeAt(0) === 0x77 && normalized.charCodeAt(1) === 0x77 && + normalized.charCodeAt(2) === 0x77 && normalized.charCodeAt(3) === 0x2E) { + hasUrlLikeContent = true + } else { + for (let si = 1; si < normalized.length - 4; si++) { + if (normalized.charCodeAt(si) === 0x20 && // space before 'www.' + normalized.charCodeAt(si + 1) === 0x77 && normalized.charCodeAt(si + 2) === 0x77 && + normalized.charCodeAt(si + 3) === 0x77 && normalized.charCodeAt(si + 4) === 0x2E) { + hasUrlLikeContent = true + break + } } } } + // Check for non-ASCII digits if no ASCII digit found + if (!hasDigit) { + for (let si = 0; si < normalized.length; si++) { + const c = normalized.charCodeAt(si) + if (c >= 0x0660 && c <= 0x0669) { hasDigit = true; break } // Arabic-Indic + if (c >= 0x0966 && c <= 0x096F) { hasDigit = true; break } // Devanagari + if (c >= 0x09E6 && c <= 0x09EF) { hasDigit = true; break } // Bengali + } + } - for (let i = 1; i < mergedLen; i++) { - if ( - mergedKinds[i] === 'text' && - !mergedWordLike[i]! && - isEscapedQuoteClusterSegment(mergedTexts[i]!) && - mergedKinds[i - 1] === 'text' - ) { - mergedTexts[i - 1] += mergedTexts[i]! - mergedWordLike[i - 1] = mergedWordLike[i - 1]! || mergedWordLike[i]! - mergedTexts[i] = '' + for (let r = iter.next(); !r.done; r = iter.next()) { + const s = r.value + const seg = s.segment + const wordLike = s.isWordLike === true + // Fast path: if segment has no special chars, emit as single 'text' piece + if (!segmentNeedsSplitting(seg)) { + builder.addPiece(seg, wordLike, 'text', s.index) + } else { + builder.lastWordLike = wordLike + forEachBreakKindPiece(seg, s.index, whiteSpaceProfile, builder) } } - for (let i = mergedLen - 2; i >= 0; i--) { - if (mergedKinds[i] === 'text' && !mergedWordLike[i]! && isForwardStickyClusterSegment(mergedTexts[i]!)) { - let j = i + 1 - while (j < mergedLen && mergedTexts[j] === '') j++ - if (j < mergedLen && mergedKinds[j] === 'text') { - mergedTexts[j] = mergedTexts[i]! + mergedTexts[j]! - mergedStarts[j] = mergedStarts[i]! + const mergedTexts = builder.texts + const mergedWordLike = builder.isWordLike + const mergedKinds = builder.kinds + const mergedStarts = builder.starts + let mergedLen = builder.len + + // Escaped-quote backward merge + forward-sticky carry + compact pass: + // Only needed when non-word text segments survived the initial merge loop. + if (builder.hasNonWordTextSegment) { + for (let i = 1; i < mergedLen; i++) { + if ( + mergedKinds[i] === 'text' && + !mergedWordLike[i]! && + isEscapedQuoteClusterSegment(mergedTexts[i]!) && + mergedKinds[i - 1] === 'text' + ) { + mergedTexts[i - 1] += mergedTexts[i]! + mergedWordLike[i - 1] = mergedWordLike[i - 1]! || mergedWordLike[i]! mergedTexts[i] = '' } } - } - let compactLen = 0 - for (let read = 0; read < mergedLen; read++) { - const text = mergedTexts[read]! - if (text.length === 0) continue - if (compactLen !== read) { - mergedTexts[compactLen] = text - mergedWordLike[compactLen] = mergedWordLike[read]! - mergedKinds[compactLen] = mergedKinds[read]! - mergedStarts[compactLen] = mergedStarts[read]! + for (let i = mergedLen - 2; i >= 0; i--) { + if (mergedKinds[i] === 'text' && !mergedWordLike[i]! && isForwardStickyClusterSegment(mergedTexts[i]!)) { + let j = i + 1 + while (j < mergedLen && mergedTexts[j] === '') j++ + if (j < mergedLen && mergedKinds[j] === 'text') { + mergedTexts[j] = mergedTexts[i]! + mergedTexts[j]! + mergedStarts[j] = mergedStarts[i]! + mergedTexts[i] = '' + } + } } - compactLen++ + + let compactLen = 0 + for (let read = 0; read < mergedLen; read++) { + const text = mergedTexts[read]! + if (text.length === 0) continue + if (compactLen !== read) { + mergedTexts[compactLen] = text + mergedWordLike[compactLen] = mergedWordLike[read]! + mergedKinds[compactLen] = mergedKinds[read]! + mergedStarts[compactLen] = mergedStarts[read]! + } + compactLen++ + } + mergedLen = compactLen } - mergedTexts.length = compactLen - mergedWordLike.length = compactLen - mergedKinds.length = compactLen - mergedStarts.length = compactLen + // Truncate builder arrays to final length — ownership transfers to the MergedSegmentation. + // The next reset() call will create fresh arrays for the builder. + mergedTexts.length = mergedLen + mergedWordLike.length = mergedLen + mergedKinds.length = mergedLen + mergedStarts.length = mergedLen - const compacted = mergeGlueConnectedTextRuns({ - len: compactLen, + const seg: MergedSegmentation = { + len: mergedLen, texts: mergedTexts, isWordLike: mergedWordLike, kinds: mergedKinds, starts: mergedStarts, - }) - const withMergedUrls = carryTrailingForwardStickyAcrossCJKBoundary( - mergeAsciiPunctuationChains( - splitHyphenatedNumericRuns(mergeNumericRuns(mergeUrlQueryRuns(mergeUrlLikeRuns(compacted)))), - ), - ) + } - for (let i = 0; i < withMergedUrls.len - 1; i++) { - const split = splitLeadingSpaceAndMarks(withMergedUrls.texts[i]!) - if (split === null) continue - if ( - (withMergedUrls.kinds[i] !== 'space' && withMergedUrls.kinds[i] !== 'preserved-space') || - withMergedUrls.kinds[i + 1] !== 'text' || - !containsArabicScript(withMergedUrls.texts[i + 1]!) - ) { - continue - } + // Skip post-merge passes that can't fire based on content-presence flags. + if (builder.hasGlue) mergeGlueConnectedTextRunsInPlace(seg) + if (hasUrlLikeContent) { + mergeUrlLikeRunsInPlace(seg) + mergeUrlQueryRunsInPlace(seg) + } + if (hasDigit) { + mergeNumericRunsInPlace(seg) + splitHyphenatedNumericRunsInPlace(seg) + } + if (hasAsciiChainJoiner) mergeAsciiPunctuationChainsInPlace(seg) + if (builder.hasCJK) carryTrailingForwardStickyAcrossCJKBoundaryInPlace(seg) + + if (builder.hasArabicSpace) { + for (let i = 0; i < seg.len - 1; i++) { + const split = splitLeadingSpaceAndMarks(seg.texts[i]!) + if (split === null) continue + if ( + (seg.kinds[i] !== 'space' && seg.kinds[i] !== 'preserved-space') || + seg.kinds[i + 1] !== 'text' || + !containsArabicScript(seg.texts[i + 1]!) + ) { + continue + } - withMergedUrls.texts[i] = split.space - withMergedUrls.isWordLike[i] = false - withMergedUrls.kinds[i] = withMergedUrls.kinds[i] === 'preserved-space' ? 'preserved-space' : 'space' - withMergedUrls.texts[i + 1] = split.marks + withMergedUrls.texts[i + 1]! - withMergedUrls.starts[i + 1] = withMergedUrls.starts[i]! + split.space.length + seg.texts[i] = split.space + seg.isWordLike[i] = false + seg.kinds[i] = seg.kinds[i] === 'preserved-space' ? 'preserved-space' : 'space' + seg.texts[i + 1] = split.marks + seg.texts[i + 1]! + seg.starts[i + 1] = seg.starts[i]! + split.space.length + } } - return withMergedUrls + return seg } function compileAnalysisChunks(segmentation: MergedSegmentation, whiteSpaceProfile: WhiteSpaceProfile): AnalysisChunk[] { @@ -1002,6 +1280,10 @@ export function analyzeText( return { normalized, chunks: compileAnalysisChunks(segmentation, whiteSpaceProfile), - ...segmentation, + len: segmentation.len, + texts: segmentation.texts, + isWordLike: segmentation.isWordLike, + kinds: segmentation.kinds, + starts: segmentation.starts, } } diff --git a/src/bidi.ts b/src/bidi.ts index f530ff98..c61be161 100644 --- a/src/bidi.ts +++ b/src/bidi.ts @@ -4,170 +4,265 @@ // segments for custom rendering. The line-breaking engine does not consume // these levels. -type BidiType = 'L' | 'R' | 'AL' | 'AN' | 'EN' | 'ES' | 'ET' | 'CS' | - 'ON' | 'BN' | 'B' | 'S' | 'WS' | 'NSM' - -const baseTypes: BidiType[] = [ - 'BN','BN','BN','BN','BN','BN','BN','BN','BN','S','B','S','WS', - 'B','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN', - 'BN','BN','B','B','B','S','WS','ON','ON','ET','ET','ET','ON', - 'ON','ON','ON','ON','ON','CS','ON','CS','ON','EN','EN','EN', - 'EN','EN','EN','EN','EN','EN','EN','ON','ON','ON','ON','ON', - 'ON','ON','L','L','L','L','L','L','L','L','L','L','L','L','L', - 'L','L','L','L','L','L','L','L','L','L','L','L','L','ON','ON', - 'ON','ON','ON','ON','L','L','L','L','L','L','L','L','L','L', - 'L','L','L','L','L','L','L','L','L','L','L','L','L','L','L', - 'L','ON','ON','ON','ON','BN','BN','BN','BN','BN','BN','B','BN', - 'BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN', - 'BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN', - 'BN','CS','ON','ET','ET','ET','ET','ON','ON','ON','ON','L','ON', - 'ON','ON','ON','ON','ET','ET','EN','EN','ON','L','ON','ON','ON', - 'EN','L','ON','ON','ON','ON','ON','L','L','L','L','L','L','L', - 'L','L','L','L','L','L','L','L','L','L','L','L','L','L','L', - 'L','ON','L','L','L','L','L','L','L','L','L','L','L','L','L', - 'L','L','L','L','L','L','L','L','L','L','L','L','L','L','L', - 'L','L','L','ON','L','L','L','L','L','L','L','L' -] - -const arabicTypes: BidiType[] = [ - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'CS','AL','ON','ON','NSM','NSM','NSM','NSM','NSM','NSM','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','NSM','NSM','NSM','NSM','NSM','NSM','NSM', - 'NSM','NSM','NSM','NSM','NSM','NSM','NSM','AL','AL','AL','AL', - 'AL','AL','AL','AN','AN','AN','AN','AN','AN','AN','AN','AN', - 'AN','ET','AN','AN','AL','AL','AL','NSM','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM', - 'NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','ON','NSM', - 'NSM','NSM','NSM','AL','AL','AL','AL','AL','AL','AL','AL','AL', - 'AL','AL','AL','AL','AL','AL','AL','AL','AL' -] - -function classifyChar(charCode: number): BidiType { - if (charCode <= 0x00ff) return baseTypes[charCode]! - if (0x0590 <= charCode && charCode <= 0x05f4) return 'R' - if (0x0600 <= charCode && charCode <= 0x06ff) return arabicTypes[charCode & 0xff]! - if (0x0700 <= charCode && charCode <= 0x08AC) return 'AL' - return 'L' -} +// Numeric bidi type constants — avoids string comparisons in hot loops +const L = 0 +const R = 1 +const AL = 2 +const AN = 3 +const EN = 4 +const ES = 5 +const ET = 6 +const CS = 7 +const ON = 8 +const BN = 9 +const B = 10 +const S = 11 +const WS = 12 +const NSM = 13 + +const baseTypes = new Uint8Array([ + BN,BN,BN,BN,BN,BN,BN,BN,BN,S,B,S,WS, + B,BN,BN,BN,BN,BN,BN,BN,BN,BN,BN,BN,BN, + BN,BN,B,B,B,S,WS,ON,ON,ET,ET,ET,ON, + ON,ON,ON,ON,ON,CS,ON,CS,ON,EN,EN,EN, + EN,EN,EN,EN,EN,EN,EN,ON,ON,ON,ON,ON, + ON,ON,L,L,L,L,L,L,L,L,L,L,L,L,L, + L,L,L,L,L,L,L,L,L,L,L,L,L,ON,ON, + ON,ON,ON,ON,L,L,L,L,L,L,L,L,L,L, + L,L,L,L,L,L,L,L,L,L,L,L,L,L,L, + L,ON,ON,ON,ON,BN,BN,BN,BN,BN,BN,B,BN, + BN,BN,BN,BN,BN,BN,BN,BN,BN,BN,BN,BN, + BN,BN,BN,BN,BN,BN,BN,BN,BN,BN,BN,BN, + BN,CS,ON,ET,ET,ET,ET,ON,ON,ON,ON,L,ON, + ON,ON,ON,ON,ET,ET,EN,EN,ON,L,ON,ON,ON, + EN,L,ON,ON,ON,ON,ON,L,L,L,L,L,L,L, + L,L,L,L,L,L,L,L,L,L,L,L,L,L,L, + L,ON,L,L,L,L,L,L,L,L,L,L,L,L,L, + L,L,L,L,L,L,L,L,L,L,L,L,L,L,L, + L,L,L,ON,L,L,L,L,L,L,L,L +]) + +// Reusable buffer for bidi type classification. Grows as needed, avoiding +// repeated allocation + zero-init for every computeBidiTypes() call. +// Safe because computeBidiTypes() returns the buffer and its only caller +// (computeSegmentLevels) reads it synchronously before the next call. +let typeBuf = new Uint8Array(256) -function computeBidiLevels(str: string): Int8Array | null { +// Module-scope flag set by computeBidiTypes() and read by computeSegmentLevels(). +// When true, all segment levels are 1 (R) — the caller can skip N1 + level +// computation and just fill. Set when text is pure-R (Hebrew-only, no L, no +// weak types, no AL/NSM) so all neutrals resolve to R under sor=R embedding. +let allRLevels = false + +const arabicTypes = new Uint8Array([ + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + CS,AL,ON,ON,NSM,NSM,NSM,NSM,NSM,NSM,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,NSM,NSM,NSM,NSM,NSM,NSM,NSM, + NSM,NSM,NSM,NSM,NSM,NSM,NSM,AL,AL,AL,AL, + AL,AL,AL,AN,AN,AN,AN,AN,AN,AN,AN,AN, + AN,ET,AN,AN,AL,AL,AL,NSM,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,NSM,NSM,NSM,NSM,NSM,NSM,NSM,NSM,NSM,NSM, + NSM,NSM,NSM,NSM,NSM,NSM,NSM,NSM,NSM,ON,NSM, + NSM,NSM,NSM,AL,AL,AL,AL,AL,AL,AL,AL,AL, + AL,AL,AL,AL,AL,AL,AL,AL,AL +]) + +function computeBidiTypes(str: string): Uint8Array | null { const len = str.length + allRLevels = false if (len === 0) return null - // eslint-disable-next-line unicorn/no-new-array - const types: BidiType[] = new Array(len) - let numBidi = 0 + // Fast pre-scan: check if any bidi characters exist before allocating. + // Most text is LTR-only, so this avoids a Uint8Array allocation in the + // common case. Only chars >= 0x0590 can be R/AL/AN. + let hasBidi = false + for (let i = 0; i < len; i++) { + if (str.charCodeAt(i) >= 0x0590) { + hasBidi = true + break + } + } + + if (!hasBidi) return null + // Full classification pass (only reached when bidi chars are present) + // Reuse module-scope buffer to avoid allocation + zero-init per call. + if (typeBuf.length < len) typeBuf = new Uint8Array(len) + const types = typeBuf + let anyBidi = false + let hasWeak = false // EN/ET/ES/CS exist → W4-W7 needed + let hasALorNSM = false // AL or NSM exist → W1+W2+W3 needed for (let i = 0; i < len; i++) { - const t = classifyChar(str.charCodeAt(i)) - if (t === 'R' || t === 'AL' || t === 'AN') numBidi++ + const c = str.charCodeAt(i) + let t: number + if (c <= 0x00ff) { + t = baseTypes[c]! + if (!hasWeak && (t === EN || t === ET || t === ES || t === CS)) hasWeak = true + } + else if (0x0590 <= c && c <= 0x05f4) { + t = R + anyBidi = true + } + else if (0x0600 <= c && c <= 0x06ff) { + t = arabicTypes[c & 0xff]! + hasALorNSM = true + if (!anyBidi && (t === AL || t === AN)) anyBidi = true + } + else if (0x0700 <= c && c <= 0x08AC) { + t = AL + anyBidi = true + hasALorNSM = true + } + else t = L types[i] = t } - if (numBidi === 0) return null + if (!anyBidi) return null - const startLevel = (len / numBidi) < 0.3 ? 0 : 1 - const levels = new Int8Array(len) - for (let i = 0; i < len; i++) levels[i] = startLevel + // Pure-R fast path: when text has no weak types and no AL/NSM, + // check if any L exists. If not, only R and neutrals survive, and + // since sor=R, N1 resolves every neutral run to R. All segment + // levels are 1. This post-scan only runs for Hebrew-only candidates + // (no Arabic, no mixed), so it doesn't add overhead to Arabic/mixed. + if (!hasWeak && !hasALorNSM) { + let pureR = true + for (let i = 0; i < len; i++) { + if (types[i] === L) { pureR = false; break } + } + if (pureR) { + allRLevels = true + return types // return non-null so caller knows bidi exists + } + } - const e: BidiType = (startLevel & 1) ? 'R' : 'L' - const sor = e + // Paragraph direction heuristic: (len / numBidi) < 0.3 ? 0 : 1 + // Since numBidi <= len, len/numBidi >= 1 > 0.3 always → startLevel = 1. + // Embedding direction is always R for an RTL paragraph. + const e = R + const sor = R - // W1-W7 - let lastType: BidiType = sor - for (let i = 0; i < len; i++) { - if (types[i] === 'NSM') types[i] = lastType - else lastType = types[i]! - } - lastType = sor - for (let i = 0; i < len; i++) { - const t = types[i]! - if (t === 'EN') types[i] = lastType === 'AL' ? 'AN' : 'EN' - else if (t === 'R' || t === 'L' || t === 'AL') lastType = t - } - for (let i = 0; i < len; i++) { - if (types[i] === 'AL') types[i] = 'R' + // W1 + W2 + W3: resolve NSM, convert EN after AL, and AL→R. + // Skip entirely for pure Hebrew (no AL, no NSM, no weak types). + if (hasALorNSM || hasWeak) { + let w1Last = sor + let w2Last = sor + for (let i = 0; i < len; i++) { + let t = types[i]! + if (t === NSM) { + t = w1Last + types[i] = t + } + w1Last = t + if (t === EN) { + if (w2Last === AL) { + types[i] = AN + } + } else if (t === R || t === L || t === AL) { + if (t === AL) { + types[i] = R + } + w2Last = t + } + } } - for (let i = 1; i < len - 1; i++) { - if (types[i] === 'ES' && types[i - 1] === 'EN' && types[i + 1] === 'EN') { - types[i] = 'EN' + + // W4-W7: weak type resolution. Skip when no EN/ET/ES/CS exist. + if (hasWeak) { + // W4-W5: ES between EN→EN, CS between EN/AN matching. + // Use running prev to avoid repeated types[i-1] array reads. + let prev = types[0]! + for (let i = 1; i < len - 1; i++) { + const cur = types[i]! + const next = types[i + 1]! + if (cur === ES && prev === EN && next === EN) { + types[i] = EN + prev = EN + } else if ( + cur === CS && + (prev === EN || prev === AN) && + next === prev + ) { + types[i] = prev + // prev stays unchanged (already the resolved value) + } else { + prev = cur + } } - if ( - types[i] === 'CS' && - (types[i - 1] === 'EN' || types[i - 1] === 'AN') && - types[i + 1] === types[i - 1] - ) { - types[i] = types[i - 1]! + + // W5: ET adjacent to EN → EN + for (let i = 0; i < len; i++) { + if (types[i] !== EN) continue + let j + for (j = i - 1; j >= 0 && types[j] === ET; j--) types[j] = EN + for (j = i + 1; j < len && types[j] === ET; j++) types[j] = EN } - } - for (let i = 0; i < len; i++) { - if (types[i] !== 'EN') continue - let j - for (j = i - 1; j >= 0 && types[j] === 'ET'; j--) types[j] = 'EN' - for (j = i + 1; j < len && types[j] === 'ET'; j++) types[j] = 'EN' - } - for (let i = 0; i < len; i++) { - const t = types[i]! - if (t === 'WS' || t === 'ES' || t === 'ET' || t === 'CS') types[i] = 'ON' - } - lastType = sor - for (let i = 0; i < len; i++) { - const t = types[i]! - if (t === 'EN') types[i] = lastType === 'L' ? 'L' : 'EN' - else if (t === 'R' || t === 'L') lastType = t - } - // N1-N2 - for (let i = 0; i < len; i++) { - if (types[i] !== 'ON') continue - let end = i + 1 - while (end < len && types[end] === 'ON') end++ - const before: BidiType = i > 0 ? types[i - 1]! : sor - const after: BidiType = end < len ? types[end]! : sor - const bDir: BidiType = before !== 'L' ? 'R' : 'L' - const aDir: BidiType = after !== 'L' ? 'R' : 'L' - if (bDir === aDir) { - for (let j = i; j < end; j++) types[j] = bDir + // W6 + W7 merged: neutralize weak types and resolve EN after L + let w7Last = sor // W7: tracks previous strong type (R/L only) + for (let i = 0; i < len; i++) { + let t = types[i]! + // W6: remaining weak types → ON + if (t === WS || t === ES || t === ET || t === CS) { + types[i] = ON + } else if (t === EN) { + // W7: EN after L → L + types[i] = w7Last === L ? L : EN + } else if (t === R || t === L) { + w7Last = t + } } - i = end - 1 - } - for (let i = 0; i < len; i++) { - if (types[i] === 'ON') types[i] = e } - // I1-I2 + // N1: resolve neutral (ON/WS) runs based on surrounding strong types. + // WS is treated as neutral here so we can skip the separate WS→ON + // conversion pass in the no-weak-types branch above. for (let i = 0; i < len; i++) { - const t = types[i]! - if ((levels[i]! & 1) === 0) { - if (t === 'R') levels[i]!++ - else if (t === 'AN' || t === 'EN') levels[i]! += 2 - } else if (t === 'L' || t === 'AN' || t === 'EN') { - levels[i]!++ - } + const ti = types[i]! + if (ti !== ON && ti !== WS) continue + let end = i + 1 + while (end < len && (types[end] === ON || types[end] === WS)) end++ + const before = i > 0 ? types[i - 1]! : sor + const after = end < len ? types[end]! : sor + const bDir = before !== L ? R : L + const aDir = after !== L ? R : L + // N1: if directions agree, use that direction; N2: otherwise use embedding + const resolved = bDir === aDir ? bDir : e + for (let j = i; j < end; j++) types[j] = resolved + i = end - 1 } - return levels + return types } export function computeSegmentLevels(normalized: string, segStarts: number[]): Int8Array | null { - const bidiLevels = computeBidiLevels(normalized) - if (bidiLevels === null) return null + const resolvedTypes = computeBidiTypes(normalized) + if (resolvedTypes === null) return null + + // Pure-R fast path: all levels are 1 (no N1, no per-segment type lookup) + if (allRLevels) { + const segLevels = new Int8Array(segStarts.length) + segLevels.fill(1) + return segLevels + } + // I1-I2 levels at segment-start positions only. + // startLevel is always 1 (odd/RTL). After all W+N rules resolve, + // only L/R/AN/EN survive. R→1 (odd stays odd); L/AN/EN→2 (bump to even). const segLevels = new Int8Array(segStarts.length) for (let i = 0; i < segStarts.length; i++) { - segLevels[i] = bidiLevels[segStarts[i]!]! + segLevels[i] = resolvedTypes[segStarts[i]!]! === R ? 1 : 2 } return segLevels } diff --git a/src/line-break.ts b/src/line-break.ts index 57fa1131..b43308fa 100644 --- a/src/line-break.ts +++ b/src/line-break.ts @@ -1,5 +1,5 @@ -import type { SegmentBreakKind } from './analysis.js' -import { getEngineProfile } from './measurement.js' +import type { SegmentBreakKind } from './analysis.ts' +import { getEngineProfile } from './measurement.ts' export type LineBreakCursor = { segmentIndex: number @@ -32,18 +32,11 @@ export type InternalLayoutLine = { } function canBreakAfter(kind: SegmentBreakKind): boolean { - return ( - kind === 'space' || - kind === 'preserved-space' || - kind === 'tab' || - kind === 'zero-width-break' || - kind === 'soft-hyphen' - ) + // Negative check: 3 comparisons instead of 5. + // 'text' is the most common kind, so it short-circuits first. + return kind !== 'text' && kind !== 'glue' && kind !== 'hard-break' } -function isSimpleCollapsibleSpace(kind: SegmentBreakKind): boolean { - return kind === 'space' -} function getTabAdvance(lineWidth: number, tabStopAdvance: number): number { if (tabStopAdvance <= 0) return 0 @@ -68,8 +61,7 @@ function getBreakableAdvance( function fitSoftHyphenBreak( graphemeWidths: number[], initialWidth: number, - maxWidth: number, - lineFitEpsilon: number, + effectiveMaxWidth: number, discretionaryHyphenWidth: number, cumulativeWidths: boolean, ): { fitCount: number, fittedWidth: number } { @@ -83,7 +75,7 @@ function fitSoftHyphenBreak( const nextLineWidth = fitCount + 1 < graphemeWidths.length ? nextWidth + discretionaryHyphenWidth : nextWidth - if (nextLineWidth > maxWidth + lineFitEpsilon) break + if (nextLineWidth > effectiveMaxWidth) break fittedWidth = nextWidth fitCount++ } @@ -137,31 +129,85 @@ export function countPreparedLines(prepared: PreparedLineBreakData, maxWidth: nu return walkPreparedLines(prepared, maxWidth) } -function countPreparedLinesSimple(prepared: PreparedLineBreakData, maxWidth: number): number { - const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared - if (widths.length === 0) return 0 +// Separate from SimpleLineEngine to keep the layout() resize hot path lean: +// SimpleLineCounter carries only 3 state fields vs SimpleLineEngine's 12+. +class SimpleLineCounter { + private lineCount = 0 + private lineW = 0 + private hasContent = false + + constructor( + private readonly p: PreparedLineBreakData, + private readonly maxWidth: number, + private readonly effectiveMaxWidth: number, + private readonly preferPrefixWidths: boolean, + ) {} + + run(): number { + const { widths, kinds } = this.p + if (widths.length === 0) return 0 + + // Cache this.* in locals for the tight inner loop + let lineW = 0 + let lineCount = 0 + let hasContent = false + const effectiveMaxWidth = this.effectiveMaxWidth + + for (let i = 0; i < widths.length; i++) { + const w = widths[i]! + const kind = kinds[i]! - const engineProfile = getEngineProfile() - const lineFitEpsilon = engineProfile.lineFitEpsilon + if (!hasContent) { + // Sync state for placeOnFreshLine + this.lineW = lineW + this.lineCount = lineCount + this.hasContent = hasContent + this.placeOnFreshLine(i) + // Sync back + lineW = this.lineW + lineCount = this.lineCount + hasContent = this.hasContent + continue + } - let lineCount = 0 - let lineW = 0 - let hasContent = false + const newW = lineW + w + if (newW > effectiveMaxWidth) { + if (kind === 'space') continue + lineW = 0 + hasContent = false + // Sync state for placeOnFreshLine + this.lineW = lineW + this.lineCount = lineCount + this.hasContent = hasContent + this.placeOnFreshLine(i) + // Sync back + lineW = this.lineW + lineCount = this.lineCount + hasContent = this.hasContent + continue + } - function placeOnFreshLine(segmentIndex: number): void { + lineW = newW + } + + if (!hasContent) return lineCount + 1 + return lineCount + } + + private placeOnFreshLine(segmentIndex: number): void { + const { widths, breakableWidths, breakablePrefixWidths } = this.p const w = widths[segmentIndex]! + const maxWidth = this.maxWidth if (w > maxWidth && breakableWidths[segmentIndex] !== null) { const gWidths = breakableWidths[segmentIndex]! const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null - lineW = 0 + const effectiveMaxWidth = this.effectiveMaxWidth + const preferPrefixWidths = this.preferPrefixWidths + let lineW = 0 + let lineCount = this.lineCount for (let g = 0; g < gWidths.length; g++) { - const gw = getBreakableAdvance( - gWidths, - gPrefixWidths, - g, - engineProfile.preferPrefixWidthsForBreakableRuns, - ) - if (lineW > 0 && lineW + gw > maxWidth + lineFitEpsilon) { + const gw = getBreakableAdvance(gWidths, gPrefixWidths, g, preferPrefixWidths) + if (lineW > 0 && lineW + gw > effectiveMaxWidth) { lineCount++ lineW = gw } else { @@ -169,658 +215,720 @@ function countPreparedLinesSimple(prepared: PreparedLineBreakData, maxWidth: num lineW += gw } } + this.lineW = lineW + this.lineCount = lineCount } else { - lineW = w - lineCount++ - } - hasContent = true - } - - for (let i = 0; i < widths.length; i++) { - const w = widths[i]! - const kind = kinds[i]! - - if (!hasContent) { - placeOnFreshLine(i) - continue - } - - const newW = lineW + w - if (newW > maxWidth + lineFitEpsilon) { - if (isSimpleCollapsibleSpace(kind)) continue - lineW = 0 - hasContent = false - placeOnFreshLine(i) - continue + this.lineW = w + this.lineCount++ } - - lineW = newW + this.hasContent = true } - - if (!hasContent) return lineCount + 1 - return lineCount } -function walkPreparedLinesSimple( - prepared: PreparedLineBreakData, - maxWidth: number, - onLine?: (line: InternalLayoutLine) => void, -): number { - const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared - if (widths.length === 0) return 0 - - const engineProfile = getEngineProfile() - const lineFitEpsilon = engineProfile.lineFitEpsilon - - let lineCount = 0 - let lineW = 0 - let hasContent = false - let lineStartSegmentIndex = 0 - let lineStartGraphemeIndex = 0 - let lineEndSegmentIndex = 0 - let lineEndGraphemeIndex = 0 - let pendingBreakSegmentIndex = -1 - let pendingBreakPaintWidth = 0 - - function clearPendingBreak(): void { - pendingBreakSegmentIndex = -1 - pendingBreakPaintWidth = 0 - } - - function emitCurrentLine( - endSegmentIndex = lineEndSegmentIndex, - endGraphemeIndex = lineEndGraphemeIndex, - width = lineW, - ): void { - lineCount++ - onLine?.({ - startSegmentIndex: lineStartSegmentIndex, - startGraphemeIndex: lineStartGraphemeIndex, - endSegmentIndex, - endGraphemeIndex, - width, - }) - lineW = 0 - hasContent = false - clearPendingBreak() - } - - function startLineAtSegment(segmentIndex: number, width: number): void { - hasContent = true - lineStartSegmentIndex = segmentIndex - lineStartGraphemeIndex = 0 - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - lineW = width - } +function countPreparedLinesSimple(prepared: PreparedLineBreakData, maxWidth: number): number { + const ep = getEngineProfile() + return new SimpleLineCounter( + prepared, maxWidth, maxWidth + ep.lineFitEpsilon, ep.preferPrefixWidthsForBreakableRuns, + ).run() +} - function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { - hasContent = true - lineStartSegmentIndex = segmentIndex - lineStartGraphemeIndex = graphemeIndex - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = graphemeIndex + 1 - lineW = width - } +class SimpleLineEngine { + // Per-run state + private lineCount = 0 + private lineW = 0 + private hasContent = false + private lineStartSegmentIndex = 0 + private lineStartGraphemeIndex = 0 + private lineEndSegmentIndex = 0 + private lineEndGraphemeIndex = 0 + private pendingBreakSegmentIndex = -1 + private pendingBreakPaintWidth = 0 + // Step mode: first completed line captured here + private stepping = false + private result: InternalLayoutLine | null = null + + constructor( + private readonly p: PreparedLineBreakData, + private readonly maxWidth: number, + private readonly effectiveMaxWidth: number, + private readonly preferPrefixWidths: boolean, + private readonly onLine: ((line: InternalLayoutLine) => void) | undefined, + ) {} + + walkAll(): number { + const { widths, kinds, breakableWidths } = this.p + if (widths.length === 0) return 0 + + const maxWidth = this.maxWidth + const effectiveMaxWidth = this.effectiveMaxWidth + + let i = 0 + while (i < widths.length) { + const w = widths[i]! + const kind = kinds[i]! - function appendWholeSegment(segmentIndex: number, width: number): void { - if (!hasContent) { - startLineAtSegment(segmentIndex, width) - return - } - lineW += width - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } + if (!this.hasContent) { + if (w > maxWidth && breakableWidths[i] !== null) { + this.appendBreakableSegmentFrom(i, 0) + } else { + this.startLineAtSegment(i, w) + } + this.updatePendingBreak(i, w) + i++ + continue + } - function updatePendingBreak(segmentIndex: number, segmentWidth: number): void { - if (!canBreakAfter(kinds[segmentIndex]!)) return - pendingBreakSegmentIndex = segmentIndex + 1 - pendingBreakPaintWidth = lineW - segmentWidth - } + const newW = this.lineW + w + if (newW > effectiveMaxWidth) { + if (canBreakAfter(kind)) { + this.appendWholeSegment(i, w) + this.emitCurrentLine(i + 1, 0, this.lineW - w) + i++ + continue + } - function appendBreakableSegment(segmentIndex: number): void { - appendBreakableSegmentFrom(segmentIndex, 0) - } + if (this.pendingBreakSegmentIndex >= 0) { + this.emitCurrentLine(this.pendingBreakSegmentIndex, 0, this.pendingBreakPaintWidth) + continue + } - function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): void { - const gWidths = breakableWidths[segmentIndex]! - const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null - for (let g = startGraphemeIndex; g < gWidths.length; g++) { - const gw = getBreakableAdvance( - gWidths, - gPrefixWidths, - g, - engineProfile.preferPrefixWidthsForBreakableRuns, - ) + if (w > maxWidth && breakableWidths[i] !== null) { + this.emitCurrentLine() + this.appendBreakableSegmentFrom(i, 0) + i++ + continue + } - if (!hasContent) { - startLineAtGrapheme(segmentIndex, g, gw) + this.emitCurrentLine() continue } - if (lineW + gw > maxWidth + lineFitEpsilon) { - emitCurrentLine() - startLineAtGrapheme(segmentIndex, g, gw) - } else { - lineW += gw - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = g + 1 - } + this.appendWholeSegment(i, w) + this.updatePendingBreak(i, w) + i++ } - if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } + if (this.hasContent) this.emitCurrentLine() + return this.lineCount } - let i = 0 - while (i < widths.length) { - const w = widths[i]! - const kind = kinds[i]! + stepOne(normalizedStart: LineBreakCursor): InternalLayoutLine | null { + const { widths, kinds, breakableWidths } = this.p + const maxWidth = this.maxWidth + const effectiveMaxWidth = this.effectiveMaxWidth - if (!hasContent) { - if (w > maxWidth && breakableWidths[i] !== null) { - appendBreakableSegment(i) - } else { - startLineAtSegment(i, w) - } - updatePendingBreak(i, w) - i++ - continue - } + this.stepping = true + this.lineStartSegmentIndex = normalizedStart.segmentIndex + this.lineStartGraphemeIndex = normalizedStart.graphemeIndex + this.lineEndSegmentIndex = normalizedStart.segmentIndex + this.lineEndGraphemeIndex = normalizedStart.graphemeIndex - const newW = lineW + w - if (newW > maxWidth + lineFitEpsilon) { - if (canBreakAfter(kind)) { - appendWholeSegment(i, w) - emitCurrentLine(i + 1, 0, lineW - w) - i++ + for (let i = normalizedStart.segmentIndex; i < widths.length; i++) { + const w = widths[i]! + const kind = kinds[i]! + const startGraphemeIndex = i === normalizedStart.segmentIndex ? normalizedStart.graphemeIndex : 0 + + if (!this.hasContent) { + if (startGraphemeIndex > 0) { + this.appendBreakableSegmentFrom(i, startGraphemeIndex) + if (this.result !== null) return this.result + } else if (w > maxWidth && breakableWidths[i] !== null) { + this.appendBreakableSegmentFrom(i, 0) + if (this.result !== null) return this.result + } else { + this.startLineAtSegment(i, w) + } + this.updatePendingBreak(i, w) continue } - if (pendingBreakSegmentIndex >= 0) { - emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - continue - } + const newW = this.lineW + w + if (newW > effectiveMaxWidth) { + if (canBreakAfter(kind)) { + this.appendWholeSegment(i, w) + return this.finishLine(i + 1, 0, this.lineW - w) + } - if (w > maxWidth && breakableWidths[i] !== null) { - emitCurrentLine() - appendBreakableSegment(i) - i++ - continue - } + if (this.pendingBreakSegmentIndex >= 0) { + return this.finishLine(this.pendingBreakSegmentIndex, 0, this.pendingBreakPaintWidth) + } - emitCurrentLine() - continue - } + if (w > maxWidth && breakableWidths[i] !== null) { + const currentLine = this.finishLine() + if (currentLine !== null) return currentLine + this.appendBreakableSegmentFrom(i, 0) + if (this.result !== null) return this.result + } - appendWholeSegment(i, w) - updatePendingBreak(i, w) - i++ - } + return this.finishLine() + } - if (hasContent) emitCurrentLine() - return lineCount -} + this.appendWholeSegment(i, w) + this.updatePendingBreak(i, w) + } -export function walkPreparedLines( - prepared: PreparedLineBreakData, - maxWidth: number, - onLine?: (line: InternalLayoutLine) => void, -): number { - if (prepared.simpleLineWalkFastPath) { - return walkPreparedLinesSimple(prepared, maxWidth, onLine) + return this.finishLine() } - const { - widths, - lineEndFitAdvances, - lineEndPaintAdvances, - kinds, - breakableWidths, - breakablePrefixWidths, - discretionaryHyphenWidth, - tabStopAdvance, - chunks, - } = prepared - if (widths.length === 0 || chunks.length === 0) return 0 - - const engineProfile = getEngineProfile() - const lineFitEpsilon = engineProfile.lineFitEpsilon - - let lineCount = 0 - let lineW = 0 - let hasContent = false - let lineStartSegmentIndex = 0 - let lineStartGraphemeIndex = 0 - let lineEndSegmentIndex = 0 - let lineEndGraphemeIndex = 0 - let pendingBreakSegmentIndex = -1 - let pendingBreakFitWidth = 0 - let pendingBreakPaintWidth = 0 - let pendingBreakKind: SegmentBreakKind | null = null - - function clearPendingBreak(): void { - pendingBreakSegmentIndex = -1 - pendingBreakFitWidth = 0 - pendingBreakPaintWidth = 0 - pendingBreakKind = null + private clearPendingBreak(): void { + this.pendingBreakSegmentIndex = -1 + this.pendingBreakPaintWidth = 0 } - function emitCurrentLine( - endSegmentIndex = lineEndSegmentIndex, - endGraphemeIndex = lineEndGraphemeIndex, - width = lineW, + private emitCurrentLine( + endSegmentIndex = this.lineEndSegmentIndex, + endGraphemeIndex = this.lineEndGraphemeIndex, + width = this.lineW, ): void { - lineCount++ - onLine?.({ - startSegmentIndex: lineStartSegmentIndex, - startGraphemeIndex: lineStartGraphemeIndex, + this.lineCount++ + this.onLine?.({ + startSegmentIndex: this.lineStartSegmentIndex, + startGraphemeIndex: this.lineStartGraphemeIndex, endSegmentIndex, endGraphemeIndex, width, }) - lineW = 0 - hasContent = false - clearPendingBreak() + this.lineW = 0 + this.hasContent = false + this.clearPendingBreak() + } + + private finishLine( + endSegmentIndex = this.lineEndSegmentIndex, + endGraphemeIndex = this.lineEndGraphemeIndex, + width = this.lineW, + ): InternalLayoutLine | null { + if (!this.hasContent) return null + return { + startSegmentIndex: this.lineStartSegmentIndex, + startGraphemeIndex: this.lineStartGraphemeIndex, + endSegmentIndex, + endGraphemeIndex, + width, + } } - function startLineAtSegment(segmentIndex: number, width: number): void { - hasContent = true - lineStartSegmentIndex = segmentIndex - lineStartGraphemeIndex = 0 - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - lineW = width + private startLineAtSegment(segmentIndex: number, width: number): void { + this.hasContent = true + this.lineStartSegmentIndex = segmentIndex + this.lineStartGraphemeIndex = 0 + this.lineEndSegmentIndex = segmentIndex + 1 + this.lineEndGraphemeIndex = 0 + this.lineW = width } - function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { - hasContent = true - lineStartSegmentIndex = segmentIndex - lineStartGraphemeIndex = graphemeIndex - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = graphemeIndex + 1 - lineW = width + private startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { + this.hasContent = true + this.lineStartSegmentIndex = segmentIndex + this.lineStartGraphemeIndex = graphemeIndex + this.lineEndSegmentIndex = segmentIndex + this.lineEndGraphemeIndex = graphemeIndex + 1 + this.lineW = width } - function appendWholeSegment(segmentIndex: number, width: number): void { - if (!hasContent) { - startLineAtSegment(segmentIndex, width) + private appendWholeSegment(segmentIndex: number, width: number): void { + if (!this.hasContent) { + this.startLineAtSegment(segmentIndex, width) return } - lineW += width - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } - - function updatePendingBreakForWholeSegment(segmentIndex: number, segmentWidth: number): void { - if (!canBreakAfter(kinds[segmentIndex]!)) return - const fitAdvance = kinds[segmentIndex] === 'tab' ? 0 : lineEndFitAdvances[segmentIndex]! - const paintAdvance = kinds[segmentIndex] === 'tab' ? segmentWidth : lineEndPaintAdvances[segmentIndex]! - pendingBreakSegmentIndex = segmentIndex + 1 - pendingBreakFitWidth = lineW - segmentWidth + fitAdvance - pendingBreakPaintWidth = lineW - segmentWidth + paintAdvance - pendingBreakKind = kinds[segmentIndex]! + this.lineW += width + this.lineEndSegmentIndex = segmentIndex + 1 + this.lineEndGraphemeIndex = 0 } - function appendBreakableSegment(segmentIndex: number): void { - appendBreakableSegmentFrom(segmentIndex, 0) + private updatePendingBreak(segmentIndex: number, segmentWidth: number): void { + if (!canBreakAfter(this.p.kinds[segmentIndex]!)) return + this.pendingBreakSegmentIndex = segmentIndex + 1 + this.pendingBreakPaintWidth = this.lineW - segmentWidth } - function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): void { + private appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIdx: number): void { + const { breakableWidths, breakablePrefixWidths } = this.p const gWidths = breakableWidths[segmentIndex]! const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null - for (let g = startGraphemeIndex; g < gWidths.length; g++) { - const gw = getBreakableAdvance( - gWidths, - gPrefixWidths, - g, - engineProfile.preferPrefixWidthsForBreakableRuns, - ) + const effectiveMaxWidth = this.effectiveMaxWidth + const preferPrefixWidths = this.preferPrefixWidths - if (!hasContent) { - startLineAtGrapheme(segmentIndex, g, gw) + for (let g = startGraphemeIdx; g < gWidths.length; g++) { + const gw = getBreakableAdvance(gWidths, gPrefixWidths, g, preferPrefixWidths) + + if (!this.hasContent) { + this.startLineAtGrapheme(segmentIndex, g, gw) continue } - if (lineW + gw > maxWidth + lineFitEpsilon) { - emitCurrentLine() - startLineAtGrapheme(segmentIndex, g, gw) + if (this.lineW + gw > effectiveMaxWidth) { + if (!this.stepping) { + // Walk mode: emit and continue + this.emitCurrentLine() + this.startLineAtGrapheme(segmentIndex, g, gw) + } else { + // Step mode: capture result and bail + this.result = this.finishLine() + return + } } else { - lineW += gw - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = g + 1 + this.lineW += gw + this.lineEndSegmentIndex = segmentIndex + this.lineEndGraphemeIndex = g + 1 } } - if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 + if (this.hasContent && this.lineEndSegmentIndex === segmentIndex && this.lineEndGraphemeIndex === gWidths.length) { + this.lineEndSegmentIndex = segmentIndex + 1 + this.lineEndGraphemeIndex = 0 } } +} - function continueSoftHyphenBreakableSegment(segmentIndex: number): boolean { - if (pendingBreakKind !== 'soft-hyphen') return false - const gWidths = breakableWidths[segmentIndex]! - if (gWidths === null) return false - const fitWidths = engineProfile.preferPrefixWidthsForBreakableRuns - ? breakablePrefixWidths[segmentIndex] ?? gWidths - : gWidths - const usesPrefixWidths = fitWidths !== gWidths - const { fitCount, fittedWidth } = fitSoftHyphenBreak( - fitWidths, - lineW, - maxWidth, - lineFitEpsilon, +function walkPreparedLinesSimple( + prepared: PreparedLineBreakData, + maxWidth: number, + onLine?: (line: InternalLayoutLine) => void, +): number { + const ep = getEngineProfile() + return new SimpleLineEngine( + prepared, maxWidth, maxWidth + ep.lineFitEpsilon, ep.preferPrefixWidthsForBreakableRuns, onLine, + ).walkAll() +} + +class FullLineEngine { + // Per-run state + private lineCount = 0 + private lineW = 0 + private hasContent = false + private lineStartSegmentIndex = 0 + private lineStartGraphemeIndex = 0 + private lineEndSegmentIndex = 0 + private lineEndGraphemeIndex = 0 + private pendingBreakSegmentIndex = -1 + private pendingBreakFitWidth = 0 + private pendingBreakPaintWidth = 0 + private pendingBreakKind: SegmentBreakKind | null = null + // Step mode: first completed line captured here + private stepping = false + private result: InternalLayoutLine | null = null + + constructor( + private readonly p: PreparedLineBreakData, + private readonly maxWidth: number, + private readonly effectiveMaxWidth: number, + private readonly preferPrefixWidths: boolean, + private readonly preferEarlySoftHyphenBreak: boolean, + private readonly onLine: ((line: InternalLayoutLine) => void) | undefined, + ) {} + + walkAll(): number { + const { + widths, + lineEndFitAdvances, + lineEndPaintAdvances, + kinds, + breakableWidths, discretionaryHyphenWidth, - usesPrefixWidths, - ) - if (fitCount === 0) return false + tabStopAdvance, + chunks, + } = this.p + if (widths.length === 0 || chunks.length === 0) return 0 + + const maxWidth = this.maxWidth + const effectiveMaxWidth = this.effectiveMaxWidth + + for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) { + const chunk = chunks[chunkIndex]! + if (chunk.startSegmentIndex === chunk.endSegmentIndex) { + this.emitEmptyChunk(chunk) + continue + } - lineW = fittedWidth - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = fitCount - clearPendingBreak() + this.hasContent = false + this.lineW = 0 + this.clearPendingBreak() + + let i = chunk.startSegmentIndex + while (i < chunk.endSegmentIndex) { + const kind = kinds[i]! + const w = kind === 'tab' ? getTabAdvance(this.lineW, tabStopAdvance) : widths[i]! + + if (kind === 'soft-hyphen') { + if (this.hasContent) { + this.lineEndSegmentIndex = i + 1 + this.lineEndGraphemeIndex = 0 + this.pendingBreakSegmentIndex = i + 1 + this.pendingBreakFitWidth = this.lineW + discretionaryHyphenWidth + this.pendingBreakPaintWidth = this.lineW + discretionaryHyphenWidth + this.pendingBreakKind = kind + } + i++ + continue + } - if (fitCount === gWidths.length) { - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - return true + if (!this.hasContent) { + if (w > maxWidth && breakableWidths[i] !== null) { + this.appendBreakableSegmentFrom(i, 0) + } else { + this.startLineAtSegment(i, w) + } + this.updatePendingBreakForWholeSegment(i, w) + i++ + continue + } + + const newW = this.lineW + w + if (newW > effectiveMaxWidth) { + const currentBreakFitWidth = this.lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!) + const currentBreakPaintWidth = this.lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!) + + if ( + this.pendingBreakKind === 'soft-hyphen' && + this.preferEarlySoftHyphenBreak && + this.pendingBreakFitWidth <= effectiveMaxWidth + ) { + this.emitCurrentLine(this.pendingBreakSegmentIndex, 0, this.pendingBreakPaintWidth) + continue + } + + if (this.pendingBreakKind === 'soft-hyphen' && this.continueSoftHyphenBreakableSegment(i)) { + i++ + continue + } + + if (canBreakAfter(kind) && currentBreakFitWidth <= effectiveMaxWidth) { + this.appendWholeSegment(i, w) + this.emitCurrentLine(i + 1, 0, currentBreakPaintWidth) + i++ + continue + } + + if (this.pendingBreakSegmentIndex >= 0 && this.pendingBreakFitWidth <= effectiveMaxWidth) { + this.emitCurrentLine(this.pendingBreakSegmentIndex, 0, this.pendingBreakPaintWidth) + continue + } + + if (w > maxWidth && breakableWidths[i] !== null) { + this.emitCurrentLine() + this.appendBreakableSegmentFrom(i, 0) + i++ + continue + } + + this.emitCurrentLine() + continue + } + + this.appendWholeSegment(i, w) + this.updatePendingBreakForWholeSegment(i, w) + i++ + } + + if (this.hasContent) { + const finalPaintWidth = + this.pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex + ? this.pendingBreakPaintWidth + : this.lineW + this.emitCurrentLine(chunk.consumedEndSegmentIndex, 0, finalPaintWidth) + } } - emitCurrentLine( - segmentIndex, - fitCount, - fittedWidth + discretionaryHyphenWidth, - ) - appendBreakableSegmentFrom(segmentIndex, fitCount) - return true + return this.lineCount } - function emitEmptyChunk(chunk: { startSegmentIndex: number, consumedEndSegmentIndex: number }): void { - lineCount++ - onLine?.({ - startSegmentIndex: chunk.startSegmentIndex, - startGraphemeIndex: 0, - endSegmentIndex: chunk.consumedEndSegmentIndex, - endGraphemeIndex: 0, - width: 0, - }) - clearPendingBreak() - } + stepOne(normalizedStart: LineBreakCursor): InternalLayoutLine | null { + const chunkIndex = findChunkIndexForStart(this.p, normalizedStart.segmentIndex) + if (chunkIndex < 0) return null - for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) { - const chunk = chunks[chunkIndex]! + const chunk = this.p.chunks[chunkIndex]! if (chunk.startSegmentIndex === chunk.endSegmentIndex) { - emitEmptyChunk(chunk) - continue + return { + startSegmentIndex: chunk.startSegmentIndex, + startGraphemeIndex: 0, + endSegmentIndex: chunk.consumedEndSegmentIndex, + endGraphemeIndex: 0, + width: 0, + } } - hasContent = false - lineW = 0 - lineStartSegmentIndex = chunk.startSegmentIndex - lineStartGraphemeIndex = 0 - lineEndSegmentIndex = chunk.startSegmentIndex - lineEndGraphemeIndex = 0 - clearPendingBreak() - - let i = chunk.startSegmentIndex - while (i < chunk.endSegmentIndex) { + const { + widths, + lineEndFitAdvances, + lineEndPaintAdvances, + kinds, + breakableWidths, + discretionaryHyphenWidth, + tabStopAdvance, + } = this.p + const maxWidth = this.maxWidth + const effectiveMaxWidth = this.effectiveMaxWidth + + this.stepping = true + this.lineW = 0 + this.hasContent = false + this.lineStartSegmentIndex = normalizedStart.segmentIndex + this.lineStartGraphemeIndex = normalizedStart.graphemeIndex + this.lineEndSegmentIndex = normalizedStart.segmentIndex + this.lineEndGraphemeIndex = normalizedStart.graphemeIndex + this.clearPendingBreak() + + for (let i = normalizedStart.segmentIndex; i < chunk.endSegmentIndex; i++) { const kind = kinds[i]! - const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]! - - if (kind === 'soft-hyphen') { - if (hasContent) { - lineEndSegmentIndex = i + 1 - lineEndGraphemeIndex = 0 - pendingBreakSegmentIndex = i + 1 - pendingBreakFitWidth = lineW + discretionaryHyphenWidth - pendingBreakPaintWidth = lineW + discretionaryHyphenWidth - pendingBreakKind = kind + const startGraphemeIndex = i === normalizedStart.segmentIndex ? normalizedStart.graphemeIndex : 0 + const w = kind === 'tab' ? getTabAdvance(this.lineW, tabStopAdvance) : widths[i]! + + if (kind === 'soft-hyphen' && startGraphemeIndex === 0) { + if (this.hasContent) { + this.lineEndSegmentIndex = i + 1 + this.lineEndGraphemeIndex = 0 + this.pendingBreakSegmentIndex = i + 1 + this.pendingBreakFitWidth = this.lineW + discretionaryHyphenWidth + this.pendingBreakPaintWidth = this.lineW + discretionaryHyphenWidth + this.pendingBreakKind = kind } - i++ continue } - if (!hasContent) { - if (w > maxWidth && breakableWidths[i] !== null) { - appendBreakableSegment(i) + if (!this.hasContent) { + if (startGraphemeIndex > 0) { + this.appendBreakableSegmentFrom(i, startGraphemeIndex) + if (this.result !== null) return this.result + } else if (w > maxWidth && breakableWidths[i] !== null) { + this.appendBreakableSegmentFrom(i, 0) + if (this.result !== null) return this.result } else { - startLineAtSegment(i, w) + this.startLineAtSegment(i, w) } - updatePendingBreakForWholeSegment(i, w) - i++ + this.updatePendingBreakForWholeSegment(i, w) continue } - const newW = lineW + w - if (newW > maxWidth + lineFitEpsilon) { - const currentBreakFitWidth = lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!) - const currentBreakPaintWidth = lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!) + const newW = this.lineW + w + if (newW > effectiveMaxWidth) { + const currentBreakFitWidth = this.lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!) + const currentBreakPaintWidth = this.lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!) if ( - pendingBreakKind === 'soft-hyphen' && - engineProfile.preferEarlySoftHyphenBreak && - pendingBreakFitWidth <= maxWidth + lineFitEpsilon + this.pendingBreakKind === 'soft-hyphen' && + this.preferEarlySoftHyphenBreak && + this.pendingBreakFitWidth <= effectiveMaxWidth ) { - emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - continue + return this.finishLine(this.pendingBreakSegmentIndex, 0, this.pendingBreakPaintWidth) } - if (pendingBreakKind === 'soft-hyphen' && continueSoftHyphenBreakableSegment(i)) { - i++ - continue - } + const softBreakLine = this.maybeFinishAtSoftHyphen(i) + if (softBreakLine !== null) return softBreakLine - if (canBreakAfter(kind) && currentBreakFitWidth <= maxWidth + lineFitEpsilon) { - appendWholeSegment(i, w) - emitCurrentLine(i + 1, 0, currentBreakPaintWidth) - i++ - continue + if (canBreakAfter(kind) && currentBreakFitWidth <= effectiveMaxWidth) { + this.appendWholeSegment(i, w) + return this.finishLine(i + 1, 0, currentBreakPaintWidth) } - if (pendingBreakSegmentIndex >= 0 && pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { - emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - continue + if (this.pendingBreakSegmentIndex >= 0 && this.pendingBreakFitWidth <= effectiveMaxWidth) { + return this.finishLine(this.pendingBreakSegmentIndex, 0, this.pendingBreakPaintWidth) } if (w > maxWidth && breakableWidths[i] !== null) { - emitCurrentLine() - appendBreakableSegment(i) - i++ - continue + const currentLine = this.finishLine() + if (currentLine !== null) return currentLine + this.appendBreakableSegmentFrom(i, 0) + if (this.result !== null) return this.result } - emitCurrentLine() - continue + return this.finishLine() } - appendWholeSegment(i, w) - updatePendingBreakForWholeSegment(i, w) - i++ + this.appendWholeSegment(i, w) + this.updatePendingBreakForWholeSegment(i, w) } - if (hasContent) { - const finalPaintWidth = - pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex - ? pendingBreakPaintWidth - : lineW - emitCurrentLine(chunk.consumedEndSegmentIndex, 0, finalPaintWidth) + if (this.pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex && this.lineEndGraphemeIndex === 0) { + return this.finishLine(chunk.consumedEndSegmentIndex, 0, this.pendingBreakPaintWidth) } - } - - return lineCount -} - -export function layoutNextLineRange( - prepared: PreparedLineBreakData, - start: LineBreakCursor, - maxWidth: number, -): InternalLayoutLine | null { - const normalizedStart = normalizeLineStart(prepared, start) - if (normalizedStart === null) return null - if (prepared.simpleLineWalkFastPath) { - return layoutNextLineRangeSimple(prepared, normalizedStart, maxWidth) + return this.finishLine(chunk.consumedEndSegmentIndex, 0, this.lineW) } - const chunkIndex = findChunkIndexForStart(prepared, normalizedStart.segmentIndex) - if (chunkIndex < 0) return null - - const chunk = prepared.chunks[chunkIndex]! - if (chunk.startSegmentIndex === chunk.endSegmentIndex) { - return { - startSegmentIndex: chunk.startSegmentIndex, - startGraphemeIndex: 0, - endSegmentIndex: chunk.consumedEndSegmentIndex, - endGraphemeIndex: 0, - width: 0, - } + private clearPendingBreak(): void { + this.pendingBreakSegmentIndex = -1 + this.pendingBreakFitWidth = 0 + this.pendingBreakPaintWidth = 0 + this.pendingBreakKind = null } - const { - widths, - lineEndFitAdvances, - lineEndPaintAdvances, - kinds, - breakableWidths, - breakablePrefixWidths, - discretionaryHyphenWidth, - tabStopAdvance, - } = prepared - const engineProfile = getEngineProfile() - const lineFitEpsilon = engineProfile.lineFitEpsilon - - let lineW = 0 - let hasContent = false - const lineStartSegmentIndex = normalizedStart.segmentIndex - const lineStartGraphemeIndex = normalizedStart.graphemeIndex - let lineEndSegmentIndex = lineStartSegmentIndex - let lineEndGraphemeIndex = lineStartGraphemeIndex - let pendingBreakSegmentIndex = -1 - let pendingBreakFitWidth = 0 - let pendingBreakPaintWidth = 0 - let pendingBreakKind: SegmentBreakKind | null = null - - function clearPendingBreak(): void { - pendingBreakSegmentIndex = -1 - pendingBreakFitWidth = 0 - pendingBreakPaintWidth = 0 - pendingBreakKind = null + private emitCurrentLine( + endSegmentIndex = this.lineEndSegmentIndex, + endGraphemeIndex = this.lineEndGraphemeIndex, + width = this.lineW, + ): void { + this.lineCount++ + this.onLine?.({ + startSegmentIndex: this.lineStartSegmentIndex, + startGraphemeIndex: this.lineStartGraphemeIndex, + endSegmentIndex, + endGraphemeIndex, + width, + }) + this.lineW = 0 + this.hasContent = false + this.clearPendingBreak() } - function finishLine( - endSegmentIndex = lineEndSegmentIndex, - endGraphemeIndex = lineEndGraphemeIndex, - width = lineW, + private finishLine( + endSegmentIndex = this.lineEndSegmentIndex, + endGraphemeIndex = this.lineEndGraphemeIndex, + width = this.lineW, ): InternalLayoutLine | null { - if (!hasContent) return null - + if (!this.hasContent) return null return { - startSegmentIndex: lineStartSegmentIndex, - startGraphemeIndex: lineStartGraphemeIndex, + startSegmentIndex: this.lineStartSegmentIndex, + startGraphemeIndex: this.lineStartGraphemeIndex, endSegmentIndex, endGraphemeIndex, width, } } - function startLineAtSegment(segmentIndex: number, width: number): void { - hasContent = true - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - lineW = width + private startLineAtSegment(segmentIndex: number, width: number): void { + this.hasContent = true + this.lineStartSegmentIndex = segmentIndex + this.lineStartGraphemeIndex = 0 + this.lineEndSegmentIndex = segmentIndex + 1 + this.lineEndGraphemeIndex = 0 + this.lineW = width } - function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { - hasContent = true - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = graphemeIndex + 1 - lineW = width + private startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { + this.hasContent = true + this.lineStartSegmentIndex = segmentIndex + this.lineStartGraphemeIndex = graphemeIndex + this.lineEndSegmentIndex = segmentIndex + this.lineEndGraphemeIndex = graphemeIndex + 1 + this.lineW = width } - function appendWholeSegment(segmentIndex: number, width: number): void { - if (!hasContent) { - startLineAtSegment(segmentIndex, width) + private appendWholeSegment(segmentIndex: number, width: number): void { + if (!this.hasContent) { + this.startLineAtSegment(segmentIndex, width) return } - lineW += width - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 + this.lineW += width + this.lineEndSegmentIndex = segmentIndex + 1 + this.lineEndGraphemeIndex = 0 } - function updatePendingBreakForWholeSegment(segmentIndex: number, segmentWidth: number): void { - if (!canBreakAfter(kinds[segmentIndex]!)) return - const fitAdvance = kinds[segmentIndex] === 'tab' ? 0 : lineEndFitAdvances[segmentIndex]! - const paintAdvance = kinds[segmentIndex] === 'tab' ? segmentWidth : lineEndPaintAdvances[segmentIndex]! - pendingBreakSegmentIndex = segmentIndex + 1 - pendingBreakFitWidth = lineW - segmentWidth + fitAdvance - pendingBreakPaintWidth = lineW - segmentWidth + paintAdvance - pendingBreakKind = kinds[segmentIndex]! + private updatePendingBreakForWholeSegment(segmentIndex: number, segmentWidth: number): void { + const { kinds, lineEndFitAdvances, lineEndPaintAdvances } = this.p + const kind = kinds[segmentIndex]! + if (!canBreakAfter(kind)) return + const fitAdvance = kind === 'tab' ? 0 : lineEndFitAdvances[segmentIndex]! + const paintAdvance = kind === 'tab' ? segmentWidth : lineEndPaintAdvances[segmentIndex]! + this.pendingBreakSegmentIndex = segmentIndex + 1 + this.pendingBreakFitWidth = this.lineW - segmentWidth + fitAdvance + this.pendingBreakPaintWidth = this.lineW - segmentWidth + paintAdvance + this.pendingBreakKind = kind } - function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): InternalLayoutLine | null { + private appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIdx: number): void { + const { breakableWidths, breakablePrefixWidths } = this.p const gWidths = breakableWidths[segmentIndex]! const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null - for (let g = startGraphemeIndex; g < gWidths.length; g++) { - const gw = getBreakableAdvance( - gWidths, - gPrefixWidths, - g, - engineProfile.preferPrefixWidthsForBreakableRuns, - ) + const effectiveMaxWidth = this.effectiveMaxWidth + const preferPrefixWidths = this.preferPrefixWidths - if (!hasContent) { - startLineAtGrapheme(segmentIndex, g, gw) + for (let g = startGraphemeIdx; g < gWidths.length; g++) { + const gw = getBreakableAdvance(gWidths, gPrefixWidths, g, preferPrefixWidths) + + if (!this.hasContent) { + this.startLineAtGrapheme(segmentIndex, g, gw) continue } - if (lineW + gw > maxWidth + lineFitEpsilon) { - return finishLine() + if (this.lineW + gw > effectiveMaxWidth) { + if (!this.stepping) { + // Walk mode: emit and continue + this.emitCurrentLine() + this.startLineAtGrapheme(segmentIndex, g, gw) + } else { + // Step mode: capture result and bail + this.result = this.finishLine() + return + } + } else { + this.lineW += gw + this.lineEndSegmentIndex = segmentIndex + this.lineEndGraphemeIndex = g + 1 } + } - lineW += gw - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = g + 1 + if (this.hasContent && this.lineEndSegmentIndex === segmentIndex && this.lineEndGraphemeIndex === gWidths.length) { + this.lineEndSegmentIndex = segmentIndex + 1 + this.lineEndGraphemeIndex = 0 } + } + + private continueSoftHyphenBreakableSegment(segmentIndex: number): boolean { + const { breakableWidths, breakablePrefixWidths, discretionaryHyphenWidth } = this.p + const gWidths = breakableWidths[segmentIndex]! + if (gWidths === null) return false + const fitWidths = this.preferPrefixWidths + ? breakablePrefixWidths[segmentIndex] ?? gWidths + : gWidths + const usesPrefixWidths = fitWidths !== gWidths + const { fitCount, fittedWidth } = fitSoftHyphenBreak( + fitWidths, + this.lineW, + this.effectiveMaxWidth, + discretionaryHyphenWidth, + usesPrefixWidths, + ) + if (fitCount === 0) return false - if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 + this.lineW = fittedWidth + this.lineEndSegmentIndex = segmentIndex + this.lineEndGraphemeIndex = fitCount + this.clearPendingBreak() + + if (fitCount === gWidths.length) { + this.lineEndSegmentIndex = segmentIndex + 1 + this.lineEndGraphemeIndex = 0 + return true } - return null + + this.emitCurrentLine( + segmentIndex, + fitCount, + fittedWidth + discretionaryHyphenWidth, + ) + this.appendBreakableSegmentFrom(segmentIndex, fitCount) + return true } - function maybeFinishAtSoftHyphen(segmentIndex: number): InternalLayoutLine | null { - if (pendingBreakKind !== 'soft-hyphen' || pendingBreakSegmentIndex < 0) return null + private maybeFinishAtSoftHyphen(segmentIndex: number): InternalLayoutLine | null { + if (this.pendingBreakKind !== 'soft-hyphen' || this.pendingBreakSegmentIndex < 0) return null + const { breakableWidths, breakablePrefixWidths, discretionaryHyphenWidth } = this.p const gWidths = breakableWidths[segmentIndex] ?? null if (gWidths !== null) { - const fitWidths = engineProfile.preferPrefixWidthsForBreakableRuns + const fitWidths = this.preferPrefixWidths ? breakablePrefixWidths[segmentIndex] ?? gWidths : gWidths const usesPrefixWidths = fitWidths !== gWidths const { fitCount, fittedWidth } = fitSoftHyphenBreak( fitWidths, - lineW, - maxWidth, - lineFitEpsilon, + this.lineW, + this.effectiveMaxWidth, discretionaryHyphenWidth, usesPrefixWidths, ) if (fitCount === gWidths.length) { - lineW = fittedWidth - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - clearPendingBreak() + this.lineW = fittedWidth + this.lineEndSegmentIndex = segmentIndex + 1 + this.lineEndGraphemeIndex = 0 + this.clearPendingBreak() return null } if (fitCount > 0) { - return finishLine( + return this.finishLine( segmentIndex, fitCount, fittedWidth + discretionaryHyphenWidth, @@ -828,229 +936,65 @@ export function layoutNextLineRange( } } - if (pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { - return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) + if (this.pendingBreakFitWidth <= this.effectiveMaxWidth) { + return this.finishLine(this.pendingBreakSegmentIndex, 0, this.pendingBreakPaintWidth) } return null } - for (let i = normalizedStart.segmentIndex; i < chunk.endSegmentIndex; i++) { - const kind = kinds[i]! - const startGraphemeIndex = i === normalizedStart.segmentIndex ? normalizedStart.graphemeIndex : 0 - const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]! - - if (kind === 'soft-hyphen' && startGraphemeIndex === 0) { - if (hasContent) { - lineEndSegmentIndex = i + 1 - lineEndGraphemeIndex = 0 - pendingBreakSegmentIndex = i + 1 - pendingBreakFitWidth = lineW + discretionaryHyphenWidth - pendingBreakPaintWidth = lineW + discretionaryHyphenWidth - pendingBreakKind = kind - } - continue - } - - if (!hasContent) { - if (startGraphemeIndex > 0) { - const line = appendBreakableSegmentFrom(i, startGraphemeIndex) - if (line !== null) return line - } else if (w > maxWidth && breakableWidths[i] !== null) { - const line = appendBreakableSegmentFrom(i, 0) - if (line !== null) return line - } else { - startLineAtSegment(i, w) - } - updatePendingBreakForWholeSegment(i, w) - continue - } - - const newW = lineW + w - if (newW > maxWidth + lineFitEpsilon) { - const currentBreakFitWidth = lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!) - const currentBreakPaintWidth = lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!) - - if ( - pendingBreakKind === 'soft-hyphen' && - engineProfile.preferEarlySoftHyphenBreak && - pendingBreakFitWidth <= maxWidth + lineFitEpsilon - ) { - return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - } - - const softBreakLine = maybeFinishAtSoftHyphen(i) - if (softBreakLine !== null) return softBreakLine - - if (canBreakAfter(kind) && currentBreakFitWidth <= maxWidth + lineFitEpsilon) { - appendWholeSegment(i, w) - return finishLine(i + 1, 0, currentBreakPaintWidth) - } - - if (pendingBreakSegmentIndex >= 0 && pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { - return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - } - - if (w > maxWidth && breakableWidths[i] !== null) { - const currentLine = finishLine() - if (currentLine !== null) return currentLine - const line = appendBreakableSegmentFrom(i, 0) - if (line !== null) return line - } - - return finishLine() - } - - appendWholeSegment(i, w) - updatePendingBreakForWholeSegment(i, w) + private emitEmptyChunk(chunk: { startSegmentIndex: number, consumedEndSegmentIndex: number }): void { + this.lineCount++ + this.onLine?.({ + startSegmentIndex: chunk.startSegmentIndex, + startGraphemeIndex: 0, + endSegmentIndex: chunk.consumedEndSegmentIndex, + endGraphemeIndex: 0, + width: 0, + }) } +} - if (pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex && lineEndGraphemeIndex === 0) { - return finishLine(chunk.consumedEndSegmentIndex, 0, pendingBreakPaintWidth) +export function walkPreparedLines( + prepared: PreparedLineBreakData, + maxWidth: number, + onLine?: (line: InternalLayoutLine) => void, +): number { + if (prepared.simpleLineWalkFastPath) { + return walkPreparedLinesSimple(prepared, maxWidth, onLine) } - - return finishLine(chunk.consumedEndSegmentIndex, 0, lineW) + const ep = getEngineProfile() + return new FullLineEngine( + prepared, maxWidth, maxWidth + ep.lineFitEpsilon, + ep.preferPrefixWidthsForBreakableRuns, ep.preferEarlySoftHyphenBreak, onLine, + ).walkAll() } -function layoutNextLineRangeSimple( +export function layoutNextLineRange( prepared: PreparedLineBreakData, - normalizedStart: LineBreakCursor, + start: LineBreakCursor, maxWidth: number, ): InternalLayoutLine | null { - const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared - const engineProfile = getEngineProfile() - const lineFitEpsilon = engineProfile.lineFitEpsilon - - let lineW = 0 - let hasContent = false - const lineStartSegmentIndex = normalizedStart.segmentIndex - const lineStartGraphemeIndex = normalizedStart.graphemeIndex - let lineEndSegmentIndex = lineStartSegmentIndex - let lineEndGraphemeIndex = lineStartGraphemeIndex - let pendingBreakSegmentIndex = -1 - let pendingBreakPaintWidth = 0 - - function finishLine( - endSegmentIndex = lineEndSegmentIndex, - endGraphemeIndex = lineEndGraphemeIndex, - width = lineW, - ): InternalLayoutLine | null { - if (!hasContent) return null - - return { - startSegmentIndex: lineStartSegmentIndex, - startGraphemeIndex: lineStartGraphemeIndex, - endSegmentIndex, - endGraphemeIndex, - width, - } - } - - function startLineAtSegment(segmentIndex: number, width: number): void { - hasContent = true - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - lineW = width - } - - function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { - hasContent = true - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = graphemeIndex + 1 - lineW = width - } - - function appendWholeSegment(segmentIndex: number, width: number): void { - if (!hasContent) { - startLineAtSegment(segmentIndex, width) - return - } - lineW += width - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } - - function updatePendingBreak(segmentIndex: number, segmentWidth: number): void { - if (!canBreakAfter(kinds[segmentIndex]!)) return - pendingBreakSegmentIndex = segmentIndex + 1 - pendingBreakPaintWidth = lineW - segmentWidth - } - - function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): InternalLayoutLine | null { - const gWidths = breakableWidths[segmentIndex]! - const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null - for (let g = startGraphemeIndex; g < gWidths.length; g++) { - const gw = getBreakableAdvance( - gWidths, - gPrefixWidths, - g, - engineProfile.preferPrefixWidthsForBreakableRuns, - ) - - if (!hasContent) { - startLineAtGrapheme(segmentIndex, g, gw) - continue - } - - if (lineW + gw > maxWidth + lineFitEpsilon) { - return finishLine() - } - - lineW += gw - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = g + 1 - } - - if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } - return null - } - - for (let i = normalizedStart.segmentIndex; i < widths.length; i++) { - const w = widths[i]! - const kind = kinds[i]! - const startGraphemeIndex = i === normalizedStart.segmentIndex ? normalizedStart.graphemeIndex : 0 - - if (!hasContent) { - if (startGraphemeIndex > 0) { - const line = appendBreakableSegmentFrom(i, startGraphemeIndex) - if (line !== null) return line - } else if (w > maxWidth && breakableWidths[i] !== null) { - const line = appendBreakableSegmentFrom(i, 0) - if (line !== null) return line - } else { - startLineAtSegment(i, w) - } - updatePendingBreak(i, w) - continue - } - - const newW = lineW + w - if (newW > maxWidth + lineFitEpsilon) { - if (canBreakAfter(kind)) { - appendWholeSegment(i, w) - return finishLine(i + 1, 0, lineW - w) - } - - if (pendingBreakSegmentIndex >= 0) { - return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - } - - if (w > maxWidth && breakableWidths[i] !== null) { - const currentLine = finishLine() - if (currentLine !== null) return currentLine - const line = appendBreakableSegmentFrom(i, 0) - if (line !== null) return line - } - - return finishLine() - } + const normalizedStart = normalizeLineStart(prepared, start) + if (normalizedStart === null) return null - appendWholeSegment(i, w) - updatePendingBreak(i, w) + if (prepared.simpleLineWalkFastPath) { + return layoutNextLineRangeSimple(prepared, normalizedStart, maxWidth) } + const ep = getEngineProfile() + return new FullLineEngine( + prepared, maxWidth, maxWidth + ep.lineFitEpsilon, + ep.preferPrefixWidthsForBreakableRuns, ep.preferEarlySoftHyphenBreak, undefined, + ).stepOne(normalizedStart) +} - return finishLine() +function layoutNextLineRangeSimple( + prepared: PreparedLineBreakData, + normalizedStart: LineBreakCursor, + maxWidth: number, +): InternalLayoutLine | null { + const ep = getEngineProfile() + return new SimpleLineEngine( + prepared, maxWidth, maxWidth + ep.lineFitEpsilon, ep.preferPrefixWidthsForBreakableRuns, undefined, + ).stepOne(normalizedStart) }