From 27ccde3c458b5ab19de76304b8c3917b0cd89e8c Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Tue, 11 Nov 2025 14:36:18 -0600 Subject: [PATCH 1/3] change(web): renames and restructures TokenInputSource as PathInputProperties --- .../src/main/correction/context-token.ts | 59 ++-- .../main/correction/context-tokenization.ts | 9 +- .../main/correction/legacy-quotient-spur.ts | 4 +- .../main/correction/search-quotient-node.ts | 34 ++- .../main/correction/search-quotient-root.ts | 4 +- .../main/correction/search-quotient-spur.ts | 31 +- .../context/context-token.tests.ts | 282 +++++++++++++----- .../context/context-tokenization.tests.ts | 10 +- .../context/tokenization-subsets.tests.ts | 40 +-- .../search-quotient-spur.tests.ts | 41 ++- 10 files changed, 349 insertions(+), 165 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts index 408a7e19ac3..0b6adb33bd5 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts @@ -11,7 +11,7 @@ import { applyTransform, buildMergedTransform } from "@keymanapp/models-template import { LexicalModelTypes } from '@keymanapp/common-types'; import { deepCopy, KMWString } from "@keymanapp/web-utils"; -import { SearchQuotientNode, TokenInputSource } from "./search-quotient-node.js"; +import { SearchQuotientNode, PathInputProperties } from "./search-quotient-node.js"; import { TokenSplitMap } from "./context-tokenization.js"; import { LegacyQuotientSpur } from "./legacy-quotient-spur.js"; import { LegacyQuotientRoot } from "./legacy-quotient-root.js"; @@ -107,9 +107,12 @@ export class ContextToken { let searchModule: SearchQuotientNode = new LegacyQuotientRoot(model); const BASE_PROBABILITY = 1; textToCharTransforms(rawText).forEach((transform) => { - let inputMetadata: TokenInputSource = { - trueTransform: transform, - inputStartIndex: 0, + let inputMetadata: PathInputProperties = { + segment: { + trueTransform: transform, + start: 0, + transitionId: undefined + }, bestProbFromSet: BASE_PROBABILITY }; searchModule = new LegacyQuotientSpur(searchModule, [{sample: transform, p: BASE_PROBABILITY}], inputMetadata); @@ -123,7 +126,7 @@ export class ContextToken { * Call this to record the original keystroke Transforms for the context range * corresponding to this token. */ - addInput(inputSource: TokenInputSource, distribution: Distribution) { + addInput(inputSource: PathInputProperties, distribution: Distribution) { this._searchModule = new LegacyQuotientSpur(this._searchModule, distribution, inputSource); } @@ -142,8 +145,8 @@ export class ContextToken { * Denotes the original keystroke Transforms comprising the range corresponding * to this token. */ - get inputRange() { - return this.searchModule.sourceIdentifiers; + get inputSegments() { + return this.searchModule.inputSegments; } /** @@ -160,11 +163,11 @@ export class ContextToken { */ get sourceRangeKey(): string { const components: string[] = []; - const sources = this.searchModule.sourceIdentifiers; + const sources = this.searchModule.inputSegments; for(const source of sources) { - const i = source.inputStartIndex; - components.push(`T${source.trueTransform.id}${i != 0 ? '@' + i : ''}`); + const i = source.segment.start; + components.push(`T${source.segment.transitionId}${i != 0 ? '@' + i : ''}`); } return components.join('+'); @@ -190,7 +193,7 @@ export class ContextToken { // Thus, we don't set the .isWhitespace flag field. const resultToken = new ContextToken(lexicalModel); - let lastSourceInput: TokenInputSource; + let lastSourceInput: PathInputProperties; let lastInputDistrib: Distribution; for(const token of tokensToMerge) { const inputCount = token.inputCount; @@ -201,7 +204,7 @@ export class ContextToken { } // Are we re-merging on a previously split transform? - if(lastSourceInput?.trueTransform != token.inputRange[0].trueTransform) { + if(lastSourceInput?.segment.trueTransform != token.inputSegments[0].segment.trueTransform) { if(lastSourceInput) { resultToken.addInput(lastSourceInput, lastInputDistrib); } // else: there's nothing to add as input @@ -230,9 +233,9 @@ export class ContextToken { // Ignore the last entry for now - it may need to merge with a matching // entry in the next token! for(let i = startIndex; i < inputCount - 1; i++) { - resultToken.addInput(token.inputRange[i], token.searchModule.inputSequence[i]); + resultToken.addInput(token.inputSegments[i], token.searchModule.inputSequence[i]); } - lastSourceInput = token.inputRange[inputCount-1]; + lastSourceInput = token.inputSegments[inputCount-1]; lastInputDistrib = token.searchModule.inputSequence[inputCount-1]; } @@ -255,7 +258,7 @@ export class ContextToken { // Build an alternate version of the transforms: if we preprocess all deleteLefts, // what text remains from each? - const alteredSources = preprocessInputSources(this.inputRange); + const alteredSources = preprocessInputSources(this.inputSegments); const blankContext = { left: '', startOfBuffer: true, endOfBuffer: true }; const splitSpecs = split.matches.slice(); @@ -311,15 +314,17 @@ export class ContextToken { }; }); - const priorSourceInput = overextendedToken.inputRange[lastInputIndex]; + const priorSourceInput = overextendedToken.inputSegments[lastInputIndex]; constructingToken.addInput(priorSourceInput, headDistribution); tokensFromSplit.push(constructingToken); constructingToken = new ContextToken(lexicalModel); backupToken = new ContextToken(constructingToken); constructingToken.addInput({ - trueTransform: priorSourceInput.trueTransform, - inputStartIndex: priorSourceInput.inputStartIndex + extraCharsAdded, + segment: { + ...priorSourceInput.segment, + start: priorSourceInput.segment.start + extraCharsAdded + }, bestProbFromSet: priorSourceInput.bestProbFromSet }, tailDistribution); @@ -336,8 +341,8 @@ export class ContextToken { backupToken = new ContextToken(constructingToken); lenBeforeLastApply = KMWString.length(currentText.left); - currentText = applyTransform(alteredSources[transformIndex].trueTransform, currentText); - constructingToken.addInput(this.inputRange[transformIndex], this.searchModule.inputSequence[transformIndex]); + currentText = applyTransform(alteredSources[transformIndex].segment.trueTransform, currentText); + constructingToken.addInput(this.inputSegments[transformIndex], this.searchModule.inputSequence[transformIndex]); transformIndex++; } @@ -345,25 +350,25 @@ export class ContextToken { } } -export function preprocessInputSources(inputSources: ReadonlyArray) { +export function preprocessInputSources(inputSources: ReadonlyArray) { const alteredSources = deepCopy(inputSources); let trickledDeleteLeft = 0; for(let i = alteredSources.length - 1; i >= 0; i--) { const source = alteredSources[i]; if(trickledDeleteLeft) { - const insLen = KMWString.length(source.trueTransform.insert); + const insLen = KMWString.length(source.segment.trueTransform.insert); if(insLen <= trickledDeleteLeft) { - source.trueTransform.insert = ''; + source.segment.trueTransform.insert = ''; trickledDeleteLeft -= insLen; } else { - source.trueTransform.insert = KMWString.substring(source.trueTransform.insert, 0, insLen - trickledDeleteLeft); + source.segment.trueTransform.insert = KMWString.substring(source.segment.trueTransform.insert, 0, insLen - trickledDeleteLeft); trickledDeleteLeft = 0; } } - trickledDeleteLeft += source.trueTransform.deleteLeft; - source.trueTransform.deleteLeft = 0; + trickledDeleteLeft += source.segment.trueTransform.deleteLeft; + source.segment.trueTransform.deleteLeft = 0; } - alteredSources[0].trueTransform.deleteLeft = trickledDeleteLeft; + alteredSources[0].segment.trueTransform.deleteLeft = trickledDeleteLeft; return alteredSources; } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts index 72f0f53d246..20c95b1ecc2 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts @@ -585,7 +585,14 @@ export class ContextTokenization { if(affectedToken.inputCount == 0 && distribution[0].sample.deleteLeft != 0) { distribution = distribution.map((mass) => ({sample: { ...mass.sample, deleteLeft: 0 }, p: mass.p })); } - affectedToken.addInput({trueTransform: sourceInput, inputStartIndex: appliedLength, bestProbFromSet}, distribution); + affectedToken.addInput({ + segment: { + trueTransform: sourceInput, + transitionId: sourceInput.id, + start: appliedLength + }, + bestProbFromSet: bestProbFromSet + }, distribution); appliedLength += KMWString.length(distribution[0].sample.insert); const tokenize = determineModelTokenizer(lexicalModel); diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts index 4e67adabaa9..973e4fb0a8e 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts @@ -11,7 +11,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { SearchNode } from './distance-modeler.js'; -import { PathResult, SearchQuotientNode, TokenInputSource } from './search-quotient-node.js'; +import { PathResult, SearchQuotientNode, PathInputProperties } from './search-quotient-node.js'; import { SearchQuotientSpur } from './search-quotient-spur.js'; import Distribution = LexicalModelTypes.Distribution; @@ -28,7 +28,7 @@ export class LegacyQuotientSpur extends SearchQuotientSpur { * @param inputs * @param bestProbFromSet */ - constructor(space: SearchQuotientNode, inputs: Distribution, inputSource: TokenInputSource | ProbabilityMass) { + constructor(space: SearchQuotientNode, inputs: Distribution, inputSource: PathInputProperties | ProbabilityMass) { super(space, inputs, inputSource); this.queueNodes(this.buildEdgesForNodes(space.previousResults.map(r => r.node))); return; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts index 7a490acb7ca..6a2c12d1eec 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -39,26 +39,38 @@ type CompleteSearchPath = { export type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath; -/** - * Models the properties and portion of an input event applied by a SearchSpace for - * correction-search purposes. - */ -export interface TokenInputSource { +export interface InputSegment { /** * The Transform corresponding to the keystroke applied to the true context * for this input event. * - * NOTE: outside of use for .sourceText / .likeliestSourceText, the only part - * that should actually be referenced is the Transform / transition ID. + * @deprecated Slated for removal within epic/autocorrect. */ trueTransform: Transform; + /** + * The transform / transition ID of the corresponding input event. + */ + transitionId: number, + /** * Marks the initial index (inclusive) within the insert strings for the - * corresponding transitions' Transforms that is applied by the corresponding + * corresponding transitions' Transforms that are applied by the corresponding * tokenized correction-search input. */ - inputStartIndex: number; + start: number +} + +/** + * Models the properties and portion of an input event applied by a SearchSpace for + * correction-search purposes. + */ +export interface PathInputProperties { + /** + * Denotes the portion of the ongoing input stream represented by the corresponding + * input distribution(s) of a SearchSpace. + */ + segment: InputSegment; /** * Notes the highest probability found in the input event's transform @@ -154,8 +166,10 @@ export interface SearchQuotientNode { /** * Gets components useful for building a string-based representation of the * keystroke range corrected by this search space. + * + * TODO: will return only the `inputSegment` part of each entry in the future. */ - readonly sourceIdentifiers: TokenInputSource[]; + readonly inputSegments: PathInputProperties[]; } /** diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts index 7bef9bfae68..d1e1ce6d248 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts @@ -2,7 +2,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { SearchNode, SearchResult } from './distance-modeler.js'; -import { generateSpaceSeed, PathResult, SearchQuotientNode, TokenInputSource } from './search-quotient-node.js'; +import { generateSpaceSeed, PathInputProperties, PathResult, SearchQuotientNode } from './search-quotient-node.js'; import LexicalModel = LexicalModelTypes.LexicalModel; @@ -90,7 +90,7 @@ export class SearchQuotientRoot implements SearchQuotientNode { } } - get sourceIdentifiers(): TokenInputSource[] { + get inputSegments(): PathInputProperties[] { return []; } } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts index ef0e3b22de1..f6cf4a52119 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -12,7 +12,7 @@ import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keyman import { LexicalModelTypes } from '@keymanapp/common-types'; import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js'; -import { generateSpaceSeed, PathResult, SearchQuotientNode, TokenInputSource } from './search-quotient-node.js'; +import { generateSpaceSeed, PathResult, SearchQuotientNode, PathInputProperties } from './search-quotient-node.js'; import Distribution = LexicalModelTypes.Distribution; import ProbabilityMass = LexicalModelTypes.ProbabilityMass; @@ -27,7 +27,7 @@ export const QUEUE_NODE_COMPARATOR: Comparator = function(arg1, arg2 export abstract class SearchQuotientSpur implements SearchQuotientNode { private selectionQueue: PriorityQueue = new PriorityQueue(QUEUE_NODE_COMPARATOR); readonly inputs?: Distribution; - readonly inputSource?: TokenInputSource; + readonly inputSource?: PathInputProperties; private parentNode: SearchQuotientNode; readonly spaceId: number; @@ -61,28 +61,31 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { constructor( parentNode: SearchQuotientNode, inputs: Distribution>, - inputSource: TokenInputSource | ProbabilityMass + inputSource: PathInputProperties | ProbabilityMass ) { this.spaceId = generateSpaceSeed(); // Coerce inputSource to TokenInputSource format. - if(inputSource && (inputSource as TokenInputSource).trueTransform == undefined) { + if(inputSource && (inputSource as ProbabilityMass).sample != undefined) { const keystroke = inputSource as ProbabilityMass; inputSource = { - trueTransform: keystroke.sample, - bestProbFromSet: keystroke.p, - inputStartIndex: 0 + segment: { + trueTransform: keystroke.sample, + transitionId: keystroke.sample.id, + start: 0 + }, + bestProbFromSet: keystroke.p } }; - const inputSrc = inputSource as TokenInputSource; + const inputSrc = inputSource as PathInputProperties; const transitionId = (inputs?.[0].sample.id); - if(transitionId !== undefined && inputSrc?.trueTransform.id != transitionId) { + if(transitionId !== undefined && inputSrc?.segment.transitionId != transitionId) { throw new Error("Input distribution and input-source transition IDs must match"); } this.parentNode = parentNode; - this.inputSource = inputSource as TokenInputSource; + this.inputSource = inputSource as PathInputProperties; this.lowestPossibleSingleCost = (parentNode?.lowestPossibleSingleCost ?? 0) - Math.log(inputSrc?.bestProbFromSet ?? 1); this.inputs = inputs?.length > 0 ? inputs : null; this.inputCount = (parentNode?.inputCount ?? 0) + (this.inputs ? 1 : 0); @@ -249,15 +252,15 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { return Object.values(this.returnedValues ?? {}).map(v => new SearchResult(v)); } - public get sourceIdentifiers(): TokenInputSource[] { + public get inputSegments(): PathInputProperties[] { if(!this.parentNode) { return []; } - const parentSources = this.parentNode.sourceIdentifiers; + const parentSources = this.parentNode.inputSegments; if(this.inputSource) { - const inputId = this.inputSource.trueTransform.id; - if(inputId && parentSources.length > 0 && parentSources[parentSources.length - 1].trueTransform.id == inputId) { + const inputId = this.inputSource.segment.transitionId; + if(inputId && parentSources.length > 0 && parentSources[parentSources.length - 1].segment.transitionId == inputId) { return parentSources; } diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts index d8ba6d2476d..1a78e7f96d3 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts @@ -107,9 +107,9 @@ describe('ContextToken', function() { const merged = ContextToken.merge([token1, token2, token3], plainModel); assert.equal(merged.exampleInput, "can't"); - token1.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); - token2.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); - token3.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); + token1.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1)); + token2.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1)); + token3.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1)); assert.isTrue(quotientPathHasInputs( merged.searchModule, [ @@ -129,26 +129,35 @@ describe('ContextToken', function() { const token3 = new ContextToken(plainModel); token1.addInput({ - trueTransform: srcTransform, - inputStartIndex: 0, + segment: { + trueTransform: srcTransform, + transitionId: srcTransform.id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: {insert: 'can', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); token2.addInput({ - trueTransform: srcTransform, - inputStartIndex: 3, + segment: { + trueTransform: srcTransform, + transitionId: srcTransform.id, + start: 3 + }, bestProbFromSet: 1 }, [{sample: {insert: "'", deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); token3.addInput({ - trueTransform: srcTransform, - inputStartIndex: 4, + segment: { + trueTransform: srcTransform, + transitionId: srcTransform.id, + start: 4 + }, bestProbFromSet: 1 }, [{sample: {insert: 't', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); const merged = ContextToken.merge([token1, token2, token3], plainModel); assert.equal(merged.exampleInput, "can't"); - assert.deepEqual(merged.inputRange, [ { trueTransform: srcTransform, inputStartIndex: 0, bestProbFromSet: 1 } ]); + assert.deepEqual(merged.inputSegments, [ { segment: {trueTransform: srcTransform, transitionId: srcTransform.id, start: 0}, bestProbFromSet: 1 } ]); assert.equal(merged.searchModule.inputCount, 1); assert.deepEqual((merged.searchModule as SearchQuotientSpur).lastInput, [{sample: srcTransform, p: 1}]); }); @@ -173,42 +182,60 @@ describe('ContextToken', function() { const tokensToMerge = [token1, token2, token3, token4] token1.addInput({ - trueTransform: srcTransform1, - inputStartIndex: 0, + segment: { + trueTransform: srcTransform1, + transitionId: srcTransform1.id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: srcTransform1, p: 1}]); token1.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 0, + segment: { + trueTransform: srcTransform2, + transitionId: srcTransform2.id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token2.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 1, + segment: { + trueTransform: srcTransform2, + transitionId: srcTransform2.id, + start: 1 + }, bestProbFromSet: 1 }, [{sample: {insert: "and", deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token3.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 4, + segment: { + trueTransform: srcTransform2, + transitionId: srcTransform2.id, + start: 4 + }, bestProbFromSet: 1 }, [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token3.addInput({ - trueTransform: srcTransform3, - inputStartIndex: 0, + segment: { + trueTransform: srcTransform3, + transitionId: srcTransform3.id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: srcTransform3, p: 1}]); token4.addInput({ - trueTransform: srcTransform4, - inputStartIndex: 0, + segment: { + trueTransform: srcTransform4, + transitionId: srcTransform4.id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: srcTransform4, p: 1}]); const merged = ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, "applesandsourgrapes"); - assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) )); + assert.deepEqual(merged.inputSegments, srcTransforms.map((t) => ({ segment: {trueTransform: t, transitionId: t.id, start: 0}, bestProbFromSet: 1 }) )); assert.isTrue(quotientPathHasInputs( merged.searchModule, srcTransforms.map((t) => ([{sample: t, p: 1}])) @@ -235,42 +262,60 @@ describe('ContextToken', function() { const tokensToMerge = [token1, token2, token3, token4] token1.addInput({ - trueTransform: srcTransform1, - inputStartIndex: 0, + segment: { + trueTransform: srcTransform1, + transitionId: srcTransform1.id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: srcTransform1, p: 1}]); token1.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 0, + segment: { + trueTransform: srcTransform2, + transitionId: srcTransform2.id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token2.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 1, + segment: { + trueTransform: srcTransform2, + transitionId: srcTransform2.id, + start: 1 + }, bestProbFromSet: 1 }, [{sample: {insert: toMathematicalSMP("and"), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token3.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 4, + segment: { + trueTransform: srcTransform2, + transitionId: srcTransform2.id, + start: 4 + }, bestProbFromSet: 1 }, [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token3.addInput({ - trueTransform: srcTransform3, - inputStartIndex: 0, + segment: { + trueTransform: srcTransform3, + transitionId: srcTransform3.id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: srcTransform3, p: 1}]); token4.addInput({ - trueTransform: srcTransform4, - inputStartIndex: 0, + segment: { + trueTransform: srcTransform4, + transitionId: srcTransform4.id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: srcTransform4, p: 1}]); const merged = ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes")); - assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) )); + assert.deepEqual(merged.inputSegments, srcTransforms.map((t) => ({segment: { trueTransform: t, transitionId: t.id, start: 0}, bestProbFromSet: 1 }) )); assert.isTrue(quotientPathHasInputs( merged.searchModule, srcTransforms.map((t) => ([{sample: t, p: 1}])) @@ -302,7 +347,13 @@ describe('ContextToken', function() { const tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: .75}, keystrokeDistributions[i]); + tokenToSplit.addInput({ + segment: { + trueTransform: keystrokeDistributions[i][0].sample, + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, bestProbFromSet: .75 + }, keystrokeDistributions[i]); }; assert.equal(tokenToSplit.sourceRangeKey, 'T11+T12+T13+T14'); @@ -338,7 +389,14 @@ describe('ContextToken', function() { const tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: 1}, keystrokeDistributions[i]); + tokenToSplit.addInput({ + segment: { + trueTransform: keystrokeDistributions[i][0].sample, + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, keystrokeDistributions[i]); }; assert.equal(tokenToSplit.sourceRangeKey, `T${keystrokeDistributions[0][0].sample.id}`); @@ -360,14 +418,17 @@ describe('ContextToken', function() { assert.equal(resultsOfSplit.length, 3); assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), splitTextArray); - assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.inputRange[0]), [0, 3, 8].map(i => ({ - trueTransform: { - insert: 'biglargetransform', - deleteLeft: 0, - deleteRight: 0, - id: keystrokeDistributions[0][0].sample.id + assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.inputSegments[0]), [0, 3, 8].map(i => ({ + segment: { + trueTransform: { + insert: 'biglargetransform', + id: keystrokeDistributions[0][0].sample.id, + deleteLeft: 0, + deleteRight: 0 + }, + transitionId: keystrokeDistributions[0][0].sample.id, + start: i }, - inputStartIndex: i, bestProbFromSet: 1 }))); @@ -394,7 +455,14 @@ describe('ContextToken', function() { const tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: 1}, keystrokeDistributions[i]); + tokenToSplit.addInput({ + segment: { + trueTransform: keystrokeDistributions[i][0].sample, + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, keystrokeDistributions[i]); }; assert.equal(tokenToSplit.exampleInput, 'largelongtransforms'); @@ -416,16 +484,49 @@ describe('ContextToken', function() { assert.equal(resultsOfSplit.length, 3); assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), splitTextArray); - assert.deepEqual(resultsOfSplit[0].inputRange, [ - { trueTransform: keystrokeDistributions[0][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, - { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, + assert.deepEqual(resultsOfSplit[0].inputSegments, [ + { + segment: { + trueTransform: keystrokeDistributions[0][0].sample, + transitionId: keystrokeDistributions[0][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, { + segment: { + trueTransform: keystrokeDistributions[1][0].sample, + transitionId: keystrokeDistributions[1][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, ]); - assert.deepEqual(resultsOfSplit[1].inputRange, [ - { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 'arge'.length, bestProbFromSet: 1 }, - { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, + assert.deepEqual(resultsOfSplit[1].inputSegments, [ + { + segment: { + trueTransform: keystrokeDistributions[1][0].sample, + transitionId: keystrokeDistributions[1][0].sample.id, + start: 'arge'.length + }, + bestProbFromSet: 1 + }, { + segment: { + trueTransform: keystrokeDistributions[2][0].sample, + transitionId: keystrokeDistributions[2][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, ]); - assert.deepEqual(resultsOfSplit[2].inputRange, [ - { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 } + assert.deepEqual(resultsOfSplit[2].inputSegments, [ + { + segment: { + trueTransform: keystrokeDistributions[2][0].sample, + transitionId: keystrokeDistributions[2][0].sample.id, + start: 'ng'.length, + }, + bestProbFromSet: 1 + } ]); assert.isTrue(quotientPathHasInputs( @@ -494,7 +595,14 @@ describe('ContextToken', function() { const tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: 1}, keystrokeDistributions[i]); + tokenToSplit.addInput({ + segment: { + trueTransform: keystrokeDistributions[i][0].sample, + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, keystrokeDistributions[i]); }; assert.equal(tokenToSplit.exampleInput, toMathematicalSMP('largelongtransforms')); @@ -516,17 +624,46 @@ describe('ContextToken', function() { assert.equal(resultsOfSplit.length, 3); assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), splitTextArray); - assert.deepEqual(resultsOfSplit[0].inputRange, [ - { trueTransform: keystrokeDistributions[0][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, - { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, - ]); - assert.deepEqual(resultsOfSplit[1].inputRange, [ - { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 'arge'.length, bestProbFromSet: 1 }, - { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, + assert.deepEqual(resultsOfSplit[0].inputSegments, [{ + segment: { + trueTransform: keystrokeDistributions[0][0].sample, + transitionId: keystrokeDistributions[0][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, { + segment: { + trueTransform: keystrokeDistributions[1][0].sample, + transitionId: keystrokeDistributions[1][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, ]); - assert.deepEqual(resultsOfSplit[2].inputRange, [ - { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 } + assert.deepEqual(resultsOfSplit[1].inputSegments, [{ + segment: { + trueTransform: keystrokeDistributions[1][0].sample, + transitionId: keystrokeDistributions[1][0].sample.id, + start: 'arge'.length + }, + bestProbFromSet: 1 + }, { + segment: { + trueTransform: keystrokeDistributions[2][0].sample, + transitionId: keystrokeDistributions[2][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + } ]); + assert.deepEqual(resultsOfSplit[2].inputSegments, [{ + segment: { + trueTransform: keystrokeDistributions[2][0].sample, + transitionId: keystrokeDistributions[2][0].sample.id, + start: 'ng'.length + }, + bestProbFromSet: 1 + }]); assert.isTrue(quotientPathHasInputs( resultsOfSplit[0].searchModule, [ @@ -584,19 +721,22 @@ describe('ContextToken', function() { describe('preprocessInputSources', () => { it('properly preprocesses deleteLefts in the transforms', () => { const transforms: Transform[] = [ - { insert: 'long', deleteLeft: 0, deleteRight: 0 }, - { insert: 'argelovely', deleteLeft: 3, deleteRight: 0 }, - { insert: 'ngtransforms', deleteLeft: 4, deleteRight: 0 } + { insert: 'long', deleteLeft: 0, deleteRight: 0, id: 11 }, + { insert: 'argelovely', deleteLeft: 3, deleteRight: 0, id: 12 }, + { insert: 'ngtransforms', deleteLeft: 4, deleteRight: 0, id: 13 } ]; const results = preprocessInputSources(transforms.map((t) => ({ - trueTransform: t, - inputStartIndex: 0, + segment: { + trueTransform: t, + transitionId: t.id, + start: 0 + }, bestProbFromSet: 1 }))); assert.equal(results.length, transforms.length); - assert.sameOrderedMembers(results.map((entry) => entry.trueTransform.insert), ['l', 'argelo', 'ngtransforms']); - assert.sameOrderedMembers(results.map((entry) => entry.trueTransform.deleteLeft), [0, 0, 0]); + assert.sameOrderedMembers(results.map((entry) => entry.segment.trueTransform.insert), ['l', 'argelo', 'ngtransforms']); + assert.sameOrderedMembers(results.map((entry) => entry.segment.trueTransform.deleteLeft), [0, 0, 0]); }); }); \ No newline at end of file diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts index b65e046ba57..7e8e6fa6c26 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts @@ -49,7 +49,7 @@ function toTransformToken(text: string, transformId?: number) { let isWhitespace = text == ' '; let token = new ContextToken(plainModel); const textAsTransform = { insert: text, deleteLeft: 0, id: idSeed }; - token.addInput({trueTransform: textAsTransform, inputStartIndex: 0, bestProbFromSet: 1}, [ { sample: textAsTransform, p: 1 } ]); + token.addInput({segment: { trueTransform: textAsTransform, transitionId: textAsTransform.id, start: 0 }, bestProbFromSet: 1}, [ { sample: textAsTransform, p: 1 } ]); token.isWhitespace = isWhitespace; return token; } @@ -457,13 +457,13 @@ describe('ContextTokenization', function() { ); const boundaryToken = tokenization.tokens[tokenization.tokens.length-3]; - const boundaryTailInput = boundaryToken.inputRange[boundaryToken.inputRange.length - 1]; - assert.deepEqual(boundaryTailInput, {trueTransform: inputTransform, inputStartIndex: 0, bestProbFromSet: 1}); + const boundaryTailInput = boundaryToken.inputSegments[boundaryToken.inputSegments.length - 1]; + assert.deepEqual(boundaryTailInput, {segment: {trueTransform: inputTransform, transitionId: inputTransform.id, start: 0}, bestProbFromSet: 1}); // The new tail tokens should not include anything from the original tail; // the token should be replaced. - assert.deepEqual(tokenization.tokens[tokenization.tokens.length-2].inputRange, [{trueTransform: inputTransform, inputStartIndex: 0, bestProbFromSet: 1}]); - assert.deepEqual(tokenization.tokens[tokenization.tokens.length-1].inputRange, [{trueTransform: inputTransform, inputStartIndex: 1, bestProbFromSet: 1}]); + assert.deepEqual(tokenization.tokens[tokenization.tokens.length-2].inputSegments, [{segment: {trueTransform: inputTransform, transitionId: inputTransform.id, start: 0}, bestProbFromSet: 1}]); + assert.deepEqual(tokenization.tokens[tokenization.tokens.length-1].inputSegments, [{segment: {trueTransform: inputTransform, transitionId: inputTransform.id, start: 1}, bestProbFromSet: 1}]); const tailIndex = tokenization.tokens.length - 1; for(let i of inputTransformMap.keys()) { diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts index fbd013ddeb3..67358bb0a81 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts @@ -173,8 +173,8 @@ describe('precomputationSubsetKeyer', function() { const token = new ContextToken(plainModel, 'da'); // source text: 'date' token.addInput( - {trueTransform: {insert: 'te', deleteLeft: 0}, inputStartIndex: 0, bestProbFromSet: 1}, - [{sample: {insert: 'te', deleteLeft: 0}, p: 1}] + {segment: {trueTransform: {insert: 'te', deleteLeft: 0, id: 13}, transitionId: 13, start: 0}, bestProbFromSet: 1}, + [{sample: {insert: 'te', deleteLeft: 0, id: 13}, p: 1}] ); return token; })()], @@ -187,7 +187,7 @@ describe('precomputationSubsetKeyer', function() { }, tokenizedTransform: (() => { const map = new Map(); - map.set(0, { insert: 's', deleteLeft: 0 }); + map.set(0, { insert: 's', deleteLeft: 0, id: 14 }); return map; })() }; @@ -200,12 +200,12 @@ describe('precomputationSubsetKeyer', function() { const token = new ContextToken(plainModel, 'da'); // source text: 'date' token.addInput( - {trueTransform: {insert: 'te', deleteLeft: 0}, inputStartIndex: 0, bestProbFromSet: 1}, + {segment: {trueTransform: {insert: 'te', deleteLeft: 0, id: 13}, transitionId: 13, start: 0}, bestProbFromSet: 1}, [{sample: {insert: 't', deleteLeft: 0}, p: 1}] ); return token; })()], - { insert: 'es', deleteLeft: 0, deleteRight: 0 }, + { insert: 'es', deleteLeft: 0, deleteRight: 0, id: 14 }, false ), retokenization: [...rawTextTokens] @@ -241,12 +241,12 @@ describe('precomputationSubsetKeyer', function() { token.isPartial = true; // source text: 'dat' token.addInput( - {trueTransform: {insert: 't', deleteLeft: 0}, inputStartIndex: 0, bestProbFromSet: 1}, - [{sample: {insert: 'ts', deleteLeft: 0}, p: 1}] + {segment: {trueTransform: {insert: 't', deleteLeft: 0, id: 13}, transitionId: 13, start: 0}, bestProbFromSet: 1}, + [{sample: {insert: 'ts', deleteLeft: 0, id: 13}, p: 1}] ); return token; })()], - { insert: 'e', deleteLeft: 1, deleteRight: 0 }, + { insert: 'e', deleteLeft: 1, deleteRight: 0, id: 14 }, false ), retokenization: [...rawTextTokens] @@ -255,7 +255,7 @@ describe('precomputationSubsetKeyer', function() { }, tokenizedTransform: (() => { const map = new Map(); - map.set(0, { insert: 'e', deleteLeft: 1 }); + map.set(0, { insert: 'e', deleteLeft: 1, id: 14 }); return map; })() }; @@ -269,12 +269,12 @@ describe('precomputationSubsetKeyer', function() { token.isPartial = true; // source text: 'dat' token.addInput( - {trueTransform: {insert: 't', deleteLeft: 0}, inputStartIndex: 0, bestProbFromSet: 1}, - [{sample: {insert: 't', deleteLeft: 0}, p: 1}] + {segment: {trueTransform: {insert: 't', deleteLeft: 0, id: 13}, transitionId: 13, start: 0}, bestProbFromSet: 1}, + [{sample: {insert: 't', deleteLeft: 0, id: 13}, p: 1}] ); return token; })()], - { insert: 'e', deleteLeft: 0, deleteRight: 0 }, + { insert: 'e', deleteLeft: 0, deleteRight: 0, id: 14 }, false ), retokenization: [...rawTextTokens] @@ -717,18 +717,18 @@ describe('TokenizationSubsetBuilder', function() { const baseRawTextTokens = ['drink', ' ', 'coffee', ' ', 'at', ' ', 'a', ' ', 'cafe']; const baseTokenization = new ContextTokenization(baseRawTextTokens.map((text => toToken(text)))); - const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1 }; + const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1, id: 13 }; const fourCharTailToken = new ContextToken(baseTokenization.tail); fourCharTailToken.addInput( - {trueTransform: { insert: 'é', deleteLeft: 1 }, inputStartIndex: 0, bestProbFromSet: 1}, + {segment: {trueTransform: { insert: 'é', deleteLeft: 1, id: 13 }, transitionId: 13, start: 0}, bestProbFromSet: 1}, [{ sample: trueSourceTransform, p: .6 }] ); const fiveCharTailToken = new ContextToken(baseTokenization.tail); fiveCharTailToken.addInput( - {trueTransform: { insert: 'é', deleteLeft: 1 }, inputStartIndex: 0, bestProbFromSet: 1}, - [{ sample: { insert: 's', deleteLeft: 0 }, p: .4 }] + {segment: {trueTransform: { insert: 'é', deleteLeft: 1, id: 13 }, transitionId: 13, start: 0}, bestProbFromSet: 1}, + [{ sample: { insert: 's', deleteLeft: 0, id: 13 }, p: .4 }] ); const subsetBuilder = new TokenizationSubsetBuilder(); @@ -756,18 +756,18 @@ describe('TokenizationSubsetBuilder', function() { // target accented word: séance const baseTokenization = new ContextTokenization(baseRawTextTokens.map((text => toToken(text)))); - const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1 }; + const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1, id: 13 }; const twoCharTailToken = new ContextToken(baseTokenization.tail); twoCharTailToken.addInput( - {trueTransform: { insert: 'é', deleteLeft: 1 }, inputStartIndex: 0, bestProbFromSet: .6}, + {segment: {trueTransform: { insert: 'é', deleteLeft: 1, id: 13 }, transitionId: 13, start: 0}, bestProbFromSet: .6}, [{ sample: trueSourceTransform, p: .6 }] ); const threeCharTailToken = new ContextToken(baseTokenization.tail); threeCharTailToken.addInput( - {trueTransform: { insert: 'é', deleteLeft: 1 }, inputStartIndex: 0, bestProbFromSet: .6}, - [{ sample: { insert: 'a', deleteLeft: 0 }, p: .4 }] + {segment: {trueTransform: { insert: 'é', deleteLeft: 1 }, transitionId: 13, start: 0}, bestProbFromSet: .6}, + [{ sample: { insert: 'a', deleteLeft: 0, id: 13}, p: .4 }] ); const subsetBuilder = new TokenizationSubsetBuilder(); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts index 189790b7d30..5554e05e1ed 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts @@ -76,10 +76,13 @@ describe('SearchQuotientSpur', () => { assert.deepEqual(extendedPath.bestExample, {text: 't', p: 0.5}); assert.deepEqual(extendedPath.parents, [rootPath]); assert.deepEqual(extendedPath.inputs, leadEdgeDistribution); - assert.deepEqual(extendedPath.sourceIdentifiers, [ + assert.deepEqual(extendedPath.inputSegments, [ { - trueTransform: leadEdgeDistribution[0].sample, - inputStartIndex: 0, + segment: { + trueTransform: leadEdgeDistribution[0].sample, + transitionId: leadEdgeDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: leadEdgeDistribution[0].p } ]); @@ -128,14 +131,20 @@ describe('SearchQuotientSpur', () => { assert.deepEqual(length2Path.bestExample, {text: 'tr', p: leadEdgeDistribution[0].p * tailEdgeDistribution[0].p}); assert.deepEqual(length2Path.parents, [length1Path]); assert.deepEqual(length2Path.inputs, tailEdgeDistribution); - assert.deepEqual(length2Path.sourceIdentifiers, [ + assert.deepEqual(length2Path.inputSegments, [ { - trueTransform: leadEdgeDistribution[0].sample, - inputStartIndex: 0, + segment: { + trueTransform: leadEdgeDistribution[0].sample, + transitionId: leadEdgeDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: leadEdgeDistribution[0].p }, { - trueTransform: tailEdgeDistribution[0].sample, - inputStartIndex: 0, + segment: { + trueTransform: tailEdgeDistribution[0].sample, + transitionId: tailEdgeDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: tailEdgeDistribution[0].p } ]); @@ -200,14 +209,20 @@ describe('SearchQuotientSpur', () => { assert.deepEqual(length2Path.bestExample, {text: 'tri', p: leadEdgeDistribution[0].p * tailEdgeDistribution[0].p}); assert.deepEqual(length2Path.parents, [length1Path]); assert.deepEqual(length2Path.inputs, tailEdgeDistribution); - assert.deepEqual(length2Path.sourceIdentifiers, [ + assert.deepEqual(length2Path.inputSegments, [ { - trueTransform: leadEdgeDistribution[0].sample, - inputStartIndex: 0, + segment: { + trueTransform: leadEdgeDistribution[0].sample, + transitionId: leadEdgeDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: leadEdgeDistribution[0].p }, { - trueTransform: tailEdgeDistribution[0].sample, - inputStartIndex: 0, + segment: { + trueTransform: tailEdgeDistribution[0].sample, + transitionId: tailEdgeDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: tailEdgeDistribution[0].p } ]); From 3a613df9540966f0db616050e21adf4eb5aa24df Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Mon, 12 Jan 2026 10:37:47 -0600 Subject: [PATCH 2/3] change(web): hoist unit-test format changes from descendant PR --- .../context/context-token.tests.ts | 108 ++++++++------ .../context/context-tokenization.tests.ts | 37 ++++- .../context/tokenization-subsets.tests.ts | 136 +++++++++++++----- 3 files changed, 198 insertions(+), 83 deletions(-) diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts index 1a78e7f96d3..49e17f81c97 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts @@ -157,7 +157,13 @@ describe('ContextToken', function() { const merged = ContextToken.merge([token1, token2, token3], plainModel); assert.equal(merged.exampleInput, "can't"); - assert.deepEqual(merged.inputSegments, [ { segment: {trueTransform: srcTransform, transitionId: srcTransform.id, start: 0}, bestProbFromSet: 1 } ]); + assert.deepEqual(merged.inputSegments, [ { + segment: { + trueTransform: srcTransform, + transitionId: srcTransform.id, + start: 0 + }, bestProbFromSet: 1 + } ]); assert.equal(merged.searchModule.inputCount, 1); assert.deepEqual((merged.searchModule as SearchQuotientSpur).lastInput, [{sample: srcTransform, p: 1}]); }); @@ -165,11 +171,12 @@ describe('ContextToken', function() { it("merges four tokens with previously-split transforms", () => { // TODO: need another case - pref where there are two diff boundary transforms // and where each token has multiple constituent transforms. - const srcTransform1 = { insert: "apple", deleteLeft: 0, deleteRight: 0, id: 1 }; - const srcTransform2 = { insert: "sands", deleteLeft: 0, deleteRight: 0, id: 2 }; - const srcTransform3 = { insert: "our", deleteLeft: 0, deleteRight: 0, id: 3 }; - const srcTransform4 = { insert: "grapes", deleteLeft: 0, deleteRight: 0, id: 4 }; - const srcTransforms = [srcTransform1, srcTransform2, srcTransform3, srcTransform4]; + const srcTransforms = [ + { insert: "apple", deleteLeft: 0, deleteRight: 0, id: 1 }, + { insert: "sands", deleteLeft: 0, deleteRight: 0, id: 2 }, + { insert: "our", deleteLeft: 0, deleteRight: 0, id: 3 }, + { insert: "grapes", deleteLeft: 0, deleteRight: 0, id: 4 } + ]; // apples const token1 = new ContextToken(plainModel); @@ -183,16 +190,16 @@ describe('ContextToken', function() { token1.addInput({ segment: { - trueTransform: srcTransform1, - transitionId: srcTransform1.id, + trueTransform: srcTransforms[0], + transitionId: srcTransforms[0].id, start: 0 }, bestProbFromSet: 1 - }, [{sample: srcTransform1, p: 1}]); + }, [{sample: srcTransforms[0], p: 1}]); token1.addInput({ segment: { - trueTransform: srcTransform2, - transitionId: srcTransform2.id, + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, start: 0 }, bestProbFromSet: 1 @@ -200,8 +207,8 @@ describe('ContextToken', function() { token2.addInput({ segment: { - trueTransform: srcTransform2, - transitionId: srcTransform2.id, + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, start: 1 }, bestProbFromSet: 1 @@ -209,33 +216,39 @@ describe('ContextToken', function() { token3.addInput({ segment: { - trueTransform: srcTransform2, - transitionId: srcTransform2.id, + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, start: 4 }, bestProbFromSet: 1 }, [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token3.addInput({ segment: { - trueTransform: srcTransform3, - transitionId: srcTransform3.id, + trueTransform: srcTransforms[2], + transitionId: srcTransforms[2].id, start: 0 }, bestProbFromSet: 1 - }, [{sample: srcTransform3, p: 1}]); + }, [{sample: srcTransforms[2], p: 1}]); token4.addInput({ segment: { - trueTransform: srcTransform4, - transitionId: srcTransform4.id, + trueTransform: srcTransforms[3], + transitionId: srcTransforms[3].id, start: 0 }, bestProbFromSet: 1 - }, [{sample: srcTransform4, p: 1}]); + }, [{sample: srcTransforms[3], p: 1}]); const merged = ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, "applesandsourgrapes"); - assert.deepEqual(merged.inputSegments, srcTransforms.map((t) => ({ segment: {trueTransform: t, transitionId: t.id, start: 0}, bestProbFromSet: 1 }) )); + assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({ + segment: { + trueTransform: t, + transitionId: t.id, + start: 0 + }, bestProbFromSet: 1 + }))); assert.isTrue(quotientPathHasInputs( merged.searchModule, srcTransforms.map((t) => ([{sample: t, p: 1}])) @@ -245,11 +258,12 @@ describe('ContextToken', function() { it("merges four tokens with previously-split transforms - non-BMP text", () => { // TODO: need another case - pref where there are two diff boundary transforms // and where each token has multiple constituent transforms. - const srcTransform1 = { insert: toMathematicalSMP("apple"), deleteLeft: 0, deleteRight: 0, id: 1 }; - const srcTransform2 = { insert: toMathematicalSMP("sands"), deleteLeft: 0, deleteRight: 0, id: 2 }; - const srcTransform3 = { insert: toMathematicalSMP("our"), deleteLeft: 0, deleteRight: 0, id: 3 }; - const srcTransform4 = { insert: toMathematicalSMP("grapes"), deleteLeft: 0, deleteRight: 0, id: 4 }; - const srcTransforms = [srcTransform1, srcTransform2, srcTransform3, srcTransform4]; + const srcTransforms = [ + { insert: toMathematicalSMP("apple"), deleteLeft: 0, deleteRight: 0, id: 1 }, + { insert: toMathematicalSMP("sands"), deleteLeft: 0, deleteRight: 0, id: 2 }, + { insert: toMathematicalSMP("our"), deleteLeft: 0, deleteRight: 0, id: 3 }, + { insert: toMathematicalSMP("grapes"), deleteLeft: 0, deleteRight: 0, id: 4 } + ]; // apples const token1 = new ContextToken(plainModel); @@ -263,16 +277,16 @@ describe('ContextToken', function() { token1.addInput({ segment: { - trueTransform: srcTransform1, - transitionId: srcTransform1.id, + trueTransform: srcTransforms[0], + transitionId: srcTransforms[0].id, start: 0 }, bestProbFromSet: 1 - }, [{sample: srcTransform1, p: 1}]); + }, [{sample: srcTransforms[0], p: 1}]); token1.addInput({ segment: { - trueTransform: srcTransform2, - transitionId: srcTransform2.id, + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, start: 0 }, bestProbFromSet: 1 @@ -280,8 +294,8 @@ describe('ContextToken', function() { token2.addInput({ segment: { - trueTransform: srcTransform2, - transitionId: srcTransform2.id, + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, start: 1 }, bestProbFromSet: 1 @@ -289,33 +303,39 @@ describe('ContextToken', function() { token3.addInput({ segment: { - trueTransform: srcTransform2, - transitionId: srcTransform2.id, + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, start: 4 }, bestProbFromSet: 1 }, [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token3.addInput({ segment: { - trueTransform: srcTransform3, - transitionId: srcTransform3.id, + trueTransform: srcTransforms[2], + transitionId: srcTransforms[2].id, start: 0 }, bestProbFromSet: 1 - }, [{sample: srcTransform3, p: 1}]); + }, [{sample: srcTransforms[2], p: 1}]); token4.addInput({ segment: { - trueTransform: srcTransform4, - transitionId: srcTransform4.id, + trueTransform: srcTransforms[3], + transitionId: srcTransforms[3].id, start: 0 }, bestProbFromSet: 1 - }, [{sample: srcTransform4, p: 1}]); + }, [{sample: srcTransforms[3], p: 1}]); const merged = ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes")); - assert.deepEqual(merged.inputSegments, srcTransforms.map((t) => ({segment: { trueTransform: t, transitionId: t.id, start: 0}, bestProbFromSet: 1 }) )); + assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({ + segment: { + trueTransform: t, + transitionId: t.id, + start: 0 + }, bestProbFromSet: 1 + }))); assert.isTrue(quotientPathHasInputs( merged.searchModule, srcTransforms.map((t) => ([{sample: t, p: 1}])) @@ -726,7 +746,7 @@ describe('preprocessInputSources', () => { { insert: 'ngtransforms', deleteLeft: 4, deleteRight: 0, id: 13 } ]; - const results = preprocessInputSources(transforms.map((t) => ({ + const results = preprocessInputSources(transforms.map((t, i) => ({ segment: { trueTransform: t, transitionId: t.id, diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts index 7e8e6fa6c26..2a5dcd59614 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts @@ -49,7 +49,13 @@ function toTransformToken(text: string, transformId?: number) { let isWhitespace = text == ' '; let token = new ContextToken(plainModel); const textAsTransform = { insert: text, deleteLeft: 0, id: idSeed }; - token.addInput({segment: { trueTransform: textAsTransform, transitionId: textAsTransform.id, start: 0 }, bestProbFromSet: 1}, [ { sample: textAsTransform, p: 1 } ]); + token.addInput({ + segment: { + trueTransform: textAsTransform, + transitionId: textAsTransform.id, + start: 0 + }, bestProbFromSet: 1 + }, [ { sample: textAsTransform, p: 1 } ]); token.isWhitespace = isWhitespace; return token; } @@ -200,7 +206,7 @@ describe('ContextTokenization', function() { }, removedTokenCount: 0 }, - inputs: [{ sample: inputTransformMap, p: 1 }], + inputs: [{ sample: inputTransformMap, p: 1 }] }, plainModel, inputTransform, @@ -290,7 +296,7 @@ describe('ContextTokenization', function() { }, removedTokenCount: 0 }, - inputs: [{ sample: inputTransformMap, p: 1 }], + inputs: [{ sample: inputTransformMap, p: 1 }] }, plainModel, inputTransform, @@ -458,12 +464,31 @@ describe('ContextTokenization', function() { const boundaryToken = tokenization.tokens[tokenization.tokens.length-3]; const boundaryTailInput = boundaryToken.inputSegments[boundaryToken.inputSegments.length - 1]; - assert.deepEqual(boundaryTailInput, {segment: {trueTransform: inputTransform, transitionId: inputTransform.id, start: 0}, bestProbFromSet: 1}); + assert.deepEqual(boundaryTailInput, { + segment: { + trueTransform: inputTransform, + transitionId: inputTransform.id, + start: 0 + }, bestProbFromSet: 1 + }); // The new tail tokens should not include anything from the original tail; // the token should be replaced. - assert.deepEqual(tokenization.tokens[tokenization.tokens.length-2].inputSegments, [{segment: {trueTransform: inputTransform, transitionId: inputTransform.id, start: 0}, bestProbFromSet: 1}]); - assert.deepEqual(tokenization.tokens[tokenization.tokens.length-1].inputSegments, [{segment: {trueTransform: inputTransform, transitionId: inputTransform.id, start: 1}, bestProbFromSet: 1}]); + assert.deepEqual(tokenization.tokens[tokenization.tokens.length-2].inputSegments, [{ + segment: { + trueTransform: inputTransform, + transitionId: inputTransform.id, + start: 0 + }, bestProbFromSet: 1 + }]); + assert.deepEqual(tokenization.tokens[tokenization.tokens.length-1].inputSegments, [{ + segment: { + trueTransform: inputTransform, + transitionId: inputTransform.id, + start: 1 + }, + bestProbFromSet: 1 + }]); const tailIndex = tokenization.tokens.length - 1; for(let i of inputTransformMap.keys()) { diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts index 67358bb0a81..880ad7782f3 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts @@ -16,7 +16,15 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { deepCopy } from '@keymanapp/web-utils'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; -import { buildEdgeWindow, ContextToken, ContextTokenization, models, precomputationSubsetKeyer, TokenizationTransitionEdits, TokenizationSubsetBuilder } from '@keymanapp/lm-worker/test-index'; +import { + buildEdgeWindow, + ContextToken, + ContextTokenization, + models, + precomputationSubsetKeyer, + TokenizationTransitionEdits, + TokenizationSubsetBuilder +} from '@keymanapp/lm-worker/test-index'; import Distribution = LexicalModelTypes.Distribution; import Transform = LexicalModelTypes.Transform; @@ -172,10 +180,18 @@ describe('precomputationSubsetKeyer', function() { [...tokenization.tokens, (() => { const token = new ContextToken(plainModel, 'da'); // source text: 'date' - token.addInput( - {segment: {trueTransform: {insert: 'te', deleteLeft: 0, id: 13}, transitionId: 13, start: 0}, bestProbFromSet: 1}, - [{sample: {insert: 'te', deleteLeft: 0, id: 13}, p: 1}] - ); + token.addInput({ + segment: { + trueTransform: { + insert: 'te', + deleteLeft: 0, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [ + {sample: {insert: 'te', deleteLeft: 0, id: 13}, p: 1} + ]); return token; })()], { insert: 's', deleteLeft: 0, deleteRight: 0 }, @@ -199,10 +215,18 @@ describe('precomputationSubsetKeyer', function() { [...tokenization.tokens, (() => { const token = new ContextToken(plainModel, 'da'); // source text: 'date' - token.addInput( - {segment: {trueTransform: {insert: 'te', deleteLeft: 0, id: 13}, transitionId: 13, start: 0}, bestProbFromSet: 1}, - [{sample: {insert: 't', deleteLeft: 0}, p: 1}] - ); + token.addInput({ + segment: { + trueTransform: { + insert: 'te', + deleteLeft: 0, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [ + {sample: {insert: 't', deleteLeft: 0}, p: 1} + ]); return token; })()], { insert: 'es', deleteLeft: 0, deleteRight: 0, id: 14 }, @@ -240,10 +264,17 @@ describe('precomputationSubsetKeyer', function() { const token = new ContextToken(plainModel, 'da'); token.isPartial = true; // source text: 'dat' - token.addInput( - {segment: {trueTransform: {insert: 't', deleteLeft: 0, id: 13}, transitionId: 13, start: 0}, bestProbFromSet: 1}, - [{sample: {insert: 'ts', deleteLeft: 0, id: 13}, p: 1}] - ); + token.addInput({ + segment: { + trueTransform: { + insert: 't', + deleteLeft: 0, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [{sample: {insert: 'ts', deleteLeft: 0, id: 13}, p: 1} + ]); return token; })()], { insert: 'e', deleteLeft: 1, deleteRight: 0, id: 14 }, @@ -268,10 +299,18 @@ describe('precomputationSubsetKeyer', function() { const token = new ContextToken(plainModel, 'da'); token.isPartial = true; // source text: 'dat' - token.addInput( - {segment: {trueTransform: {insert: 't', deleteLeft: 0, id: 13}, transitionId: 13, start: 0}, bestProbFromSet: 1}, - [{sample: {insert: 't', deleteLeft: 0, id: 13}, p: 1}] - ); + token.addInput({ + segment: { + trueTransform: { + insert: 't', + deleteLeft: 0, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [ + {sample: {insert: 't', deleteLeft: 0, id: 13}, p: 1} + ]); return token; })()], { insert: 'e', deleteLeft: 0, deleteRight: 0, id: 14 }, @@ -720,16 +759,32 @@ describe('TokenizationSubsetBuilder', function() { const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1, id: 13 }; const fourCharTailToken = new ContextToken(baseTokenization.tail); - fourCharTailToken.addInput( - {segment: {trueTransform: { insert: 'é', deleteLeft: 1, id: 13 }, transitionId: 13, start: 0}, bestProbFromSet: 1}, - [{ sample: trueSourceTransform, p: .6 }] - ); + fourCharTailToken.addInput({ + segment: { + trueTransform: { + insert: 'é', + deleteLeft: 1, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [ + { sample: trueSourceTransform, p: .6 } + ]); const fiveCharTailToken = new ContextToken(baseTokenization.tail); - fiveCharTailToken.addInput( - {segment: {trueTransform: { insert: 'é', deleteLeft: 1, id: 13 }, transitionId: 13, start: 0}, bestProbFromSet: 1}, - [{ sample: { insert: 's', deleteLeft: 0, id: 13 }, p: .4 }] - ); + fiveCharTailToken.addInput({ + segment: { + trueTransform: { + insert: 'é', + deleteLeft: 1, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [ + { sample: { insert: 's', deleteLeft: 0, id: 13 }, p: .4 } + ]); const subsetBuilder = new TokenizationSubsetBuilder(); const fourCharTokenization = new ContextTokenization([...baseTokenization.tokens.slice(0, -1), fourCharTailToken]); @@ -759,16 +814,31 @@ describe('TokenizationSubsetBuilder', function() { const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1, id: 13 }; const twoCharTailToken = new ContextToken(baseTokenization.tail); - twoCharTailToken.addInput( - {segment: {trueTransform: { insert: 'é', deleteLeft: 1, id: 13 }, transitionId: 13, start: 0}, bestProbFromSet: .6}, - [{ sample: trueSourceTransform, p: .6 }] - ); + twoCharTailToken.addInput({ + segment: { + trueTransform: { + insert: 'é', + deleteLeft: 1, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: .6 + }, [ + { sample: trueSourceTransform, p: .6 } + ]); const threeCharTailToken = new ContextToken(baseTokenization.tail); - threeCharTailToken.addInput( - {segment: {trueTransform: { insert: 'é', deleteLeft: 1 }, transitionId: 13, start: 0}, bestProbFromSet: .6}, - [{ sample: { insert: 'a', deleteLeft: 0, id: 13}, p: .4 }] - ); + threeCharTailToken.addInput({ + segment: { + trueTransform: { + insert: 'é', + deleteLeft: 1 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: .6 + }, [ + { sample: { insert: 'a', deleteLeft: 0, id: 13}, p: .4 } + ]); const subsetBuilder = new TokenizationSubsetBuilder(); const twoCharTokenization = new ContextTokenization([...baseTokenization.tokens.slice(0, -1), twoCharTailToken]); From e5231ff435db7454ae6592e068c7d031e359ed71 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 28 Jan 2026 16:04:26 -0600 Subject: [PATCH 3/3] change(web): improve .transitionId typing --- .../worker-thread/src/main/correction/search-quotient-node.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts index 6a2c12d1eec..12fdf9a1501 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -51,7 +51,7 @@ export interface InputSegment { /** * The transform / transition ID of the corresponding input event. */ - transitionId: number, + transitionId: number | undefined, /** * Marks the initial index (inclusive) within the insert strings for the