diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts index 8264a13100d..243c637ca3f 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts @@ -11,7 +11,7 @@ import { applyTransform, buildMergedTransform } from "@keymanapp/models-template import { LexicalModelTypes } from '@keymanapp/common-types'; import { deepCopy, KMWString } from "@keymanapp/web-utils"; -import { SearchQuotientNode, TokenInputSource } from "./search-quotient-node.js"; +import { SearchQuotientNode, PathInputProperties } from "./search-quotient-node.js"; import { TokenSplitMap } from "./context-tokenization.js"; import { LegacyQuotientSpur } from "./legacy-quotient-spur.js"; import { LegacyQuotientRoot } from "./legacy-quotient-root.js"; @@ -107,9 +107,12 @@ export class ContextToken { let searchModule: SearchQuotientNode = new LegacyQuotientRoot(model); const BASE_PROBABILITY = 1; textToCharTransforms(rawText).forEach((transform) => { - let inputMetadata: TokenInputSource = { - trueTransform: transform, - inputStartIndex: 0, + let inputMetadata: PathInputProperties = { + segment: { + trueTransform: transform, + start: 0, + transitionId: undefined + }, bestProbFromSet: BASE_PROBABILITY }; searchModule = new LegacyQuotientSpur(searchModule, [{sample: transform, p: BASE_PROBABILITY}], inputMetadata); @@ -123,7 +126,7 @@ export class ContextToken { * Call this to record the original keystroke Transforms for the context range * corresponding to this token. */ - addInput(inputSource: TokenInputSource, distribution: Distribution) { + addInput(inputSource: PathInputProperties, distribution: Distribution) { this._searchModule = new LegacyQuotientSpur(this._searchModule, distribution, inputSource); } @@ -142,8 +145,8 @@ export class ContextToken { * Denotes the original keystroke Transforms comprising the range corresponding * to this token. */ - get inputRange(): TokenInputSource[] { - return this.searchModule.sourceIdentifiers; + get inputSegments() { + return this.searchModule.inputSegments; } /** @@ -161,9 +164,9 @@ export class ContextToken { get sourceRangeKey(): string { const components: string[] = []; - for(const source of this.inputRange) { - const i = source.inputStartIndex; - components.push(`T${source.trueTransform.id}${i != 0 ? '@' + i : ''}`); + for(const source of this.inputSegments) { + const i = source.segment.start; + components.push(`T${source.segment.transitionId}${i != 0 ? `@${i}` : ''}`); } return components.join('+'); @@ -189,7 +192,7 @@ export class ContextToken { // Thus, we don't set the .isWhitespace flag field. const resultToken = new ContextToken(lexicalModel); - let lastSourceInput: TokenInputSource; + let lastSourceInput: PathInputProperties; let lastInputDistrib: Distribution; for(const token of tokensToMerge) { const inputCount = token.inputCount; @@ -200,7 +203,7 @@ export class ContextToken { } // Are we re-merging on a previously split transform? - if(lastSourceInput?.trueTransform != token.inputRange[0].trueTransform) { + if(lastSourceInput?.segment.trueTransform != token.inputSegments[0].segment.trueTransform) { if(lastSourceInput) { resultToken.addInput(lastSourceInput, lastInputDistrib); } // else: there's nothing to add as input @@ -229,9 +232,9 @@ export class ContextToken { // Ignore the last entry for now - it may need to merge with a matching // entry in the next token! for(let i = startIndex; i < inputCount - 1; i++) { - resultToken.addInput(token.inputRange[i], token.searchModule.inputSequence[i]); + resultToken.addInput(token.inputSegments[i], token.searchModule.inputSequence[i]); } - lastSourceInput = token.inputRange[inputCount-1]; + lastSourceInput = token.inputSegments[inputCount-1]; lastInputDistrib = token.searchModule.inputSequence[inputCount-1]; } @@ -254,7 +257,7 @@ export class ContextToken { // Build an alternate version of the transforms: if we preprocess all deleteLefts, // what text remains from each? - const alteredSources = preprocessInputSources(this.inputRange); + const alteredSources = preprocessInputSources(this.inputSegments); const blankContext = { left: '', startOfBuffer: true, endOfBuffer: true }; const splitSpecs = split.matches.slice(); @@ -310,15 +313,17 @@ export class ContextToken { }; }); - const priorSourceInput = overextendedToken.inputRange[lastInputIndex]; + const priorSourceInput = overextendedToken.inputSegments[lastInputIndex]; constructingToken.addInput(priorSourceInput, headDistribution); tokensFromSplit.push(constructingToken); constructingToken = new ContextToken(lexicalModel); backupToken = new ContextToken(constructingToken); constructingToken.addInput({ - trueTransform: priorSourceInput.trueTransform, - inputStartIndex: priorSourceInput.inputStartIndex + extraCharsAdded, + segment: { + ...priorSourceInput.segment, + start: priorSourceInput.segment.start + extraCharsAdded + }, bestProbFromSet: priorSourceInput.bestProbFromSet }, tailDistribution); @@ -335,8 +340,8 @@ export class ContextToken { backupToken = new ContextToken(constructingToken); lenBeforeLastApply = KMWString.length(currentText.left); - currentText = applyTransform(alteredSources[transformIndex].trueTransform, currentText); - constructingToken.addInput(this.inputRange[transformIndex], this.searchModule.inputSequence[transformIndex]); + currentText = applyTransform(alteredSources[transformIndex].segment.trueTransform, currentText); + constructingToken.addInput(this.inputSegments[transformIndex], this.searchModule.inputSequence[transformIndex]); transformIndex++; } @@ -344,25 +349,25 @@ export class ContextToken { } } -export function preprocessInputSources(inputSources: ReadonlyArray) { +export function preprocessInputSources(inputSources: ReadonlyArray) { const alteredSources = deepCopy(inputSources); let trickledDeleteLeft = 0; for(let i = alteredSources.length - 1; i >= 0; i--) { const source = alteredSources[i]; if(trickledDeleteLeft) { - const insLen = KMWString.length(source.trueTransform.insert); + const insLen = KMWString.length(source.segment.trueTransform.insert); if(insLen <= trickledDeleteLeft) { - source.trueTransform.insert = ''; + source.segment.trueTransform.insert = ''; trickledDeleteLeft -= insLen; } else { - source.trueTransform.insert = KMWString.substring(source.trueTransform.insert, 0, insLen - trickledDeleteLeft); + source.segment.trueTransform.insert = KMWString.substring(source.segment.trueTransform.insert, 0, insLen - trickledDeleteLeft); trickledDeleteLeft = 0; } } - trickledDeleteLeft += source.trueTransform.deleteLeft; - source.trueTransform.deleteLeft = 0; + trickledDeleteLeft += source.segment.trueTransform.deleteLeft; + source.segment.trueTransform.deleteLeft = 0; } - alteredSources[0].trueTransform.deleteLeft = trickledDeleteLeft; + alteredSources[0].segment.trueTransform.deleteLeft = trickledDeleteLeft; return alteredSources; } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts index 72f0f53d246..20c95b1ecc2 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts @@ -585,7 +585,14 @@ export class ContextTokenization { if(affectedToken.inputCount == 0 && distribution[0].sample.deleteLeft != 0) { distribution = distribution.map((mass) => ({sample: { ...mass.sample, deleteLeft: 0 }, p: mass.p })); } - affectedToken.addInput({trueTransform: sourceInput, inputStartIndex: appliedLength, bestProbFromSet}, distribution); + affectedToken.addInput({ + segment: { + trueTransform: sourceInput, + transitionId: sourceInput.id, + start: appliedLength + }, + bestProbFromSet: bestProbFromSet + }, distribution); appliedLength += KMWString.length(distribution[0].sample.insert); const tokenize = determineModelTokenizer(lexicalModel); diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts index 4e67adabaa9..973e4fb0a8e 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts @@ -11,7 +11,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { SearchNode } from './distance-modeler.js'; -import { PathResult, SearchQuotientNode, TokenInputSource } from './search-quotient-node.js'; +import { PathResult, SearchQuotientNode, PathInputProperties } from './search-quotient-node.js'; import { SearchQuotientSpur } from './search-quotient-spur.js'; import Distribution = LexicalModelTypes.Distribution; @@ -28,7 +28,7 @@ export class LegacyQuotientSpur extends SearchQuotientSpur { * @param inputs * @param bestProbFromSet */ - constructor(space: SearchQuotientNode, inputs: Distribution, inputSource: TokenInputSource | ProbabilityMass) { + constructor(space: SearchQuotientNode, inputs: Distribution, inputSource: PathInputProperties | ProbabilityMass) { super(space, inputs, inputSource); this.queueNodes(this.buildEdgesForNodes(space.previousResults.map(r => r.node))); return; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts index 7a490acb7ca..12fdf9a1501 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -39,26 +39,38 @@ type CompleteSearchPath = { export type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath; -/** - * Models the properties and portion of an input event applied by a SearchSpace for - * correction-search purposes. - */ -export interface TokenInputSource { +export interface InputSegment { /** * The Transform corresponding to the keystroke applied to the true context * for this input event. * - * NOTE: outside of use for .sourceText / .likeliestSourceText, the only part - * that should actually be referenced is the Transform / transition ID. + * @deprecated Slated for removal within epic/autocorrect. */ trueTransform: Transform; + /** + * The transform / transition ID of the corresponding input event. + */ + transitionId: number | undefined, + /** * Marks the initial index (inclusive) within the insert strings for the - * corresponding transitions' Transforms that is applied by the corresponding + * corresponding transitions' Transforms that are applied by the corresponding * tokenized correction-search input. */ - inputStartIndex: number; + start: number +} + +/** + * Models the properties and portion of an input event applied by a SearchSpace for + * correction-search purposes. + */ +export interface PathInputProperties { + /** + * Denotes the portion of the ongoing input stream represented by the corresponding + * input distribution(s) of a SearchSpace. + */ + segment: InputSegment; /** * Notes the highest probability found in the input event's transform @@ -154,8 +166,10 @@ export interface SearchQuotientNode { /** * Gets components useful for building a string-based representation of the * keystroke range corrected by this search space. + * + * TODO: will return only the `inputSegment` part of each entry in the future. */ - readonly sourceIdentifiers: TokenInputSource[]; + readonly inputSegments: PathInputProperties[]; } /** diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts index 7bef9bfae68..d1e1ce6d248 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts @@ -2,7 +2,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { SearchNode, SearchResult } from './distance-modeler.js'; -import { generateSpaceSeed, PathResult, SearchQuotientNode, TokenInputSource } from './search-quotient-node.js'; +import { generateSpaceSeed, PathInputProperties, PathResult, SearchQuotientNode } from './search-quotient-node.js'; import LexicalModel = LexicalModelTypes.LexicalModel; @@ -90,7 +90,7 @@ export class SearchQuotientRoot implements SearchQuotientNode { } } - get sourceIdentifiers(): TokenInputSource[] { + get inputSegments(): PathInputProperties[] { return []; } } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts index f0860f7f7a6..45a44084a0b 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -12,7 +12,7 @@ import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keyman import { LexicalModelTypes } from '@keymanapp/common-types'; import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js'; -import { generateSpaceSeed, PathResult, SearchQuotientNode, TokenInputSource } from './search-quotient-node.js'; +import { generateSpaceSeed, PathResult, SearchQuotientNode, PathInputProperties } from './search-quotient-node.js'; import Distribution = LexicalModelTypes.Distribution; import ProbabilityMass = LexicalModelTypes.ProbabilityMass; @@ -27,7 +27,7 @@ export const QUEUE_NODE_COMPARATOR: Comparator = function(arg1, arg2 export abstract class SearchQuotientSpur implements SearchQuotientNode { private selectionQueue: PriorityQueue = new PriorityQueue(QUEUE_NODE_COMPARATOR); readonly inputs?: Distribution; - readonly inputSource?: TokenInputSource; + readonly inputSource?: PathInputProperties; private parentNode: SearchQuotientNode; readonly spaceId: number; @@ -61,23 +61,26 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { constructor( parentNode: SearchQuotientNode, inputs: Distribution>, - inputSource: TokenInputSource | ProbabilityMass + inputSource: PathInputProperties | ProbabilityMass ) { this.spaceId = generateSpaceSeed(); // Coerce inputSource to TokenInputSource format. - if(inputSource && (inputSource as TokenInputSource).trueTransform == undefined) { + if(inputSource && (inputSource as ProbabilityMass).sample != undefined) { const keystroke = inputSource as ProbabilityMass; inputSource = { - trueTransform: keystroke.sample, - bestProbFromSet: keystroke.p, - inputStartIndex: 0 + segment: { + trueTransform: keystroke.sample, + transitionId: keystroke.sample.id, + start: 0 + }, + bestProbFromSet: keystroke.p } }; - const inputSrc = inputSource as TokenInputSource; + const inputSrc = inputSource as PathInputProperties; const transitionId = (inputs?.[0].sample.id); - if(transitionId !== undefined && inputSrc?.trueTransform.id != transitionId) { + if(transitionId !== undefined && inputSrc?.segment.transitionId != transitionId) { throw new Error("Input distribution and input-source transition IDs must match"); } @@ -249,15 +252,15 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { return Object.values(this.returnedValues ?? {}).map(v => new SearchResult(v)); } - public get sourceIdentifiers(): TokenInputSource[] { + public get inputSegments(): PathInputProperties[] { if(!this.parentNode) { return []; } - const parentSources = this.parentNode.sourceIdentifiers; + const parentSources = this.parentNode.inputSegments; if(this.inputSource) { - const inputId = this.inputSource.trueTransform.id; - if(inputId !== undefined && parentSources.length > 0 && parentSources[parentSources.length - 1].trueTransform.id == inputId) { + const inputId = this.inputSource.segment.transitionId; + if(inputId !== undefined && parentSources.length > 0 && parentSources[parentSources.length - 1].segment.transitionId == inputId) { return parentSources; } diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts index d8ba6d2476d..49e17f81c97 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts @@ -107,9 +107,9 @@ describe('ContextToken', function() { const merged = ContextToken.merge([token1, token2, token3], plainModel); assert.equal(merged.exampleInput, "can't"); - token1.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); - token2.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); - token3.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); + token1.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1)); + token2.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1)); + token3.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1)); assert.isTrue(quotientPathHasInputs( merged.searchModule, [ @@ -129,26 +129,41 @@ describe('ContextToken', function() { const token3 = new ContextToken(plainModel); token1.addInput({ - trueTransform: srcTransform, - inputStartIndex: 0, + segment: { + trueTransform: srcTransform, + transitionId: srcTransform.id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: {insert: 'can', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); token2.addInput({ - trueTransform: srcTransform, - inputStartIndex: 3, + segment: { + trueTransform: srcTransform, + transitionId: srcTransform.id, + start: 3 + }, bestProbFromSet: 1 }, [{sample: {insert: "'", deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); token3.addInput({ - trueTransform: srcTransform, - inputStartIndex: 4, + segment: { + trueTransform: srcTransform, + transitionId: srcTransform.id, + start: 4 + }, bestProbFromSet: 1 }, [{sample: {insert: 't', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); const merged = ContextToken.merge([token1, token2, token3], plainModel); assert.equal(merged.exampleInput, "can't"); - assert.deepEqual(merged.inputRange, [ { trueTransform: srcTransform, inputStartIndex: 0, bestProbFromSet: 1 } ]); + assert.deepEqual(merged.inputSegments, [ { + segment: { + trueTransform: srcTransform, + transitionId: srcTransform.id, + start: 0 + }, bestProbFromSet: 1 + } ]); assert.equal(merged.searchModule.inputCount, 1); assert.deepEqual((merged.searchModule as SearchQuotientSpur).lastInput, [{sample: srcTransform, p: 1}]); }); @@ -156,11 +171,12 @@ describe('ContextToken', function() { it("merges four tokens with previously-split transforms", () => { // TODO: need another case - pref where there are two diff boundary transforms // and where each token has multiple constituent transforms. - const srcTransform1 = { insert: "apple", deleteLeft: 0, deleteRight: 0, id: 1 }; - const srcTransform2 = { insert: "sands", deleteLeft: 0, deleteRight: 0, id: 2 }; - const srcTransform3 = { insert: "our", deleteLeft: 0, deleteRight: 0, id: 3 }; - const srcTransform4 = { insert: "grapes", deleteLeft: 0, deleteRight: 0, id: 4 }; - const srcTransforms = [srcTransform1, srcTransform2, srcTransform3, srcTransform4]; + const srcTransforms = [ + { insert: "apple", deleteLeft: 0, deleteRight: 0, id: 1 }, + { insert: "sands", deleteLeft: 0, deleteRight: 0, id: 2 }, + { insert: "our", deleteLeft: 0, deleteRight: 0, id: 3 }, + { insert: "grapes", deleteLeft: 0, deleteRight: 0, id: 4 } + ]; // apples const token1 = new ContextToken(plainModel); @@ -173,42 +189,66 @@ describe('ContextToken', function() { const tokensToMerge = [token1, token2, token3, token4] token1.addInput({ - trueTransform: srcTransform1, - inputStartIndex: 0, + segment: { + trueTransform: srcTransforms[0], + transitionId: srcTransforms[0].id, + start: 0 + }, bestProbFromSet: 1 - }, [{sample: srcTransform1, p: 1}]); + }, [{sample: srcTransforms[0], p: 1}]); token1.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 0, + segment: { + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token2.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 1, + segment: { + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, + start: 1 + }, bestProbFromSet: 1 }, [{sample: {insert: "and", deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token3.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 4, + segment: { + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, + start: 4 + }, bestProbFromSet: 1 }, [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token3.addInput({ - trueTransform: srcTransform3, - inputStartIndex: 0, + segment: { + trueTransform: srcTransforms[2], + transitionId: srcTransforms[2].id, + start: 0 + }, bestProbFromSet: 1 - }, [{sample: srcTransform3, p: 1}]); + }, [{sample: srcTransforms[2], p: 1}]); token4.addInput({ - trueTransform: srcTransform4, - inputStartIndex: 0, + segment: { + trueTransform: srcTransforms[3], + transitionId: srcTransforms[3].id, + start: 0 + }, bestProbFromSet: 1 - }, [{sample: srcTransform4, p: 1}]); + }, [{sample: srcTransforms[3], p: 1}]); const merged = ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, "applesandsourgrapes"); - assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) )); + assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({ + segment: { + trueTransform: t, + transitionId: t.id, + start: 0 + }, bestProbFromSet: 1 + }))); assert.isTrue(quotientPathHasInputs( merged.searchModule, srcTransforms.map((t) => ([{sample: t, p: 1}])) @@ -218,11 +258,12 @@ describe('ContextToken', function() { it("merges four tokens with previously-split transforms - non-BMP text", () => { // TODO: need another case - pref where there are two diff boundary transforms // and where each token has multiple constituent transforms. - const srcTransform1 = { insert: toMathematicalSMP("apple"), deleteLeft: 0, deleteRight: 0, id: 1 }; - const srcTransform2 = { insert: toMathematicalSMP("sands"), deleteLeft: 0, deleteRight: 0, id: 2 }; - const srcTransform3 = { insert: toMathematicalSMP("our"), deleteLeft: 0, deleteRight: 0, id: 3 }; - const srcTransform4 = { insert: toMathematicalSMP("grapes"), deleteLeft: 0, deleteRight: 0, id: 4 }; - const srcTransforms = [srcTransform1, srcTransform2, srcTransform3, srcTransform4]; + const srcTransforms = [ + { insert: toMathematicalSMP("apple"), deleteLeft: 0, deleteRight: 0, id: 1 }, + { insert: toMathematicalSMP("sands"), deleteLeft: 0, deleteRight: 0, id: 2 }, + { insert: toMathematicalSMP("our"), deleteLeft: 0, deleteRight: 0, id: 3 }, + { insert: toMathematicalSMP("grapes"), deleteLeft: 0, deleteRight: 0, id: 4 } + ]; // apples const token1 = new ContextToken(plainModel); @@ -235,42 +276,66 @@ describe('ContextToken', function() { const tokensToMerge = [token1, token2, token3, token4] token1.addInput({ - trueTransform: srcTransform1, - inputStartIndex: 0, + segment: { + trueTransform: srcTransforms[0], + transitionId: srcTransforms[0].id, + start: 0 + }, bestProbFromSet: 1 - }, [{sample: srcTransform1, p: 1}]); + }, [{sample: srcTransforms[0], p: 1}]); token1.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 0, + segment: { + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, + start: 0 + }, bestProbFromSet: 1 }, [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token2.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 1, + segment: { + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, + start: 1 + }, bestProbFromSet: 1 }, [{sample: {insert: toMathematicalSMP("and"), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token3.addInput({ - trueTransform: srcTransform2, - inputStartIndex: 4, + segment: { + trueTransform: srcTransforms[1], + transitionId: srcTransforms[1].id, + start: 4 + }, bestProbFromSet: 1 }, [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); token3.addInput({ - trueTransform: srcTransform3, - inputStartIndex: 0, + segment: { + trueTransform: srcTransforms[2], + transitionId: srcTransforms[2].id, + start: 0 + }, bestProbFromSet: 1 - }, [{sample: srcTransform3, p: 1}]); + }, [{sample: srcTransforms[2], p: 1}]); token4.addInput({ - trueTransform: srcTransform4, - inputStartIndex: 0, + segment: { + trueTransform: srcTransforms[3], + transitionId: srcTransforms[3].id, + start: 0 + }, bestProbFromSet: 1 - }, [{sample: srcTransform4, p: 1}]); + }, [{sample: srcTransforms[3], p: 1}]); const merged = ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes")); - assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) )); + assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({ + segment: { + trueTransform: t, + transitionId: t.id, + start: 0 + }, bestProbFromSet: 1 + }))); assert.isTrue(quotientPathHasInputs( merged.searchModule, srcTransforms.map((t) => ([{sample: t, p: 1}])) @@ -302,7 +367,13 @@ describe('ContextToken', function() { const tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: .75}, keystrokeDistributions[i]); + tokenToSplit.addInput({ + segment: { + trueTransform: keystrokeDistributions[i][0].sample, + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, bestProbFromSet: .75 + }, keystrokeDistributions[i]); }; assert.equal(tokenToSplit.sourceRangeKey, 'T11+T12+T13+T14'); @@ -338,7 +409,14 @@ describe('ContextToken', function() { const tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: 1}, keystrokeDistributions[i]); + tokenToSplit.addInput({ + segment: { + trueTransform: keystrokeDistributions[i][0].sample, + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, keystrokeDistributions[i]); }; assert.equal(tokenToSplit.sourceRangeKey, `T${keystrokeDistributions[0][0].sample.id}`); @@ -360,14 +438,17 @@ describe('ContextToken', function() { assert.equal(resultsOfSplit.length, 3); assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), splitTextArray); - assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.inputRange[0]), [0, 3, 8].map(i => ({ - trueTransform: { - insert: 'biglargetransform', - deleteLeft: 0, - deleteRight: 0, - id: keystrokeDistributions[0][0].sample.id + assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.inputSegments[0]), [0, 3, 8].map(i => ({ + segment: { + trueTransform: { + insert: 'biglargetransform', + id: keystrokeDistributions[0][0].sample.id, + deleteLeft: 0, + deleteRight: 0 + }, + transitionId: keystrokeDistributions[0][0].sample.id, + start: i }, - inputStartIndex: i, bestProbFromSet: 1 }))); @@ -394,7 +475,14 @@ describe('ContextToken', function() { const tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: 1}, keystrokeDistributions[i]); + tokenToSplit.addInput({ + segment: { + trueTransform: keystrokeDistributions[i][0].sample, + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, keystrokeDistributions[i]); }; assert.equal(tokenToSplit.exampleInput, 'largelongtransforms'); @@ -416,16 +504,49 @@ describe('ContextToken', function() { assert.equal(resultsOfSplit.length, 3); assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), splitTextArray); - assert.deepEqual(resultsOfSplit[0].inputRange, [ - { trueTransform: keystrokeDistributions[0][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, - { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, + assert.deepEqual(resultsOfSplit[0].inputSegments, [ + { + segment: { + trueTransform: keystrokeDistributions[0][0].sample, + transitionId: keystrokeDistributions[0][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, { + segment: { + trueTransform: keystrokeDistributions[1][0].sample, + transitionId: keystrokeDistributions[1][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, ]); - assert.deepEqual(resultsOfSplit[1].inputRange, [ - { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 'arge'.length, bestProbFromSet: 1 }, - { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, + assert.deepEqual(resultsOfSplit[1].inputSegments, [ + { + segment: { + trueTransform: keystrokeDistributions[1][0].sample, + transitionId: keystrokeDistributions[1][0].sample.id, + start: 'arge'.length + }, + bestProbFromSet: 1 + }, { + segment: { + trueTransform: keystrokeDistributions[2][0].sample, + transitionId: keystrokeDistributions[2][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, ]); - assert.deepEqual(resultsOfSplit[2].inputRange, [ - { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 } + assert.deepEqual(resultsOfSplit[2].inputSegments, [ + { + segment: { + trueTransform: keystrokeDistributions[2][0].sample, + transitionId: keystrokeDistributions[2][0].sample.id, + start: 'ng'.length, + }, + bestProbFromSet: 1 + } ]); assert.isTrue(quotientPathHasInputs( @@ -494,7 +615,14 @@ describe('ContextToken', function() { const tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: 1}, keystrokeDistributions[i]); + tokenToSplit.addInput({ + segment: { + trueTransform: keystrokeDistributions[i][0].sample, + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, keystrokeDistributions[i]); }; assert.equal(tokenToSplit.exampleInput, toMathematicalSMP('largelongtransforms')); @@ -516,17 +644,46 @@ describe('ContextToken', function() { assert.equal(resultsOfSplit.length, 3); assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), splitTextArray); - assert.deepEqual(resultsOfSplit[0].inputRange, [ - { trueTransform: keystrokeDistributions[0][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, - { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, - ]); - assert.deepEqual(resultsOfSplit[1].inputRange, [ - { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 'arge'.length, bestProbFromSet: 1 }, - { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 0, bestProbFromSet: 1 }, + assert.deepEqual(resultsOfSplit[0].inputSegments, [{ + segment: { + trueTransform: keystrokeDistributions[0][0].sample, + transitionId: keystrokeDistributions[0][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, { + segment: { + trueTransform: keystrokeDistributions[1][0].sample, + transitionId: keystrokeDistributions[1][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + }, ]); - assert.deepEqual(resultsOfSplit[2].inputRange, [ - { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 } + assert.deepEqual(resultsOfSplit[1].inputSegments, [{ + segment: { + trueTransform: keystrokeDistributions[1][0].sample, + transitionId: keystrokeDistributions[1][0].sample.id, + start: 'arge'.length + }, + bestProbFromSet: 1 + }, { + segment: { + trueTransform: keystrokeDistributions[2][0].sample, + transitionId: keystrokeDistributions[2][0].sample.id, + start: 0 + }, + bestProbFromSet: 1 + } ]); + assert.deepEqual(resultsOfSplit[2].inputSegments, [{ + segment: { + trueTransform: keystrokeDistributions[2][0].sample, + transitionId: keystrokeDistributions[2][0].sample.id, + start: 'ng'.length + }, + bestProbFromSet: 1 + }]); assert.isTrue(quotientPathHasInputs( resultsOfSplit[0].searchModule, [ @@ -584,19 +741,22 @@ describe('ContextToken', function() { describe('preprocessInputSources', () => { it('properly preprocesses deleteLefts in the transforms', () => { const transforms: Transform[] = [ - { insert: 'long', deleteLeft: 0, deleteRight: 0 }, - { insert: 'argelovely', deleteLeft: 3, deleteRight: 0 }, - { insert: 'ngtransforms', deleteLeft: 4, deleteRight: 0 } + { insert: 'long', deleteLeft: 0, deleteRight: 0, id: 11 }, + { insert: 'argelovely', deleteLeft: 3, deleteRight: 0, id: 12 }, + { insert: 'ngtransforms', deleteLeft: 4, deleteRight: 0, id: 13 } ]; - const results = preprocessInputSources(transforms.map((t) => ({ - trueTransform: t, - inputStartIndex: 0, + const results = preprocessInputSources(transforms.map((t, i) => ({ + segment: { + trueTransform: t, + transitionId: t.id, + start: 0 + }, bestProbFromSet: 1 }))); assert.equal(results.length, transforms.length); - assert.sameOrderedMembers(results.map((entry) => entry.trueTransform.insert), ['l', 'argelo', 'ngtransforms']); - assert.sameOrderedMembers(results.map((entry) => entry.trueTransform.deleteLeft), [0, 0, 0]); + assert.sameOrderedMembers(results.map((entry) => entry.segment.trueTransform.insert), ['l', 'argelo', 'ngtransforms']); + assert.sameOrderedMembers(results.map((entry) => entry.segment.trueTransform.deleteLeft), [0, 0, 0]); }); }); \ No newline at end of file diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts index b65e046ba57..2a5dcd59614 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts @@ -49,7 +49,13 @@ function toTransformToken(text: string, transformId?: number) { let isWhitespace = text == ' '; let token = new ContextToken(plainModel); const textAsTransform = { insert: text, deleteLeft: 0, id: idSeed }; - token.addInput({trueTransform: textAsTransform, inputStartIndex: 0, bestProbFromSet: 1}, [ { sample: textAsTransform, p: 1 } ]); + token.addInput({ + segment: { + trueTransform: textAsTransform, + transitionId: textAsTransform.id, + start: 0 + }, bestProbFromSet: 1 + }, [ { sample: textAsTransform, p: 1 } ]); token.isWhitespace = isWhitespace; return token; } @@ -200,7 +206,7 @@ describe('ContextTokenization', function() { }, removedTokenCount: 0 }, - inputs: [{ sample: inputTransformMap, p: 1 }], + inputs: [{ sample: inputTransformMap, p: 1 }] }, plainModel, inputTransform, @@ -290,7 +296,7 @@ describe('ContextTokenization', function() { }, removedTokenCount: 0 }, - inputs: [{ sample: inputTransformMap, p: 1 }], + inputs: [{ sample: inputTransformMap, p: 1 }] }, plainModel, inputTransform, @@ -457,13 +463,32 @@ describe('ContextTokenization', function() { ); const boundaryToken = tokenization.tokens[tokenization.tokens.length-3]; - const boundaryTailInput = boundaryToken.inputRange[boundaryToken.inputRange.length - 1]; - assert.deepEqual(boundaryTailInput, {trueTransform: inputTransform, inputStartIndex: 0, bestProbFromSet: 1}); + const boundaryTailInput = boundaryToken.inputSegments[boundaryToken.inputSegments.length - 1]; + assert.deepEqual(boundaryTailInput, { + segment: { + trueTransform: inputTransform, + transitionId: inputTransform.id, + start: 0 + }, bestProbFromSet: 1 + }); // The new tail tokens should not include anything from the original tail; // the token should be replaced. - assert.deepEqual(tokenization.tokens[tokenization.tokens.length-2].inputRange, [{trueTransform: inputTransform, inputStartIndex: 0, bestProbFromSet: 1}]); - assert.deepEqual(tokenization.tokens[tokenization.tokens.length-1].inputRange, [{trueTransform: inputTransform, inputStartIndex: 1, bestProbFromSet: 1}]); + assert.deepEqual(tokenization.tokens[tokenization.tokens.length-2].inputSegments, [{ + segment: { + trueTransform: inputTransform, + transitionId: inputTransform.id, + start: 0 + }, bestProbFromSet: 1 + }]); + assert.deepEqual(tokenization.tokens[tokenization.tokens.length-1].inputSegments, [{ + segment: { + trueTransform: inputTransform, + transitionId: inputTransform.id, + start: 1 + }, + bestProbFromSet: 1 + }]); const tailIndex = tokenization.tokens.length - 1; for(let i of inputTransformMap.keys()) { diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts index fbd013ddeb3..880ad7782f3 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts @@ -16,7 +16,15 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { deepCopy } from '@keymanapp/web-utils'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; -import { buildEdgeWindow, ContextToken, ContextTokenization, models, precomputationSubsetKeyer, TokenizationTransitionEdits, TokenizationSubsetBuilder } from '@keymanapp/lm-worker/test-index'; +import { + buildEdgeWindow, + ContextToken, + ContextTokenization, + models, + precomputationSubsetKeyer, + TokenizationTransitionEdits, + TokenizationSubsetBuilder +} from '@keymanapp/lm-worker/test-index'; import Distribution = LexicalModelTypes.Distribution; import Transform = LexicalModelTypes.Transform; @@ -172,10 +180,18 @@ describe('precomputationSubsetKeyer', function() { [...tokenization.tokens, (() => { const token = new ContextToken(plainModel, 'da'); // source text: 'date' - token.addInput( - {trueTransform: {insert: 'te', deleteLeft: 0}, inputStartIndex: 0, bestProbFromSet: 1}, - [{sample: {insert: 'te', deleteLeft: 0}, p: 1}] - ); + token.addInput({ + segment: { + trueTransform: { + insert: 'te', + deleteLeft: 0, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [ + {sample: {insert: 'te', deleteLeft: 0, id: 13}, p: 1} + ]); return token; })()], { insert: 's', deleteLeft: 0, deleteRight: 0 }, @@ -187,7 +203,7 @@ describe('precomputationSubsetKeyer', function() { }, tokenizedTransform: (() => { const map = new Map(); - map.set(0, { insert: 's', deleteLeft: 0 }); + map.set(0, { insert: 's', deleteLeft: 0, id: 14 }); return map; })() }; @@ -199,13 +215,21 @@ describe('precomputationSubsetKeyer', function() { [...tokenization.tokens, (() => { const token = new ContextToken(plainModel, 'da'); // source text: 'date' - token.addInput( - {trueTransform: {insert: 'te', deleteLeft: 0}, inputStartIndex: 0, bestProbFromSet: 1}, - [{sample: {insert: 't', deleteLeft: 0}, p: 1}] - ); + token.addInput({ + segment: { + trueTransform: { + insert: 'te', + deleteLeft: 0, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [ + {sample: {insert: 't', deleteLeft: 0}, p: 1} + ]); return token; })()], - { insert: 'es', deleteLeft: 0, deleteRight: 0 }, + { insert: 'es', deleteLeft: 0, deleteRight: 0, id: 14 }, false ), retokenization: [...rawTextTokens] @@ -240,13 +264,20 @@ describe('precomputationSubsetKeyer', function() { const token = new ContextToken(plainModel, 'da'); token.isPartial = true; // source text: 'dat' - token.addInput( - {trueTransform: {insert: 't', deleteLeft: 0}, inputStartIndex: 0, bestProbFromSet: 1}, - [{sample: {insert: 'ts', deleteLeft: 0}, p: 1}] - ); + token.addInput({ + segment: { + trueTransform: { + insert: 't', + deleteLeft: 0, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [{sample: {insert: 'ts', deleteLeft: 0, id: 13}, p: 1} + ]); return token; })()], - { insert: 'e', deleteLeft: 1, deleteRight: 0 }, + { insert: 'e', deleteLeft: 1, deleteRight: 0, id: 14 }, false ), retokenization: [...rawTextTokens] @@ -255,7 +286,7 @@ describe('precomputationSubsetKeyer', function() { }, tokenizedTransform: (() => { const map = new Map(); - map.set(0, { insert: 'e', deleteLeft: 1 }); + map.set(0, { insert: 'e', deleteLeft: 1, id: 14 }); return map; })() }; @@ -268,13 +299,21 @@ describe('precomputationSubsetKeyer', function() { const token = new ContextToken(plainModel, 'da'); token.isPartial = true; // source text: 'dat' - token.addInput( - {trueTransform: {insert: 't', deleteLeft: 0}, inputStartIndex: 0, bestProbFromSet: 1}, - [{sample: {insert: 't', deleteLeft: 0}, p: 1}] - ); + token.addInput({ + segment: { + trueTransform: { + insert: 't', + deleteLeft: 0, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [ + {sample: {insert: 't', deleteLeft: 0, id: 13}, p: 1} + ]); return token; })()], - { insert: 'e', deleteLeft: 0, deleteRight: 0 }, + { insert: 'e', deleteLeft: 0, deleteRight: 0, id: 14 }, false ), retokenization: [...rawTextTokens] @@ -717,19 +756,35 @@ describe('TokenizationSubsetBuilder', function() { const baseRawTextTokens = ['drink', ' ', 'coffee', ' ', 'at', ' ', 'a', ' ', 'cafe']; const baseTokenization = new ContextTokenization(baseRawTextTokens.map((text => toToken(text)))); - const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1 }; + const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1, id: 13 }; const fourCharTailToken = new ContextToken(baseTokenization.tail); - fourCharTailToken.addInput( - {trueTransform: { insert: 'é', deleteLeft: 1 }, inputStartIndex: 0, bestProbFromSet: 1}, - [{ sample: trueSourceTransform, p: .6 }] - ); + fourCharTailToken.addInput({ + segment: { + trueTransform: { + insert: 'é', + deleteLeft: 1, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [ + { sample: trueSourceTransform, p: .6 } + ]); const fiveCharTailToken = new ContextToken(baseTokenization.tail); - fiveCharTailToken.addInput( - {trueTransform: { insert: 'é', deleteLeft: 1 }, inputStartIndex: 0, bestProbFromSet: 1}, - [{ sample: { insert: 's', deleteLeft: 0 }, p: .4 }] - ); + fiveCharTailToken.addInput({ + segment: { + trueTransform: { + insert: 'é', + deleteLeft: 1, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: 1 + }, [ + { sample: { insert: 's', deleteLeft: 0, id: 13 }, p: .4 } + ]); const subsetBuilder = new TokenizationSubsetBuilder(); const fourCharTokenization = new ContextTokenization([...baseTokenization.tokens.slice(0, -1), fourCharTailToken]); @@ -756,19 +811,34 @@ describe('TokenizationSubsetBuilder', function() { // target accented word: séance const baseTokenization = new ContextTokenization(baseRawTextTokens.map((text => toToken(text)))); - const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1 }; + const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1, id: 13 }; const twoCharTailToken = new ContextToken(baseTokenization.tail); - twoCharTailToken.addInput( - {trueTransform: { insert: 'é', deleteLeft: 1 }, inputStartIndex: 0, bestProbFromSet: .6}, - [{ sample: trueSourceTransform, p: .6 }] - ); + twoCharTailToken.addInput({ + segment: { + trueTransform: { + insert: 'é', + deleteLeft: 1, + id: 13 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: .6 + }, [ + { sample: trueSourceTransform, p: .6 } + ]); const threeCharTailToken = new ContextToken(baseTokenization.tail); - threeCharTailToken.addInput( - {trueTransform: { insert: 'é', deleteLeft: 1 }, inputStartIndex: 0, bestProbFromSet: .6}, - [{ sample: { insert: 'a', deleteLeft: 0 }, p: .4 }] - ); + threeCharTailToken.addInput({ + segment: { + trueTransform: { + insert: 'é', + deleteLeft: 1 + }, transitionId: 13, + start: 0 + }, bestProbFromSet: .6 + }, [ + { sample: { insert: 'a', deleteLeft: 0, id: 13}, p: .4 } + ]); const subsetBuilder = new TokenizationSubsetBuilder(); const twoCharTokenization = new ContextTokenization([...baseTokenization.tokens.slice(0, -1), twoCharTailToken]); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts index 189790b7d30..5554e05e1ed 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts @@ -76,10 +76,13 @@ describe('SearchQuotientSpur', () => { assert.deepEqual(extendedPath.bestExample, {text: 't', p: 0.5}); assert.deepEqual(extendedPath.parents, [rootPath]); assert.deepEqual(extendedPath.inputs, leadEdgeDistribution); - assert.deepEqual(extendedPath.sourceIdentifiers, [ + assert.deepEqual(extendedPath.inputSegments, [ { - trueTransform: leadEdgeDistribution[0].sample, - inputStartIndex: 0, + segment: { + trueTransform: leadEdgeDistribution[0].sample, + transitionId: leadEdgeDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: leadEdgeDistribution[0].p } ]); @@ -128,14 +131,20 @@ describe('SearchQuotientSpur', () => { assert.deepEqual(length2Path.bestExample, {text: 'tr', p: leadEdgeDistribution[0].p * tailEdgeDistribution[0].p}); assert.deepEqual(length2Path.parents, [length1Path]); assert.deepEqual(length2Path.inputs, tailEdgeDistribution); - assert.deepEqual(length2Path.sourceIdentifiers, [ + assert.deepEqual(length2Path.inputSegments, [ { - trueTransform: leadEdgeDistribution[0].sample, - inputStartIndex: 0, + segment: { + trueTransform: leadEdgeDistribution[0].sample, + transitionId: leadEdgeDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: leadEdgeDistribution[0].p }, { - trueTransform: tailEdgeDistribution[0].sample, - inputStartIndex: 0, + segment: { + trueTransform: tailEdgeDistribution[0].sample, + transitionId: tailEdgeDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: tailEdgeDistribution[0].p } ]); @@ -200,14 +209,20 @@ describe('SearchQuotientSpur', () => { assert.deepEqual(length2Path.bestExample, {text: 'tri', p: leadEdgeDistribution[0].p * tailEdgeDistribution[0].p}); assert.deepEqual(length2Path.parents, [length1Path]); assert.deepEqual(length2Path.inputs, tailEdgeDistribution); - assert.deepEqual(length2Path.sourceIdentifiers, [ + assert.deepEqual(length2Path.inputSegments, [ { - trueTransform: leadEdgeDistribution[0].sample, - inputStartIndex: 0, + segment: { + trueTransform: leadEdgeDistribution[0].sample, + transitionId: leadEdgeDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: leadEdgeDistribution[0].p }, { - trueTransform: tailEdgeDistribution[0].sample, - inputStartIndex: 0, + segment: { + trueTransform: tailEdgeDistribution[0].sample, + transitionId: tailEdgeDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: tailEdgeDistribution[0].p } ]);