From 340406cdfccdf3f8968ef751988cd8af426e7ff1 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Fri, 9 Jan 2026 10:21:20 -0600 Subject: [PATCH 1/4] change(web): rename .searchSpace property to .searchModule Build-bot: skip build:web Test-bot: skip --- .../src/main/correction/context-token.ts | 24 ++++---- .../worker-thread/src/main/predict-helpers.ts | 2 +- .../context/context-state.tests.ts | 24 ++++---- .../context/context-token.tests.ts | 58 +++++++++---------- .../context/context-tokenization.tests.ts | 40 ++++++------- 5 files changed, 74 insertions(+), 74 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts index 03769a5f3fd..aea2a7e4786 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts @@ -58,10 +58,10 @@ export class ContextToken { * Contains all relevant correction-search data for use in generating * corrections for this ContextToken instance. */ - public get searchSpace(): SearchQuotientSpur { - return this._searchSpace; + public get searchModule(): SearchQuotientSpur { + return this._searchModule; } - private _searchSpace: SearchQuotientSpur; + private _searchModule: SearchQuotientSpur; isPartial: boolean; @@ -107,7 +107,7 @@ export class ContextToken { // // In case we are unable to perfectly track context (say, due to multitaps) // we need to ensure that only fully-utilized keystrokes are considered. 
- this._searchSpace = priorToken.searchSpace; + this._searchModule = priorToken.searchModule; this._inputRange = priorToken._inputRange.slice(); // Preserve any annotated applied-suggestion transition ID data; it's useful @@ -138,7 +138,7 @@ export class ContextToken { searchSpace = searchSpace.addInput([{sample: transform, p: BASE_PROBABILITY}], 1); }); - this._searchSpace = searchSpace; + this._searchModule = searchSpace; } } @@ -148,7 +148,7 @@ export class ContextToken { */ addInput(inputSource: TokenInputSource, distribution: Distribution) { this._inputRange.push(inputSource); - this._searchSpace = this._searchSpace.addInput(distribution, inputSource.bestProbFromSet); + this._searchModule = this._searchModule.addInput(distribution, inputSource.bestProbFromSet); } /** @@ -210,7 +210,7 @@ export class ContextToken { * If not possible, find the best of the deepest search paths and append the * most likely keystroke data afterward. */ - const transforms = this.searchSpace.inputSequence.map((dist) => dist[0].sample) + const transforms = this.searchModule.inputSequence.map((dist) => dist[0].sample) const composite = transforms.reduce((accum, current) => buildMergedTransform(accum, current), {insert: '', deleteLeft: 0}); return composite.insert; } @@ -248,7 +248,7 @@ export class ContextToken { lastInputDistrib = lastInputDistrib?.map((entry, index) => { return { - sample: buildMergedTransform(entry.sample, token.searchSpace.inputSequence[0][index].sample), + sample: buildMergedTransform(entry.sample, token.searchModule.inputSequence[0][index].sample), p: entry.p } }); @@ -267,10 +267,10 @@ export class ContextToken { // Ignore the last entry for now - it may need to merge with a matching // entry in the next token! 
for(let i = startIndex; i < inputCount - 1; i++) { - resultToken.addInput(token.inputRange[i], token.searchSpace.inputSequence[i]); + resultToken.addInput(token.inputRange[i], token.searchModule.inputSequence[i]); } lastSourceInput = token.inputRange[inputCount-1]; - lastInputDistrib = token.searchSpace.inputSequence[inputCount-1]; + lastInputDistrib = token.searchModule.inputSequence[inputCount-1]; } resultToken.addInput(lastSourceInput, lastInputDistrib); @@ -326,7 +326,7 @@ export class ContextToken { const totalLenBeforeLastApply = committedLen + lenBeforeLastApply; // We read the start position for the NEXT token to know the split position. const extraCharsAdded = splitSpecs[1].textOffset - totalLenBeforeLastApply; - const tokenSequence = overextendedToken.searchSpace.inputSequence; + const tokenSequence = overextendedToken.searchModule.inputSequence; const lastInputIndex = tokenSequence.length - 1; const inputDistribution = tokenSequence[lastInputIndex]; const headDistribution = inputDistribution.map((m) => { @@ -374,7 +374,7 @@ export class ContextToken { backupToken = new ContextToken(constructingToken); lenBeforeLastApply = KMWString.length(currentText.left); currentText = applyTransform(alteredSources[transformIndex].trueTransform, currentText); - constructingToken.addInput(this.inputRange[transformIndex], this.searchSpace.inputSequence[transformIndex]); + constructingToken.addInput(this.inputRange[transformIndex], this.searchModule.inputSequence[transformIndex]); transformIndex++; } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts index f475bedf361..fe7cebefc77 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts @@ -460,7 +460,7 @@ export async function correctAndEnumerate( // Ideally, the answer (in the future) will be no, but 
leaving it in right now may pose an issue. // The 'eventual' logic will be significantly more complex, though still manageable. - const searchSpace = transition.final.tokenization.tail.searchSpace; + const searchSpace = transition.final.tokenization.tail.searchModule; // If corrections are not enabled, bypass the correction search aspect // entirely. No need to 'search' - just do a direct lookup. diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts index d83915a3b5f..93b6d5b164d 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts @@ -252,9 +252,9 @@ describe('ContextState', () => { // The 'wordbreak' transform let state = newContextMatch?.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence); + assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); assert.sameDeepMembers( - state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence, + state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence, [[{sample: { insert: '', deleteLeft: 0 }, p: 1}]] ); @@ -284,9 +284,9 @@ describe('ContextState', () => { // The 'wordbreak' transform let state = newContextMatch?.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence); + assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); assert.deepEqual( - state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence, + state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence, 
[[{ sample: {insert: '', deleteLeft: 0}, p: 1 }]] ); @@ -337,8 +337,8 @@ describe('ContextState', () => { // The 'wordbreak' transform let state = newContextMatch.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence); - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence); + assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); + assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); @@ -370,9 +370,9 @@ describe('ContextState', () => { // The 'wordbreak' transform let state = newContextMatch.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence); + assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); assert.deepEqual( - state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence, + state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence, [[{sample: {insert: '', deleteLeft: 0}, p: 1}]] ); @@ -402,9 +402,9 @@ describe('ContextState', () => { // The 'wordbreak' transform let state = newContextMatch.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence); + assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); assert.deepEqual( - state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence, + state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence, [[{sample: {insert: '', deleteLeft: 0}, p: 1}]] ); @@ -434,8 +434,8 @@ describe('ContextState', () => { // The 'wordbreak' 
transform let state = newContextMatch.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence); - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence); + assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); + assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts index b22128b12b1..aad94c6bb37 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts @@ -54,12 +54,12 @@ describe('ContextToken', function() { it("(model: LexicalModel)", async () => { let token = new ContextToken(plainModel); - assert.isEmpty(token.searchSpace.inputSequence); + assert.isEmpty(token.searchModule.inputSequence); assert.isEmpty(token.exampleInput); assert.isFalse(token.isWhitespace); // While searchSpace has no inputs, it _can_ match lexicon entries (via insertions). 
- let searchIterator = getBestMatches(token.searchSpace, new ExecutionTimer(Number.POSITIVE_INFINITY, Number.POSITIVE_INFINITY)); + let searchIterator = getBestMatches(token.searchModule, new ExecutionTimer(Number.POSITIVE_INFINITY, Number.POSITIVE_INFINITY)); let firstEntry = await searchIterator.next(); assert.isFalse(firstEntry.done); }); @@ -67,11 +67,11 @@ describe('ContextToken', function() { it("(model: LexicalModel, text: string)", () => { let token = new ContextToken(plainModel, "and"); - assert.isNotEmpty(token.searchSpace.inputSequence); + assert.isNotEmpty(token.searchModule.inputSequence); - assert.equal(token.searchSpace.inputSequence.map((entry) => entry[0].sample.insert).join(''), 'and'); - token.searchSpace.inputSequence.forEach((entry) => assert.equal(entry[0].sample.deleteLeft, 0)); - assert.deepEqual(token.searchSpace.inputSequence, [..."and"].map((char) => { + assert.equal(token.searchModule.inputSequence.map((entry) => entry[0].sample.insert).join(''), 'and'); + token.searchModule.inputSequence.forEach((entry) => assert.equal(entry[0].sample.deleteLeft, 0)); + assert.deepEqual(token.searchModule.inputSequence, [..."and"].map((char) => { return [{ sample: { insert: char, @@ -90,14 +90,14 @@ describe('ContextToken', function() { let baseToken = new ContextToken(plainModel, "and"); let clonedToken = new ContextToken(baseToken); - assert.equal(clonedToken.searchSpace, baseToken.searchSpace); - // Deep equality on .searchSpace can't be directly checked due to the internal complexities involved. + assert.equal(clonedToken.searchModule, baseToken.searchModule); + // Deep equality on .searchModule can't be directly checked due to the internal complexities involved. // We CAN check for the most important members, though. 
- assert.notEqual(clonedToken.searchSpace.inputSequence, baseToken.searchSpace.inputSequence); - assert.deepEqual(clonedToken.searchSpace.inputSequence, baseToken.searchSpace.inputSequence); + assert.notEqual(clonedToken.searchModule.inputSequence, baseToken.searchModule.inputSequence); + assert.deepEqual(clonedToken.searchModule.inputSequence, baseToken.searchModule.inputSequence); assert.notEqual(clonedToken, baseToken); - // Perfectly deep-equal when we ignore .searchSpace. + // Perfectly deep-equal when we ignore .searchModule. assert.deepEqual({...clonedToken, searchSpace: null}, {...baseToken, searchSpace: null}); }); }); @@ -114,9 +114,9 @@ describe('ContextToken', function() { token2.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); token3.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); - assert.sameOrderedMembers(merged.searchSpace.inputSequence.slice(0, 3), token1.searchSpace.inputSequence); - assert.sameOrderedMembers(merged.searchSpace.inputSequence.slice(3, 4), token2.searchSpace.inputSequence); - assert.sameOrderedMembers(merged.searchSpace.inputSequence.slice(4), token3.searchSpace.inputSequence); + assert.sameOrderedMembers(merged.searchModule.inputSequence.slice(0, 3), token1.searchModule.inputSequence); + assert.sameOrderedMembers(merged.searchModule.inputSequence.slice(3, 4), token2.searchModule.inputSequence); + assert.sameOrderedMembers(merged.searchModule.inputSequence.slice(4), token3.searchModule.inputSequence); }); it("merges three tokens from single previously-split transforms", () => { @@ -147,7 +147,7 @@ describe('ContextToken', function() { const merged = ContextToken.merge([token1, token2, token3], plainModel); assert.equal(merged.exampleInput, "can't"); assert.deepEqual(merged.inputRange, [ { trueTransform: srcTransform, inputStartIndex: 0, bestProbFromSet: 1 } ]); - assert.deepEqual(merged.searchSpace.inputSequence, [[{sample: srcTransform, p: 1}]]); + 
assert.deepEqual(merged.searchModule.inputSequence, [[{sample: srcTransform, p: 1}]]); }); it("merges four tokens with previously-split transforms", () => { @@ -206,7 +206,7 @@ describe('ContextToken', function() { const merged = ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, "applesandsourgrapes"); assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) )); - assert.deepEqual(merged.searchSpace.inputSequence, srcTransforms.map((t) => [{sample: t, p: 1}])); + assert.deepEqual(merged.searchModule.inputSequence, srcTransforms.map((t) => [{sample: t, p: 1}])); }); it("merges four tokens with previously-split transforms - non-BMP text", () => { @@ -265,7 +265,7 @@ describe('ContextToken', function() { const merged = ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes")); assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) )); - assert.deepEqual(merged.searchSpace.inputSequence, srcTransforms.map((t) => [{sample: t, p: 1}])); + assert.deepEqual(merged.searchModule.inputSequence, srcTransforms.map((t) => [{sample: t, p: 1}])); }); }); @@ -297,7 +297,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.sourceText, 'can\''); - assert.deepEqual(tokenToSplit.searchSpace.inputSequence, keystrokeDistributions); + assert.deepEqual(tokenToSplit.searchModule.inputSequence, keystrokeDistributions); // And now for the "fun" part. 
const resultsOfSplit = tokenToSplit.split({ @@ -314,7 +314,7 @@ describe('ContextToken', function() { assert.equal(resultsOfSplit.length, 2); assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), ['can', '\'']); - assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.searchSpace.inputSequence), [ + assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.searchModule.inputSequence), [ keystrokeDistributions.slice(0, 3), [keystrokeDistributions[3]] ]); @@ -335,7 +335,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.sourceText, 'biglargetransform'); - assert.deepEqual(tokenToSplit.searchSpace.inputSequence, keystrokeDistributions); + assert.deepEqual(tokenToSplit.searchModule.inputSequence, keystrokeDistributions); // And now for the "fun" part. const resultsOfSplit = tokenToSplit.split({ @@ -362,7 +362,7 @@ describe('ContextToken', function() { inputStartIndex: i, bestProbFromSet: 1 }))); - assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.searchSpace.inputSequence[0]), splitTextArray.map(t => [{ + assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.searchModule.inputSequence[0]), splitTextArray.map(t => [{ sample: { insert: t, deleteLeft: 0, deleteRight: 0 }, p: 1 }])); }); @@ -386,7 +386,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.exampleInput, 'largelongtransforms'); - assert.deepEqual(tokenToSplit.searchSpace.inputSequence, keystrokeDistributions); + assert.deepEqual(tokenToSplit.searchModule.inputSequence, keystrokeDistributions); // And now for the "fun" part. 
const resultsOfSplit = tokenToSplit.split({ @@ -416,7 +416,7 @@ describe('ContextToken', function() { { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 } ]); - assert.deepEqual(resultsOfSplit[0].searchSpace.inputSequence, [ + assert.deepEqual(resultsOfSplit[0].searchModule.inputSequence, [ keystrokeDistributions[0], keystrokeDistributions[1].map((entry) => { return { @@ -428,7 +428,7 @@ describe('ContextToken', function() { }), ]); - assert.deepEqual(resultsOfSplit[1].searchSpace.inputSequence, [ + assert.deepEqual(resultsOfSplit[1].searchModule.inputSequence, [ keystrokeDistributions[1].map((entry) => { return { sample: { @@ -448,7 +448,7 @@ describe('ContextToken', function() { }), ]); - assert.deepEqual(resultsOfSplit[2].searchSpace.inputSequence, [ + assert.deepEqual(resultsOfSplit[2].searchModule.inputSequence, [ keystrokeDistributions[2].map((entry) => { return { sample: { @@ -480,7 +480,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.exampleInput, toMathematicalSMP('largelongtransforms')); - assert.deepEqual(tokenToSplit.searchSpace.inputSequence, keystrokeDistributions); + assert.deepEqual(tokenToSplit.searchModule.inputSequence, keystrokeDistributions); // And now for the "fun" part. 
const resultsOfSplit = tokenToSplit.split({ @@ -510,7 +510,7 @@ describe('ContextToken', function() { { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 } ]); - assert.deepEqual(resultsOfSplit[0].searchSpace.inputSequence, [ + assert.deepEqual(resultsOfSplit[0].searchModule.inputSequence, [ keystrokeDistributions[0], keystrokeDistributions[1].map((entry) => { return { @@ -522,7 +522,7 @@ describe('ContextToken', function() { }), ]); - assert.deepEqual(resultsOfSplit[1].searchSpace.inputSequence, [ + assert.deepEqual(resultsOfSplit[1].searchModule.inputSequence, [ keystrokeDistributions[1].map((entry) => { return { sample: { @@ -542,7 +542,7 @@ describe('ContextToken', function() { }), ]); - assert.deepEqual(resultsOfSplit[2].searchSpace.inputSequence, [ + assert.deepEqual(resultsOfSplit[2].searchModule.inputSequence, [ keystrokeDistributions[2].map((entry) => { return { sample: { diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts index 92fe49674ff..8b630891917 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts @@ -139,16 +139,16 @@ describe('ContextTokenization', function() { let baseTokenization = new ContextTokenization(tokens, transitionEdits, null /* dummy val */); let cloned = new ContextTokenization(baseTokenization); - assert.deepEqual(cloned.tokens.map((token) => token.searchSpace.inputSequence), - baseTokenization.tokens.map((token) => token.searchSpace.inputSequence)); + assert.deepEqual(cloned.tokens.map((token) => token.searchModule.inputSequence), + baseTokenization.tokens.map((token) => token.searchModule.inputSequence)); - // The `.searchSpace` instances will not be 
deep-equal; there are class properties + // The `.searchModule` instances will not be deep-equal; there are class properties // that hold functions with closures, configured at runtime. // @ts-ignore - TS2704 b/c deleting a readonly property. - baseTokenization.tokens.forEach((token) => delete token.searchSpace); + baseTokenization.tokens.forEach((token) => delete token.searchModule); // @ts-ignore - TS2704 b/c deleting a readonly property. - cloned.tokens.forEach((token) => delete token.searchSpace); + cloned.tokens.forEach((token) => delete token.searchModule); assert.deepEqual(cloned, baseTokenization); }); @@ -199,11 +199,11 @@ describe('ContextTokenization', function() { targetTokens ); assert.includeDeepMembers( - tokenization.tokens[tokenization.tokens.length - 2].searchSpace.inputSequence, + tokenization.tokens[tokenization.tokens.length - 2].searchModule.inputSequence, [[{sample: inputTransformMap.get(1), p: 1}]] ); assert.includeDeepMembers( - tokenization.tail.searchSpace.inputSequence, + tokenization.tail.searchModule.inputSequence, [[{sample: inputTransformMap.get(2), p: 1}]] ); }); @@ -282,7 +282,7 @@ describe('ContextTokenization', function() { targetTokens ); assert.includeDeepMembers( - tokenization.tail.searchSpace.inputSequence, + tokenization.tail.searchModule.inputSequence, [[{sample: inputTransformMap.get(0), p: 1}]] ); }); @@ -323,7 +323,7 @@ describe('ContextTokenization', function() { targetTokens ); assert.includeDeepMembers( - tokenization.tail.searchSpace.inputSequence, + tokenization.tail.searchModule.inputSequence, // As we fully deleted the old token, the new one "starts" after the deleteLeft. // The deleteLeft component should not be included here. 
[[{sample: { insert: 'week', deleteLeft: 0 /* NOT 3 */ }, p: 1}]] @@ -377,7 +377,7 @@ describe('ContextTokenization', function() { } assert.includeDeepMembers( - tokenization.tokens[tailIndex + i].searchSpace.inputSequence, + tokenization.tokens[tailIndex + i].searchModule.inputSequence, [[{sample: transform, p: 1}]] ); } @@ -439,7 +439,7 @@ describe('ContextTokenization', function() { } assert.includeDeepMembers( - tokenization.tokens[tailIndex + i].searchSpace.inputSequence, + tokenization.tokens[tailIndex + i].searchModule.inputSequence, [[{sample: transform, p: 1}]] ); } @@ -493,7 +493,7 @@ describe('ContextTokenization', function() { } assert.includeDeepMembers( - tokenization.tokens[tailIndex + i].searchSpace.inputSequence, + tokenization.tokens[tailIndex + i].searchModule.inputSequence, [[{sample: transform, p: 1}]] ); } @@ -553,8 +553,8 @@ describe('ContextTokenization', function() { [...baseTokenization.tokens[baseTokenization.tokens.length - 2].inputRange] ); assert.includeDeepMembers( - tokenization.tail.searchSpace.inputSequence, - baseTokenization.tokens[baseTokenization.tokens.length - 2].searchSpace.inputSequence + tokenization.tail.searchModule.inputSequence, + baseTokenization.tokens[baseTokenization.tokens.length - 2].searchModule.inputSequence ); assert.includeDeepMembers( @@ -562,8 +562,8 @@ describe('ContextTokenization', function() { [...baseTokenization.tokens[baseTokenization.tokens.length - 1].inputRange] ); assert.includeDeepMembers( - tokenization.tail.searchSpace.inputSequence, - baseTokenization.tokens[baseTokenization.tokens.length - 1].searchSpace.inputSequence + tokenization.tail.searchModule.inputSequence, + baseTokenization.tokens[baseTokenization.tokens.length - 1].searchModule.inputSequence ); }); @@ -624,8 +624,8 @@ describe('ContextTokenization', function() { [...tokenization.tokens[tokenization.tokens.length - 2].inputRange] ); assert.includeDeepMembers( - baseTokenization.tail.searchSpace.inputSequence, - 
tokenization.tokens[tokenization.tokens.length - 2].searchSpace.inputSequence + baseTokenization.tail.searchModule.inputSequence, + tokenization.tokens[tokenization.tokens.length - 2].searchModule.inputSequence ); // We've also appended a '.' to the final split-off token. Thus, we need @@ -635,8 +635,8 @@ describe('ContextTokenization', function() { [...tokenization.tokens[tokenization.tokens.length - 1].inputRange] ); assert.includeDeepMembers( - [...baseTokenization.tail.searchSpace.inputSequence, [{sample: { insert: '.', deleteLeft: 0 }, p: 1}]], - tokenization.tokens[tokenization.tokens.length - 1].searchSpace.inputSequence + [...baseTokenization.tail.searchModule.inputSequence, [{sample: { insert: '.', deleteLeft: 0 }, p: 1}]], + tokenization.tokens[tokenization.tokens.length - 1].searchModule.inputSequence ); }); }); From d0469ff3f0a147be4b2928949b25ee5c6218ce05 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Fri, 17 Oct 2025 14:50:35 -0500 Subject: [PATCH 2/4] refactor(web): adds SearchQuotientNode interface This new interface is being added in preparation for efficient multi-tokenization correction-search. SearchQuotientSpur has been modified to implement it, and a new type (SearchQuotientNodeImpl) will be added in the near future as an additional implementing type. 
Build-bot: skip build:web Test-bot: skip --- .../src/main/correction/context-token.ts | 13 +-- .../src/main/correction/distance-modeler.ts | 18 +---- .../main/correction/search-quotient-node.ts | 81 +++++++++++++++++++ .../main/correction/search-quotient-spur.ts | 23 +++++- .../worker-thread/src/main/predict-helpers.ts | 2 +- 5 files changed, 103 insertions(+), 34 deletions(-) create mode 100644 web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts index aea2a7e4786..5387c0da414 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts @@ -201,18 +201,7 @@ export class ContextToken { * received that can correspond to the current instance. */ get exampleInput(): string { - /* - * TODO: with clear limits (strict cost minimization?) / prior calculation - * attempts, return the best _suggestion_ for this token. This is - * especially relevant for epic/dict-breaker - we want to best model the token - * as it would apply within the word-breaking algorithm. - * - * If not possible, find the best of the deepest search paths and append the - * most likely keystroke data afterward. 
- */ - const transforms = this.searchModule.inputSequence.map((dist) => dist[0].sample) - const composite = transforms.reduce((accum, current) => buildMergedTransform(accum, current), {insert: '', deleteLeft: 0}); - return composite.insert; + return this.searchModule.bestExample.text; } /** diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts index b23d5aebc83..b9d674ecca1 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts @@ -6,6 +6,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { ClassicalDistanceCalculation } from './classical-calculation.js'; import { ExecutionTimer, STANDARD_TIME_BETWEEN_DEFERS } from './execution-timer.js'; import { QUEUE_NODE_COMPARATOR, SearchQuotientSpur } from './search-quotient-spur.js'; +import { PathResult } from './search-quotient-node.js'; import { subsetByChar, subsetByInterval, mergeSubset, TransformSubset } from '../transform-subsets.js'; import Distribution = LexicalModelTypes.Distribution; @@ -599,23 +600,6 @@ export class SearchResult { } } -type NullPath = { - type: 'none' -} - -type IntermediateSearchPath = { - type: 'intermediate', - cost: number -} - -type CompleteSearchPath = { - type: 'complete', - cost: number, - finalNode: SearchNode -} - -export type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath; - // Current best guesstimate of how compositor will retrieve ideal corrections. 
export async function *getBestMatches(searchSpace: SearchQuotientSpur, timer: ExecutionTimer): AsyncGenerator { let currentReturns: {[resultKey: string]: SearchNode} = {}; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts new file mode 100644 index 00000000000..c1a3a6051bd --- /dev/null +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -0,0 +1,81 @@ +/* + * Keyman is copyright (C) SIL Global. MIT License. + * + * Created by jahorton on 2025-10-09 + * + * This file the predictive-text engine's SearchSpace class, which is used to + * manage the search-space(s) for text corrections within the engine. + */ + +import { SearchNode, SearchResult } from "./distance-modeler.js"; + +export let SPACE_ID_SEED = 0; + +export function generateSpaceSeed(): number { + return SPACE_ID_SEED++; +} + +type NullPath = { + type: 'none' +} + +type IntermediateSearchPath = { + type: 'intermediate', + cost: number +} + +type CompleteSearchPath = { + type: 'complete', + cost: number, + finalNode: SearchNode +} + +export type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath; + +/** + * Represents all or a portion of the dynamically-generated graph used to search + * for predictive-text corrections. + */ +export interface SearchQuotientNode { + /** + * Retrieves the lowest-cost / lowest-distance edge from the batcher's search + * area, checks its validity as a correction to the input text, and reports on + * what sort of result the edge's destination node represents. + * @returns + */ + handleNextNode(): PathResult; + + /** + * Reports the cost of the lowest-cost / lowest-distance edge held within the + * batcher's search area. + * @returns + */ + readonly currentCost: number; + + /** + * Returns the set of previously-processed results under this batcher's domain. 
+ */ + readonly previousResults: SearchResult[]; + + /** + * When true, this indicates that the currently-represented portion of context + * has fat-finger data available, which itself indicates that the user has + * corrections enabled. + */ + readonly correctionsEnabled: boolean; + + /** + * Reports the total number of input keystrokes represented by this + * graph/subgraph. + * + * (Their fat-finger alternates, when provided, do not influence this count - + * they're associated with the original keystroke that affected the context.) + */ + readonly inputCount: number; + + /** + * Determines the best example text representable by this batcher's portion of + * the correction-search graph and its paths. + */ + readonly bestExample: { text: string, p: number }; +} \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts index a8cb2e4468f..40dddf48dcd 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -8,10 +8,11 @@ * engine. 
*/ -import { QueueComparator as Comparator, PriorityQueue } from '@keymanapp/web-utils'; +import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keymanapp/web-utils'; import { LexicalModelTypes } from '@keymanapp/common-types'; -import { EDIT_DISTANCE_COST_SCALE, PathResult, SearchNode, SearchResult } from './distance-modeler.js'; +import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js'; +import { PathResult, SearchQuotientNode } from './search-quotient-node.js'; import Distribution = LexicalModelTypes.Distribution; import LexicalModel = LexicalModelTypes.LexicalModel; @@ -25,7 +26,7 @@ export const QUEUE_NODE_COMPARATOR: Comparator = function(arg1, arg2 // The set of search spaces corresponding to the same 'context' for search. // Whenever a wordbreak boundary is crossed, a new instance should be made. -export class SearchQuotientSpur { +export class SearchQuotientSpur implements SearchQuotientNode { private selectionQueue: PriorityQueue = new PriorityQueue(QUEUE_NODE_COMPARATOR); private inputs: Distribution; @@ -114,6 +115,20 @@ export class SearchQuotientSpur { } } + public get inputCount(): number { + return (this.parentPath?.inputCount ?? 0) + (this.inputs ? 1 : 0); + } + + public get bestExample(): {text: string, p: number} { + const bestPrefix = this.parentPath?.bestExample ?? { text: '', p: 1 }; + const bestLocalInput = this.inputs?.reduce((max, curr) => max.p < curr.p ? curr : max) ?? 
{ sample: { insert: '', deleteLeft: 0 }, p: 1}; + + return { + text: KMWString.substring(bestPrefix.text, 0, KMWString.length(bestPrefix.text) - bestLocalInput.sample.deleteLeft) + bestLocalInput.sample.insert, + p: bestPrefix.p * bestLocalInput.p + } + } + increaseMaxEditDistance() { this.parentPath.increaseMaxEditDistance(); @@ -308,7 +323,7 @@ export class SearchQuotientSpur { }; } - public previousResults(): SearchResult[] { + public get previousResults(): SearchResult[] { return Object.values(this.returnedValues).map(v => new SearchResult(v)); } } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts index fe7cebefc77..fb3fb234866 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts @@ -532,7 +532,7 @@ export async function correctAndEnumerate( * Worst-case, it's possible to temporarily add normalization if a code deep-dive * is needed in the future. */ - if(searchSpace.inputSequence.length <= 1) { + if(searchSpace.inputCount <= 1) { /* Suppose a key distribution: most likely with p=0.5, second-most with 0.4 - a pretty * ambiguous case that would only arise very near the center of the boundary between two keys. * Raising (0.5/0.4)^16 ~= 35.53. (At time of writing, SINGLE_CHAR_KEY_PROB_EXPONENT = 16.) 
From af61a56db9ed54c9e0d339fce2bb1bfbf126dd4b Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Mon, 5 Jan 2026 15:13:21 -0600 Subject: [PATCH 3/4] docs(web): fix doc-comment word omission per review --- .../worker-thread/src/main/correction/search-quotient-node.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts index c1a3a6051bd..7f8377459a3 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -3,7 +3,7 @@ * * Created by jahorton on 2025-10-09 * - * This file the predictive-text engine's SearchSpace class, which is used to + * This file defines the predictive-text engine's SearchSpace class, which is used to * manage the search-space(s) for text corrections within the engine. */ From ce3f90dd81c32839f6f2cd1c337195dd1521f635 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 7 Jan 2026 10:56:26 -0600 Subject: [PATCH 4/4] change(web): do not export SPACE_ID_SEED --- .../worker-thread/src/main/correction/search-quotient-node.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts index 7f8377459a3..590d1b79657 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -9,7 +9,7 @@ import { SearchNode, SearchResult } from "./distance-modeler.js"; -export let SPACE_ID_SEED = 0; +let SPACE_ID_SEED = 0; export function generateSpaceSeed(): number { return SPACE_ID_SEED++;