From 3a56e2375332e866baec58e7d50df8c4dfcdb24e Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Tue, 21 Oct 2025 14:04:37 -0500 Subject: [PATCH 1/5] refactor(web): generalize SearchQuotientSpur parent requirements As an upcoming goal is to introduce a new SearchQuotientNode type that will assist with context-caching across multiple tokenizations, it is wise to generalize SearchQuotientSpur and functions utilizing it to accept any SearchQuotientNode-implementing type as its parent. Build-bot: skip build:web Test-bot: skip --- .../src/main/correction/context-token.ts | 5 +- .../src/main/correction/distance-modeler.ts | 2 + .../main/correction/search-quotient-node.ts | 25 ++ .../main/correction/search-quotient-spur.ts | 130 +++++++--- .../src/main/model-compositor.ts | 2 +- .../context/context-state.tests.ts | 49 ++-- .../context/context-token.tests.ts | 96 +++---- .../context/context-tokenization.tests.ts | 146 ++++++----- .../search-quotient-spur.tests.ts | 242 ++++++++++++++++++ 9 files changed, 525 insertions(+), 172 deletions(-) create mode 100644 web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts index 2ac222f32b0..8c121fb1b99 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts @@ -12,6 +12,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { deepCopy, KMWString } from "@keymanapp/web-utils"; import { SearchQuotientSpur } from "./search-quotient-spur.js"; +import { SearchQuotientNode } from "./search-quotient-node.js"; import { TokenSplitMap } from "./context-tokenization.js"; import Distribution = LexicalModelTypes.Distribution; @@ -58,10 +59,10 @@ export class ContextToken 
{ * Contains all relevant correction-search data for use in generating * corrections for this ContextToken instance. */ - public get searchModule(): SearchQuotientSpur { + public get searchModule(): SearchQuotientNode { return this._searchModule; } - private _searchModule: SearchQuotientSpur; + private _searchModule: SearchQuotientNode; isPartial: boolean; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts index 14df6264b5a..c7f3efb3d9b 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts @@ -22,6 +22,8 @@ type RealizedInput = ProbabilityMass[]; // NOT Distribution - they'r export const EDIT_DISTANCE_COST_SCALE = 5; export const MIN_KEYSTROKE_PROBABILITY = 0.0001; +export const DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL = 33; // in milliseconds. + export type TraversableToken = { key: TUnit, traversal: LexiconTraversal diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts index e22f38c4f8c..153edbe02c4 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -49,6 +49,12 @@ export interface SearchQuotientNode { */ readonly spaceId: number; + /** + * Notes the SearchQuotientNode(s) whose correction-search paths are extended + * by this SearchQuotientNode. 
+ */ + readonly parents: SearchQuotientNode[]; + /** * Retrieves the lowest-cost / lowest-distance edge from the batcher's search * area, checks its validity as a correction to the input text, and reports on @@ -57,6 +63,25 @@ export interface SearchQuotientNode { */ handleNextNode(): PathResult; + /** + * Denotes whether or not the represented search space includes paths built from + * the specified set of keystroke input distributions. The distribution count + * should match .inputCount - no omissions or extras are permitted. + * + * Designed explicitly for use in unit testing; it's not super-efficient, so + * avoid live use. + * + * @param keystrokeDistributions + * @internal + */ + hasInputs(keystrokeDistributions: Distribution[]): boolean; + + /** + * Increases the editing range that will be considered for determining + * correction distances. + */ + increaseMaxEditDistance(): void; + /** * Reports the cost of the lowest-cost / lowest-distance edge held within the * batcher's search area. diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts index 536e9a9ec27..b877aaf2868 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -18,8 +18,6 @@ import Distribution = LexicalModelTypes.Distribution; import LexicalModel = LexicalModelTypes.LexicalModel; import Transform = LexicalModelTypes.Transform; -export const DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL = 33; // in milliseconds. 
- export const QUEUE_NODE_COMPARATOR: Comparator = function(arg1, arg2) { return arg1.currentCost - arg2.currentCost; } @@ -30,9 +28,11 @@ export class SearchQuotientSpur implements SearchQuotientNode { private selectionQueue: PriorityQueue = new PriorityQueue(QUEUE_NODE_COMPARATOR); readonly inputs?: Distribution>; - private parentPath: SearchQuotientSpur; + private parentNode: SearchQuotientNode; readonly spaceId: number; + readonly inputCount: number; + /** * Marks all results that have already been returned from this instance of SearchPath. * Should be deleted and cleared if any paths consider this one as a parent. @@ -52,19 +52,20 @@ export class SearchQuotientSpur implements SearchQuotientNode { * @param model */ constructor(model: LexicalModel); - constructor(space: SearchQuotientSpur, inputs: Distribution, bestProbFromSet: number); - constructor(arg1: LexicalModel | SearchQuotientSpur, inputs?: Distribution, bestProbFromSet?: number) { + constructor(space: SearchQuotientNode, inputs: Distribution, bestProbFromSet: number); + constructor(arg1: LexicalModel | SearchQuotientNode, inputs?: Distribution, bestProbFromSet?: number) { this.spaceId = generateSpaceSeed(); if(arg1 instanceof SearchQuotientSpur) { - const parentNode = arg1 as SearchQuotientSpur; + const parentNode = arg1 as SearchQuotientNode; const logTierCost = -Math.log(bestProbFromSet); this.inputs = inputs; + this.inputCount = parentNode.inputCount + 1; this.lowestPossibleSingleCost = parentNode.lowestPossibleSingleCost + logTierCost; - this.parentPath = parentNode; + this.parentNode = parentNode; - this.addEdgesForNodes(parentNode.previousResults.map(v => v.node)); + this.addEdgesForNodes(parentNode.previousResults.map(r => r.node)); return; } @@ -72,14 +73,15 @@ export class SearchQuotientSpur implements SearchQuotientNode { const model = arg1 as LexicalModel; this.selectionQueue.enqueue(new SearchNode(model.traverseFromRoot(), this.spaceId, t => model.toKey(t))); this.lowestPossibleSingleCost = 
0; + this.inputCount = 0; } /** * Retrieves the sequences of inputs that led to this SearchPath. */ public get inputSequence(): Distribution[] { - if(this.parentPath) { - return [...this.parentPath.inputSequence, this.inputs]; + if(this.parentNode) { + return [...this.parentNode.inputSequence, this.inputs]; } else if(this.inputs) { return [this.inputs]; } else { @@ -87,12 +89,59 @@ export class SearchQuotientSpur implements SearchQuotientNode { } } - public get inputCount(): number { - return (this.parentPath?.inputCount ?? 0) + (this.inputs ? 1 : 0); + public hasInputs(keystrokeDistributions: Distribution[]): boolean { + if(this.inputCount == 0) { + return keystrokeDistributions.length == 0; + } else if(keystrokeDistributions.length != this.inputCount) { + return false; + } + + const tailInput = [...keystrokeDistributions[keystrokeDistributions.length - 1]]; + keystrokeDistributions = keystrokeDistributions.slice(0, keystrokeDistributions.length - 1); + const localInput = this.lastInput; + + const parentHasInput = () => !!this.parents.find(p => p.hasInputs(keystrokeDistributions)); + + // Actual reference match? Easy mode. + if(localInput == tailInput) { + return parentHasInput(); + } else if(localInput.length != tailInput.length) { + return false; + } else { + for(let entry of tailInput) { + const matchIndex = localInput.findIndex((x) => { + const s1 = x.sample; + const s2 = entry.sample; + // Check for equal reference first before the other checks; it makes a nice shortcut. + if(x == entry) { + return true; + } if(x.p == entry.p && s1.deleteLeft == s2.deleteLeft + && s1.id == s2.id && ((s1.deleteRight ?? 0) == (s2.deleteRight ?? 
0)) && s1.insert == s2.insert + ) { + return true; + } + return false; + }); + + if(matchIndex == -1) { + return false; + } else { + tailInput.splice(matchIndex, 1); + } + } + + return parentHasInput(); + } + } + + public get lastInput(): Distribution> { + // Shallow-copies the array to prevent external modification; the Transforms + // are marked Readonly to prevent their modification as well. + return [...this.inputs]; } public get bestExample(): {text: string, p: number} { - const bestPrefix = this.parentPath?.bestExample ?? { text: '', p: 1 }; + const bestPrefix = this.parentNode?.bestExample ?? { text: '', p: 1 }; const bestLocalInput = this.inputs?.reduce((max, curr) => max.p < curr.p ? curr : max) ?? { sample: { insert: '', deleteLeft: 0 }, p: 1}; return { @@ -101,8 +150,13 @@ export class SearchQuotientSpur implements SearchQuotientNode { } } + get parents() { + // The SearchPath class may only have a single parent. + return this.parentNode ? [this.parentNode] : []; + } + increaseMaxEditDistance() { - this.parentPath.increaseMaxEditDistance(); + this.parentNode.increaseMaxEditDistance(); // By extracting the entries from the priority queue and increasing distance outside of it as a batch job, // we get an O(N) implementation, rather than the O(N log N) that would result from maintaining the original queue. @@ -117,11 +171,11 @@ export class SearchQuotientSpur implements SearchQuotientNode { get correctionsEnabled(): boolean { // When corrections are disabled, the Web engine will only provide individual Transforms // for an input, not a distribution. No distributions means we shouldn't do corrections. - return this.parentPath?.correctionsEnabled || this.inputs?.length > 1; + return this.parentNode?.correctionsEnabled || this.inputs?.length > 1; } public get currentCost(): number { - const parentCost = this.parentPath?.currentCost ?? Number.POSITIVE_INFINITY; + const parentCost = this.parentNode?.currentCost ?? 
Number.POSITIVE_INFINITY; const localCost = this.selectionQueue.peek()?.currentCost ?? Number.POSITIVE_INFINITY; return Math.min(localCost, parentCost); @@ -156,7 +210,7 @@ export class SearchQuotientSpur implements SearchQuotientNode { * @returns */ public handleNextNode(): PathResult { - const parentCost = this.parentPath?.currentCost ?? Number.POSITIVE_INFINITY; + const parentCost = this.parentNode?.currentCost ?? Number.POSITIVE_INFINITY; const localCost = this.selectionQueue.peek()?.currentCost ?? Number.POSITIVE_INFINITY; if(parentCost <= localCost) { @@ -166,7 +220,7 @@ export class SearchQuotientSpur implements SearchQuotientNode { }; } - const result = this.parentPath.handleNextNode(); + const result = this.parentNode.handleNextNode(); if(result.type == 'complete') { this.addEdgesForNodes([result.finalNode]); @@ -178,9 +232,10 @@ export class SearchQuotientSpur implements SearchQuotientNode { } as PathResult } + // will have equal .spaceId. let currentNode = this.selectionQueue.dequeue(); - let unmatchedResult: PathResult = { + let unmatchedResult = { type: 'intermediate', cost: currentNode.currentCost } @@ -191,7 +246,7 @@ export class SearchQuotientSpur implements SearchQuotientNode { // Note: .knownCost is not scaled, while its contribution to .currentCost _is_ scaled. let substitutionsOnly = false; if(currentNode.editCount > 2) { - return unmatchedResult; + return unmatchedResult as PathResult; } else if(currentNode.editCount == 2) { substitutionsOnly = true; } @@ -200,10 +255,8 @@ export class SearchQuotientSpur implements SearchQuotientNode { // Allows a little 'wiggle room' + 2 "hard" edits. // Can be important if needed characters don't actually exist on the keyboard // ... or even just not the then-current layer of the keyboard. - // - // TODO: still consider the lowest-cost individual edges for THIS specific criterion. 
if(currentNode.currentCost > this.lowestPossibleSingleCost + 2.5 * EDIT_DISTANCE_COST_SCALE) { - return unmatchedResult; + return unmatchedResult as PathResult; } // Stage 2: process subset further OR build remaining edges @@ -211,7 +264,7 @@ export class SearchQuotientSpur implements SearchQuotientNode { if(currentNode.hasPartialInput) { // Re-use the current queue; the number of total inputs considered still holds. this.selectionQueue.enqueueAll(currentNode.processSubsetEdge()); - return unmatchedResult; + return unmatchedResult as PathResult; } // OK, we fully crossed a graph edge and have landed on a transition point; @@ -223,19 +276,26 @@ export class SearchQuotientSpur implements SearchQuotientNode { this.selectionQueue.enqueueAll(insertionEdges); } - if((this.returnedValues[currentNode.resultKey]?.currentCost ?? Number.POSITIVE_INFINITY) > currentNode.currentCost) { - this.returnedValues[currentNode.resultKey] = currentNode; - } else { - // Not a better cost, so reject it and move on to the next potential result. - return this.handleNextNode(); + if(currentNode.spaceId == this.spaceId) { + if(this.returnedValues) { + if((this.returnedValues[currentNode.resultKey]?.currentCost ?? Number.POSITIVE_INFINITY) > currentNode.currentCost) { + this.returnedValues[currentNode.resultKey] = currentNode; + } else { + // Not a better cost, so reject it and move on to the next potential result. + return this.handleNextNode(); + } + } + + return { + type: 'complete', + cost: currentNode.currentCost, + finalNode: currentNode, + spaceId: this.spaceId + }; } - return { - type: 'complete', - cost: currentNode.currentCost, - finalNode: currentNode, - spaceId: this.spaceId - }; + // If we've somehow fully exhausted all search options, indicate that none remain. 
+ return unmatchedResult as PathResult; } public get previousResults(): SearchResult[] { diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts index f8bcbaa65c3..77f1361a810 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts @@ -7,7 +7,7 @@ import { applySuggestionCasing, correctAndEnumerate, dedupeSuggestions, finalize import { detectCurrentCasing, determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js'; import { ContextTracker } from './correction/context-tracker.js'; -import { DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL } from './correction/search-quotient-spur.js'; +import { DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL } from './correction/distance-modeler.js'; import CasingForm = LexicalModelTypes.CasingForm; import Configuration = LexicalModelTypes.Configuration; diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts index 93b6d5b164d..c0ffb9661a7 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts @@ -13,7 +13,7 @@ import { default as defaultBreaker } from '@keymanapp/models-wordbreakers'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; import { LexicalModelTypes } from '@keymanapp/common-types'; -import { ContextState, determineContextSlideTransform, models } from '@keymanapp/lm-worker/test-index'; +import { ContextState, determineContextSlideTransform, models, SearchQuotientSpur } from '@keymanapp/lm-worker/test-index'; import Context = 
LexicalModelTypes.Context; import Transform = LexicalModelTypes.Transform; @@ -252,11 +252,10 @@ describe('ContextState', () => { // The 'wordbreak' transform let state = newContextMatch?.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); - assert.sameDeepMembers( - state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence, - [[{sample: { insert: '', deleteLeft: 0 }, p: 1}]] - ); + // space transform + assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1); + // empty transform + assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); @@ -284,17 +283,11 @@ describe('ContextState', () => { // The 'wordbreak' transform let state = newContextMatch?.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); - assert.deepEqual( - state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence, - [[{ sample: {insert: '', deleteLeft: 0}, p: 1 }]] - ); - - // if(!newContextMatch.final.tokenization.alignment.canAlign) { - // assert.fail("context alignment failed"); - // } - // assert.equal(newContextMatch.final.tokenization.alignment.leadTokenShift, 0); - // assert.equal(newContextMatch.final.tokenization.alignment.tailTokenShift, 0); + // Two whitespaces, one of which is new! 
+ const preTail = state.tokenization.tokens[state.tokenization.tokens.length - 2]; + assert.equal(preTail.searchModule.inputCount, 2); + assert.deepEqual((preTail.searchModule as SearchQuotientSpur).lastInput, [{sample: transform, p: 1}]); + assert.equal(state.tokenization.tail.searchModule.inputCount, 1); }); it("properly matches and aligns when a 'wordbreak' is removed via backspace", function() { @@ -337,8 +330,8 @@ describe('ContextState', () => { // The 'wordbreak' transform let state = newContextMatch.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence); + assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1); + assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); @@ -370,10 +363,9 @@ describe('ContextState', () => { // The 'wordbreak' transform let state = newContextMatch.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); - assert.deepEqual( - state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence, - [[{sample: {insert: '', deleteLeft: 0}, p: 1}]] + assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1); + assert.equal( + state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1 ); // if(!newContextMatch.final.tokenization.alignment.canAlign) { @@ -402,10 +394,9 @@ describe('ContextState', () => { // The 'wordbreak' transform let state = newContextMatch.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); - assert.deepEqual( 
- state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence, - [[{sample: {insert: '', deleteLeft: 0}, p: 1}]] + assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1); + assert.equal( + state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1 ); // if(!newContextMatch.final.tokenization.alignment.canAlign) { @@ -434,8 +425,8 @@ describe('ContextState', () => { // The 'wordbreak' transform let state = newContextMatch.final; - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence); - assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence); + assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1); + assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts index aad94c6bb37..06dc12a0d77 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts @@ -14,7 +14,7 @@ import { default as defaultBreaker } from '@keymanapp/models-wordbreakers'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; import { LexicalModelTypes } from '@keymanapp/common-types'; -import { ContextToken, correction, getBestMatches, models, preprocessInputSources } from '@keymanapp/lm-worker/test-index'; +import { ContextToken, correction, getBestMatches, models, preprocessInputSources, 
SearchQuotientSpur } from '@keymanapp/lm-worker/test-index'; import Distribution = LexicalModelTypes.Distribution; import ExecutionTimer = correction.ExecutionTimer; @@ -54,7 +54,7 @@ describe('ContextToken', function() { it("(model: LexicalModel)", async () => { let token = new ContextToken(plainModel); - assert.isEmpty(token.searchModule.inputSequence); + assert.equal(token.searchModule.inputCount, 0); assert.isEmpty(token.exampleInput); assert.isFalse(token.isWhitespace); @@ -67,21 +67,16 @@ describe('ContextToken', function() { it("(model: LexicalModel, text: string)", () => { let token = new ContextToken(plainModel, "and"); - assert.isNotEmpty(token.searchModule.inputSequence); - - assert.equal(token.searchModule.inputSequence.map((entry) => entry[0].sample.insert).join(''), 'and'); - token.searchModule.inputSequence.forEach((entry) => assert.equal(entry[0].sample.deleteLeft, 0)); - assert.deepEqual(token.searchModule.inputSequence, [..."and"].map((char) => { - return [{ - sample: { - insert: char, - deleteLeft: 0 - }, - p: 1.0 - }]; - })); + assert.equal(token.searchModule.bestExample.text, 'and'); assert.equal(token.exampleInput, 'and'); + assert.equal(token.searchModule.inputCount, 3); + assert.isTrue(token.searchModule.hasInputs([ + [{sample: { insert: 'a', deleteLeft: 0 }, p: 1}], + [{sample: { insert: 'n', deleteLeft: 0 }, p: 1}], + [{sample: { insert: 'd', deleteLeft: 0 }, p: 1}] + ])); + assert.isFalse(token.isWhitespace); }); @@ -93,8 +88,7 @@ describe('ContextToken', function() { assert.equal(clonedToken.searchModule, baseToken.searchModule); // Deep equality on .searchModule can't be directly checked due to the internal complexities involved. // We CAN check for the most important members, though. 
- assert.notEqual(clonedToken.searchModule.inputSequence, baseToken.searchModule.inputSequence); - assert.deepEqual(clonedToken.searchModule.inputSequence, baseToken.searchModule.inputSequence); + assert.equal(clonedToken.searchModule, baseToken.searchModule); assert.notEqual(clonedToken, baseToken); // Perfectly deep-equal when we ignore .searchModule. @@ -114,9 +108,13 @@ describe('ContextToken', function() { token2.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); token3.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); - assert.sameOrderedMembers(merged.searchModule.inputSequence.slice(0, 3), token1.searchModule.inputSequence); - assert.sameOrderedMembers(merged.searchModule.inputSequence.slice(3, 4), token2.searchModule.inputSequence); - assert.sameOrderedMembers(merged.searchModule.inputSequence.slice(4), token3.searchModule.inputSequence); + assert.isTrue(merged.searchModule.hasInputs([ + [{sample: { insert: 'c', deleteLeft: 0 }, p: 1}], + [{sample: { insert: 'a', deleteLeft: 0 }, p: 1}], + [{sample: { insert: 'n', deleteLeft: 0 }, p: 1}], + [{sample: { insert: '\'', deleteLeft: 0 }, p: 1}], + [{sample: { insert: 't', deleteLeft: 0 }, p: 1}] + ])); }); it("merges three tokens from single previously-split transforms", () => { @@ -147,7 +145,8 @@ describe('ContextToken', function() { const merged = ContextToken.merge([token1, token2, token3], plainModel); assert.equal(merged.exampleInput, "can't"); assert.deepEqual(merged.inputRange, [ { trueTransform: srcTransform, inputStartIndex: 0, bestProbFromSet: 1 } ]); - assert.deepEqual(merged.searchModule.inputSequence, [[{sample: srcTransform, p: 1}]]); + assert.equal(merged.searchModule.inputCount, 1); + assert.deepEqual((merged.searchModule as SearchQuotientSpur).lastInput, [{sample: srcTransform, p: 1}]); }); it("merges four tokens with previously-split transforms", () => { @@ -206,7 +205,9 @@ describe('ContextToken', function() { const merged 
= ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, "applesandsourgrapes"); assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) )); - assert.deepEqual(merged.searchModule.inputSequence, srcTransforms.map((t) => [{sample: t, p: 1}])); + assert.isTrue(merged.searchModule.hasInputs( + srcTransforms.map((t) => ([{sample: t, p: 1}])) + )); }); it("merges four tokens with previously-split transforms - non-BMP text", () => { @@ -265,7 +266,9 @@ describe('ContextToken', function() { const merged = ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes")); assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) )); - assert.deepEqual(merged.searchModule.inputSequence, srcTransforms.map((t) => [{sample: t, p: 1}])); + assert.isTrue(merged.searchModule.hasInputs( + srcTransforms.map((t) => ([{sample: t, p: 1}])) + )); }); }); @@ -297,7 +300,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.sourceText, 'can\''); - assert.deepEqual(tokenToSplit.searchModule.inputSequence, keystrokeDistributions); + tokenToSplit.searchModule.hasInputs(keystrokeDistributions); // And now for the "fun" part. 
const resultsOfSplit = tokenToSplit.split({ @@ -314,10 +317,8 @@ describe('ContextToken', function() { assert.equal(resultsOfSplit.length, 2); assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), ['can', '\'']); - assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.searchModule.inputSequence), [ - keystrokeDistributions.slice(0, 3), - [keystrokeDistributions[3]] - ]); + assert.isTrue(resultsOfSplit[0].searchModule.hasInputs(keystrokeDistributions.slice(0, 3))); + assert.isTrue(resultsOfSplit[1].searchModule.hasInputs([keystrokeDistributions[3]])); }); it("handles mid-transform splits correctly", () => { @@ -335,7 +336,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.sourceText, 'biglargetransform'); - assert.deepEqual(tokenToSplit.searchModule.inputSequence, keystrokeDistributions); + assert.isTrue(tokenToSplit.searchModule.hasInputs(keystrokeDistributions)); // And now for the "fun" part. const resultsOfSplit = tokenToSplit.split({ @@ -362,9 +363,12 @@ describe('ContextToken', function() { inputStartIndex: i, bestProbFromSet: 1 }))); - assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.searchModule.inputSequence[0]), splitTextArray.map(t => [{ - sample: { insert: t, deleteLeft: 0, deleteRight: 0 }, p: 1 - }])); + + for(let i = 0; i < resultsOfSplit.length; i++) { + assert.isTrue(resultsOfSplit[i].searchModule.hasInputs([ + [{sample: { insert: splitTextArray[i], deleteLeft: 0, deleteRight: 0 }, p: 1}] + ])); + } }); it("handles messy mid-transform splits correctly", () => { @@ -386,7 +390,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.exampleInput, 'largelongtransforms'); - assert.deepEqual(tokenToSplit.searchModule.inputSequence, keystrokeDistributions); + tokenToSplit.searchModule.hasInputs(keystrokeDistributions); // And now for the "fun" part. 
const resultsOfSplit = tokenToSplit.split({ @@ -416,7 +420,7 @@ describe('ContextToken', function() { { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 } ]); - assert.deepEqual(resultsOfSplit[0].searchModule.inputSequence, [ + assert.isTrue(resultsOfSplit[0].searchModule.hasInputs([ keystrokeDistributions[0], keystrokeDistributions[1].map((entry) => { return { @@ -426,9 +430,9 @@ describe('ContextToken', function() { }, p: entry.p } }), - ]); + ])); - assert.deepEqual(resultsOfSplit[1].searchModule.inputSequence, [ + assert.isTrue(resultsOfSplit[1].searchModule.hasInputs([ keystrokeDistributions[1].map((entry) => { return { sample: { @@ -446,9 +450,9 @@ describe('ContextToken', function() { }, p: entry.p } }), - ]); + ])); - assert.deepEqual(resultsOfSplit[2].searchModule.inputSequence, [ + assert.isTrue(resultsOfSplit[2].searchModule.hasInputs([ keystrokeDistributions[2].map((entry) => { return { sample: { @@ -458,7 +462,7 @@ describe('ContextToken', function() { }, p: entry.p } }), - ]); + ])); }); it("handles messy mid-transform splits correctly - non-BMP text", () => { @@ -480,7 +484,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.exampleInput, toMathematicalSMP('largelongtransforms')); - assert.deepEqual(tokenToSplit.searchModule.inputSequence, keystrokeDistributions); + tokenToSplit.searchModule.hasInputs(keystrokeDistributions); // And now for the "fun" part. 
const resultsOfSplit = tokenToSplit.split({ @@ -510,7 +514,7 @@ describe('ContextToken', function() { { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 } ]); - assert.deepEqual(resultsOfSplit[0].searchModule.inputSequence, [ + assert.isTrue(resultsOfSplit[0].searchModule.hasInputs([ keystrokeDistributions[0], keystrokeDistributions[1].map((entry) => { return { @@ -520,9 +524,9 @@ describe('ContextToken', function() { }, p: entry.p } }), - ]); + ])); - assert.deepEqual(resultsOfSplit[1].searchModule.inputSequence, [ + assert.isTrue(resultsOfSplit[1].searchModule.hasInputs([ keystrokeDistributions[1].map((entry) => { return { sample: { @@ -540,9 +544,9 @@ describe('ContextToken', function() { }, p: entry.p } }), - ]); + ])); - assert.deepEqual(resultsOfSplit[2].searchModule.inputSequence, [ + assert.isTrue(resultsOfSplit[2].searchModule.hasInputs([ keystrokeDistributions[2].map((entry) => { return { sample: { @@ -552,7 +556,7 @@ describe('ContextToken', function() { }, p: entry.p } }), - ]); + ])); }); }); }); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts index 8b630891917..b65e046ba57 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts @@ -15,7 +15,20 @@ import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs' import { LexicalModelTypes } from '@keymanapp/common-types'; import { KMWString } from '@keymanapp/web-utils'; -import { analyzePathMergesAndSplits, assembleTransforms, buildEdgeWindow, ContextToken, ContextTokenization, EditOperation, EditTuple, ExtendedEditOperation, models, PendingTokenization, traceInsertEdits } from 
'@keymanapp/lm-worker/test-index'; +import { + analyzePathMergesAndSplits, + assembleTransforms, + buildEdgeWindow, + ContextToken, + ContextTokenization, + EditOperation, + EditTuple, + ExtendedEditOperation, + models, + PendingTokenization, + SearchQuotientSpur, + traceInsertEdits +} from '@keymanapp/lm-worker/test-index'; import Transform = LexicalModelTypes.Transform; import TrieModel = models.TrieModel; @@ -139,8 +152,10 @@ describe('ContextTokenization', function() { let baseTokenization = new ContextTokenization(tokens, transitionEdits, null /* dummy val */); let cloned = new ContextTokenization(baseTokenization); - assert.deepEqual(cloned.tokens.map((token) => token.searchModule.inputSequence), - baseTokenization.tokens.map((token) => token.searchModule.inputSequence)); + assert.sameOrderedMembers( + cloned.tokens.map((token) => token.searchModule), + baseTokenization.tokens.map((token) => token.searchModule) + ); // The `.searchModule` instances will not be deep-equal; there are class properties // that hold functions with closures, configured at runtime. 
@@ -198,13 +213,19 @@ describe('ContextTokenization', function() { assert.deepEqual(tokenization.tokens.map((t) => ({text: t.exampleInput, isWhitespace: t.isWhitespace})), targetTokens ); - assert.includeDeepMembers( - tokenization.tokens[tokenization.tokens.length - 2].searchModule.inputSequence, - [[{sample: inputTransformMap.get(1), p: 1}]] + assert.equal( + tokenization.tokens[tokenization.tokens.length - 2].searchModule.inputCount, 1 + ); + assert.deepEqual( + (tokenization.tokens[tokenization.tokens.length - 2].searchModule as SearchQuotientSpur).lastInput, + [{sample: inputTransformMap.get(1), p: 1}] ); - assert.includeDeepMembers( - tokenization.tail.searchModule.inputSequence, - [[{sample: inputTransformMap.get(2), p: 1}]] + assert.equal( + tokenization.tail.searchModule.inputCount, 1 + ); + assert.deepEqual( + (tokenization.tail.searchModule as SearchQuotientSpur).lastInput, + [{sample: inputTransformMap.get(2), p: 1}] ); }); @@ -281,9 +302,14 @@ describe('ContextTokenization', function() { assert.deepEqual(tokenization.tokens.map((t) => ({text: t.exampleInput, isWhitespace: t.isWhitespace})), targetTokens ); - assert.includeDeepMembers( - tokenization.tail.searchModule.inputSequence, - [[{sample: inputTransformMap.get(0), p: 1}]] + assert.equal(baseTokenization.tail.searchModule.inputCount, 2); + assert.deepEqual(tokenization.tail.searchModule.parents, [baseTokenization.tail.searchModule]); + assert.equal( + tokenization.tail.searchModule.inputCount, 3 + ); + assert.deepEqual( + (tokenization.tail.searchModule as SearchQuotientSpur).lastInput, + [{sample: inputTransformMap.get(0), p: 1}] ); }); @@ -322,11 +348,22 @@ describe('ContextTokenization', function() { assert.deepEqual(tokenization.tokens.map((t) => ({text: t.exampleInput, isWhitespace: t.isWhitespace})), targetTokens ); - assert.includeDeepMembers( - tokenization.tail.searchModule.inputSequence, + + // As we fully deleted the old token, the new one "starts" after the deleteLeft. 
+ // The deleteLeft component should not be included here. Mocking may be needed! + assert.equal( + tokenization.tail.searchModule.inputCount, 1 // is a single transform. + ); + assert.equal( + tokenization.tokens[tokenization.tokens.length - 2].searchModule, + baseTokenization.tokens[tokenization.tokens.length - 2].searchModule + ) + assert.notEqual(tokenization.tail.searchModule.parents, [baseTokenization.tail.searchModule]); + assert.deepEqual( + (tokenization.tail.searchModule as SearchQuotientSpur).lastInput, // As we fully deleted the old token, the new one "starts" after the deleteLeft. // The deleteLeft component should not be included here. - [[{sample: { insert: 'week', deleteLeft: 0 /* NOT 3 */ }, p: 1}]] + [{sample: { insert: 'week', deleteLeft: 0 /* NOT 3 */ }, p: 1}] ); }); @@ -376,9 +413,8 @@ describe('ContextTokenization', function() { transform.deleteLeft = 0; } - assert.includeDeepMembers( - tokenization.tokens[tailIndex + i].searchModule.inputSequence, - [[{sample: transform, p: 1}]] + assert.deepEqual((tokenization.tokens[tailIndex + i].searchModule as SearchQuotientSpur).lastInput, + [{sample: transform, p: 1}] ); } }); @@ -438,9 +474,8 @@ describe('ContextTokenization', function() { transform.deleteLeft = 0; } - assert.includeDeepMembers( - tokenization.tokens[tailIndex + i].searchModule.inputSequence, - [[{sample: transform, p: 1}]] + assert.deepEqual((tokenization.tokens[tailIndex + i].searchModule as SearchQuotientSpur).lastInput, + [{sample: transform, p: 1}] ); } }); @@ -492,9 +527,8 @@ describe('ContextTokenization', function() { transform.deleteLeft = 0; } - assert.includeDeepMembers( - tokenization.tokens[tailIndex + i].searchModule.inputSequence, - [[{sample: transform, p: 1}]] + assert.deepEqual((tokenization.tokens[tailIndex + i].searchModule as SearchQuotientSpur).lastInput, + [{sample: transform, p: 1}] ); } }); @@ -506,7 +540,7 @@ describe('ContextTokenization', function() { const targetTokens = ['an', ' ', 'apple', ' ', 'a', ' 
', 'day', ' ', 'can\'t'].map((t) => ({text: t, isWhitespace: t == ' '})); const inputTransform = { insert: 't', deleteLeft: 0, deleteRight: 0 }; const inputTransformMap: Map = new Map(); - inputTransformMap.set(0, { insert: 't', deleteLeft: 0 }); + inputTransformMap.set(0, inputTransform); const edgeWindow = buildEdgeWindow(baseTokenization.tokens, inputTransform, false, testEdgeWindowSpec); const tokenization = baseTokenization.evaluateTransition({ @@ -548,23 +582,18 @@ describe('ContextTokenization', function() { targetTokens ); - assert.includeDeepMembers( - [...tokenization.tail.inputRange], - [...baseTokenization.tokens[baseTokenization.tokens.length - 2].inputRange] - ); - assert.includeDeepMembers( - tokenization.tail.searchModule.inputSequence, - baseTokenization.tokens[baseTokenization.tokens.length - 2].searchModule.inputSequence - ); - - assert.includeDeepMembers( - [...tokenization.tail.inputRange], - [...baseTokenization.tokens[baseTokenization.tokens.length - 1].inputRange] - ); - assert.includeDeepMembers( - tokenization.tail.searchModule.inputSequence, - baseTokenization.tokens[baseTokenization.tokens.length - 1].searchModule.inputSequence + const basePreTail = baseTokenization.tokens[baseTokenization.tokens.length - 2]; + const baseTail = baseTokenization.tail; + assert.equal( + tokenization.tail.searchModule.inputCount, + basePreTail.searchModule.inputCount + baseTail.searchModule.inputCount + 1 /* +1 - incoming transform */ ); + assert.deepEqual((tokenization.tail.searchModule as SearchQuotientSpur).lastInput, [{ sample: inputTransform, p: 1 }]); + assert.equal(tokenization.tail.exampleInput, 'can\'t'); + assert.deepEqual(tokenization.tail.searchModule.bestExample, { + text: basePreTail.searchModule.bestExample.text + baseTail.searchModule.bestExample.text + inputTransform.insert, + p: basePreTail.searchModule.bestExample.p * baseTail.searchModule.bestExample.p * 1 /* prob of input transform */ + }); }); it('handles case that triggers a token 
split: can\' +. => can, \', .', () => { @@ -619,25 +648,24 @@ describe('ContextTokenization', function() { targetTokens ); - assert.includeDeepMembers( - [...baseTokenization.tail.inputRange], - [...tokenization.tokens[tokenization.tokens.length - 2].inputRange] - ); - assert.includeDeepMembers( - baseTokenization.tail.searchModule.inputSequence, - tokenization.tokens[tokenization.tokens.length - 2].searchModule.inputSequence - ); - - // We've also appended a '.' to the final split-off token. Thus, we need - // to account for that in the assertions below. - assert.includeDeepMembers( - [...baseTokenization.tail.inputRange, { trueTransform: inputTransform, inputStartIndex: 0, bestProbFromSet: 1 }], - [...tokenization.tokens[tokenization.tokens.length - 1].inputRange] - ); - assert.includeDeepMembers( - [...baseTokenization.tail.searchModule.inputSequence, [{sample: { insert: '.', deleteLeft: 0 }, p: 1}]], - tokenization.tokens[tokenization.tokens.length - 1].searchModule.inputSequence + const prepreTail = tokenization.tokens[tokenization.tokens.length - 3]; + const preTail = tokenization.tokens[tokenization.tokens.length - 2]; + const tail = tokenization.tail; + assert.equal( + baseTokenization.tail.searchModule.inputCount, + prepreTail.searchModule.inputCount + preTail.searchModule.inputCount ); + assert.equal(tail.searchModule.inputCount, 1); + // base tokenization did not include the '.' component. 
+ assert.deepEqual((preTail.searchModule as SearchQuotientSpur).lastInput, (baseTokenization.tail.searchModule as SearchQuotientSpur).lastInput); + assert.deepEqual((tail.searchModule as SearchQuotientSpur).lastInput, [{sample: inputTransformMap.get(1), p: 1}]); + assert.equal(prepreTail.exampleInput, 'can'); + assert.equal(preTail.exampleInput, '\''); + assert.equal(tail.exampleInput, '.'); + assert.deepEqual({ + text: prepreTail.searchModule.bestExample.text + preTail.searchModule.bestExample.text, + p: prepreTail.searchModule.bestExample.p * preTail.searchModule.bestExample.p + }, baseTokenization.tail.searchModule.bestExample); }); }); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts new file mode 100644 index 00000000000..8cf910c0f30 --- /dev/null +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts @@ -0,0 +1,242 @@ +/* + * Keyman is copyright (C) SIL Global. MIT License. + * + * Created by jahorton on 2025-10-29 + * + * This file defines tests for the SearchSpace class of the + * predictive-text correction-search engine. 
+ */ + +import { assert } from 'chai'; + +import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; +import { models, SearchQuotientSpur } from '@keymanapp/lm-worker/test-index'; + +import TrieModel = models.TrieModel; + +const testModel = new TrieModel(jsonFixture('models/tries/english-1000')); + +export function buildSimplePathSplitFixture() { + const rootPath = new SearchQuotientSpur(testModel); + + const distrib1 = [ + { sample: {insert: 'c', deleteLeft: 0, id: 11}, p: 0.5 }, + { sample: {insert: 'r', deleteLeft: 0, id: 11}, p: 0.4 }, + { sample: {insert: 't', deleteLeft: 0, id: 11}, p: 0.1 } + ]; + const path1 = new SearchQuotientSpur(rootPath, distrib1, distrib1[0].p); + + const distrib2 = [ + { sample: {insert: 'a', deleteLeft: 0, id: 12}, p: 0.7 }, + { sample: {insert: 'e', deleteLeft: 0, id: 12}, p: 0.3 } + ]; + const path2 = new SearchQuotientSpur(path1, distrib2, distrib2[0].p); + + const distrib3 = [ + { sample: {insert: 'n', deleteLeft: 0, id: 13}, p: 0.8 }, + { sample: {insert: 'r', deleteLeft: 0, id: 13}, p: 0.2 } + ]; + const path3 = new SearchQuotientSpur(path2, distrib3, distrib3[0].p); + + const distrib4 = [ + { sample: {insert: 't', deleteLeft: 0, id: 14}, p: 1 } + ]; + const path4 = new SearchQuotientSpur(path3, distrib4, distrib4[0].p); + + return { + paths: [rootPath, path1, path2, path3, path4], + distributions: [distrib1, distrib2, distrib3, distrib4] + }; +} + +describe('SearchPath', () => { + describe('constructor', () => { + it('initializes from a lexical model', () => { + const path = new SearchQuotientSpur(testModel); + assert.equal(path.inputCount, 0); + assert.isNumber(path.spaceId); + assert.deepEqual(path.bestExample, {text: '', p: 1}); + assert.deepEqual(path.parents, []); + assert.isNotOk(path.inputs); + }); + + it('may be extended from root path', () => { + const rootPath = new SearchQuotientSpur(testModel); + + const leadEdgeDistribution = [ + {sample: {insert: 't', deleteLeft: 0, id: 13 }, p: 0.5}, + 
{sample: {insert: 'a', deleteLeft: 0, id: 13 }, p: 0.3}, + {sample: {insert: 'o', deleteLeft: 0, id: 13 }, p: 0.2} + ]; + + const extendedPath = new SearchQuotientSpur(rootPath, leadEdgeDistribution, leadEdgeDistribution[0].p); + + assert.equal(extendedPath.inputCount, 1); + assert.isNumber(extendedPath.spaceId); + assert.notEqual(extendedPath.spaceId, rootPath.spaceId); + assert.deepEqual(extendedPath.bestExample, {text: 't', p: 0.5}); + assert.deepEqual(extendedPath.parents, [rootPath]); + assert.deepEqual(extendedPath.inputs, leadEdgeDistribution); + + // Assert the root is unchanged. + assert.equal(rootPath.inputCount, 0); + // Should (still) have codepointLength == 0 once it's defined. + assert.deepEqual(rootPath.bestExample, {text: '', p: 1}); + assert.deepEqual(rootPath.parents, []); + assert.isNotOk(rootPath.inputs); + }); + + it('may be built from arbitrary prior SearchPath', () => { + const rootPath = new SearchQuotientSpur(testModel); + + const leadEdgeDistribution = [ + {sample: {insert: 't', deleteLeft: 0, id: 13 }, p: 0.5}, + {sample: {insert: 'a', deleteLeft: 0, id: 13 }, p: 0.3}, + {sample: {insert: 'o', deleteLeft: 0, id: 13 }, p: 0.2} + ]; + const inputClone = leadEdgeDistribution.map(e => ({...e})); + + const length1Path = new SearchQuotientSpur( + rootPath, + leadEdgeDistribution, + leadEdgeDistribution[0].p + ); + + const tailEdgeDistribution = [ + {sample: {insert: 'r', deleteLeft: 0, id: 17 }, p: 0.6}, + {sample: {insert: 'e', deleteLeft: 0, id: 17 }, p: 0.25}, + {sample: {insert: 'h', deleteLeft: 0, id: 17 }, p: 0.15} + ]; + + const length2Path = new SearchQuotientSpur( + length1Path, + tailEdgeDistribution, + tailEdgeDistribution[0].p + ); + + // Verify that the prior distribution remains fully unaltered. 
+ assert.deepEqual(leadEdgeDistribution, inputClone); + + assert.equal(length2Path.inputCount, 2); + assert.isNumber(length2Path.spaceId); + assert.notEqual(length2Path.spaceId, length1Path.spaceId); + assert.deepEqual(length2Path.bestExample, {text: 'tr', p: leadEdgeDistribution[0].p * tailEdgeDistribution[0].p}); + assert.deepEqual(length2Path.parents, [length1Path]); + assert.deepEqual(length2Path.inputs, tailEdgeDistribution); + + assert.equal(length1Path.inputCount, 1); + assert.isNumber(length1Path.spaceId); + assert.notEqual(length1Path.spaceId, rootPath.spaceId); + assert.deepEqual(length1Path.bestExample, {text: 't', p: 0.5}); + assert.deepEqual(length1Path.parents, [rootPath]); + assert.deepEqual(length1Path.inputs, leadEdgeDistribution); + }); + + it('may extend with a Transform inserting multiple codepoints', () => { + const rootPath = new SearchQuotientSpur(testModel); + + const leadEdgeDistribution = [ + {sample: {insert: 't', deleteLeft: 0, id: 13 }, p: 0.5}, + {sample: {insert: 'a', deleteLeft: 0, id: 13 }, p: 0.3}, + {sample: {insert: 'o', deleteLeft: 0, id: 13 }, p: 0.2} + ]; + const inputClone = leadEdgeDistribution.map(e => ({...e})); + + const length1Path = new SearchQuotientSpur( + rootPath, + leadEdgeDistribution, + leadEdgeDistribution[0].p + ); + + const tailEdgeDistribution = [ + {sample: {insert: 'ri', deleteLeft: 0, id: 17 }, p: 0.6}, + {sample: {insert: 'er', deleteLeft: 0, id: 17 }, p: 0.25}, + {sample: {insert: 'hi', deleteLeft: 0, id: 17 }, p: 0.15} + ]; + + const length2Path = new SearchQuotientSpur( + length1Path, + tailEdgeDistribution, + tailEdgeDistribution[0].p + ); + + // Verify that the prior distribution remains fully unaltered. 
+ assert.deepEqual(leadEdgeDistribution, inputClone); + + assert.equal(length2Path.inputCount, 2); + assert.isNumber(length2Path.spaceId); + assert.notEqual(length2Path.spaceId, length1Path.spaceId); + assert.deepEqual(length2Path.bestExample, {text: 'tri', p: leadEdgeDistribution[0].p * tailEdgeDistribution[0].p}); + assert.deepEqual(length2Path.parents, [length1Path]); + assert.deepEqual(length2Path.inputs, tailEdgeDistribution); + + assert.equal(length1Path.inputCount, 1); + assert.isNumber(length1Path.spaceId); + assert.notEqual(length1Path.spaceId, rootPath.spaceId); + assert.deepEqual(length1Path.bestExample, {text: 't', p: 0.5}); + assert.deepEqual(length1Path.parents, [rootPath]); + assert.deepEqual(length1Path.inputs, leadEdgeDistribution); + }); + }); + + describe('fixture construction', () => { + it('setup: buildSimplePathSplitFixture() constructs paths properly', () => { + const { paths, distributions } = buildSimplePathSplitFixture(); + const pathToSplit = paths[4]; + + assert.equal(pathToSplit.inputCount, 4); + assert.equal(distributions.length, pathToSplit.inputCount); + // Per assertions documented in the setup above. 
+ assert.deepEqual(pathToSplit.bestExample, distributions.reduce( + (constructing, current) => ({text: constructing.text + current[0].sample.insert, p: constructing.p * current[0].p}), + {text: '', p: 1}) + ); + assert.deepEqual(pathToSplit.parents[0].bestExample, distributions.slice(0, pathToSplit.inputCount-1).reduce( + (constructing, current) => ({text: constructing.text + current[0].sample.insert, p: constructing.p * current[0].p}), + {text: '', p: 1}) + ); + assert.isTrue(pathToSplit.hasInputs(distributions)); + }); + }); + + describe('hasInputs()', () => { + it('matches an empty array on root SearchPaths', () => { + assert.isTrue(new SearchQuotientSpur(testModel).hasInputs([])); + }); + + it('matches all path inputs when provided in proper order', () => { + const { paths, distributions } = buildSimplePathSplitFixture(); + assert.isTrue(paths[4].hasInputs(distributions)); + }); + + it('does not match when any path input component is missing', () => { + const { paths, distributions } = buildSimplePathSplitFixture(); + assert.isFalse(paths[4].hasInputs(distributions.slice(1))); + assert.isFalse(paths[4].hasInputs(distributions.slice(2))); + assert.isFalse(paths[4].hasInputs(distributions.slice(3))); + assert.isFalse(paths[4].hasInputs(distributions.slice(0, 3))); + assert.isFalse(paths[4].hasInputs(distributions.slice(0, 1).concat(distributions.slice(2)))); + }); + + it('does not match when path inputs are not in proper order', () => { + const { paths, distributions } = buildSimplePathSplitFixture(); + assert.isFalse(paths[4].hasInputs(distributions.slice().reverse())); + + // Random shuffle. + let shuffled: typeof distributions; + let isShuffled: boolean; + do { + shuffled = distributions.slice().sort(() => Math.random() * 2 - 1); + // Validate that we actually shuffled - that we didn't land on the original order! 
+ isShuffled = false; + for(let i = 0; i < distributions.length; i++) { + if(distributions[i] != shuffled[i]) { + isShuffled = true; + break; + } + } + } while(!isShuffled); + assert.isFalse(paths[4].hasInputs(shuffled)); + }); + }); +}); \ No newline at end of file From cf70816b97a5d065ff7a0f75d11edc558126deb8 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 14 Jan 2026 16:50:13 -0600 Subject: [PATCH 2/5] change(web): update unit-test suite name --- .../correction-search/search-quotient-spur.tests.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts index 8cf910c0f30..55babb6765d 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts @@ -49,7 +49,7 @@ export function buildSimplePathSplitFixture() { }; } -describe('SearchPath', () => { +describe('SearchQuotientSpur', () => { describe('constructor', () => { it('initializes from a lexical model', () => { const path = new SearchQuotientSpur(testModel); From 64791e1277ba3616e83d1a07741c856dcda6aae6 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Thu, 22 Jan 2026 08:40:41 -0600 Subject: [PATCH 3/5] fix(web): adjust formatting nit re: missing else --- .../src/main/correction/search-quotient-spur.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts index b877aaf2868..5d00809df6c 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ 
b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -115,11 +115,14 @@ export class SearchQuotientSpur implements SearchQuotientNode { // Check for equal reference first before the other checks; it makes a nice shortcut. if(x == entry) { return true; - } if(x.p == entry.p && s1.deleteLeft == s2.deleteLeft + } + + if(x.p == entry.p && s1.deleteLeft == s2.deleteLeft && s1.id == s2.id && ((s1.deleteRight ?? 0) == (s2.deleteRight ?? 0)) && s1.insert == s2.insert ) { return true; } + return false; }); From 42fcbc390dd038587af83f1291608192bf89c95f Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Thu, 22 Jan 2026 09:21:05 -0600 Subject: [PATCH 4/5] refactor(web): move unit-testing .hasInputs to standalone, tree-shakable method --- .../main/correction/search-quotient-node.ts | 83 +++++-- .../main/correction/search-quotient-spur.ts | 48 ---- .../worker-thread/src/main/test-index.ts | 1 + .../context/context-token.tests.ts | 205 ++++++++++-------- .../search-quotient-node.tests.ts | 52 +++++ .../search-quotient-spur.tests.ts | 45 +--- 6 files changed, 236 insertions(+), 198 deletions(-) create mode 100644 web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-node.tests.ts diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts index 153edbe02c4..22e0b0b22a4 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -13,6 +13,7 @@ import { SearchNode, SearchResult } from "./distance-modeler.js"; import Distribution = LexicalModelTypes.Distribution; import Transform = LexicalModelTypes.Transform; +import { SearchQuotientSpur } from "./search-quotient-spur.js"; let SPACE_ID_SEED = 0; @@ -63,19 +64,6 @@ export interface 
SearchQuotientNode { */ handleNextNode(): PathResult; - /** - * Denotes whether or not the represented search space includes paths built from - * the specified set of keystroke input distributions. The distribution count - * should match .inputCount - no omissions or extras are permitted. - * - * Designed explicitly for use in unit testing; it's not super-efficient, so - * avoid live use. - * - * @param keystrokeDistributions - * @internal - */ - hasInputs(keystrokeDistributions: Distribution[]): boolean; - /** * Increases the editing range that will be considered for determining * correction distances. @@ -133,4 +121,73 @@ export interface SearchQuotientNode { * the correction-search graph and its paths. */ readonly bestExample: { text: string, p: number }; +} + +/** + * Denotes whether or not the represented search-space quotient path includes + * paths built from the specified set of keystroke input distributions. The + * distribution count should match .inputCount - no omissions or extras are + * permitted. + * + * Designed explicitly for use in unit testing; it's not super-efficient, so + * avoid live use. + * + * @param keystrokeDistributions + * @internal + */ +export function quotientPathHasInputs(node: SearchQuotientNode, keystrokeDistributions: Distribution[]): boolean { + if(!(node instanceof SearchQuotientSpur)) { + for(const p of node.parents) { + if(quotientPathHasInputs(p, keystrokeDistributions)) { + return true; + } + } + return false; + } + + if(node.inputCount == 0) { + return keystrokeDistributions.length == 0; + } else if(keystrokeDistributions.length != node.inputCount) { + return false; + } + + const tailInput = [...keystrokeDistributions[keystrokeDistributions.length - 1]]; + keystrokeDistributions = keystrokeDistributions.slice(0, keystrokeDistributions.length - 1); + const localInput = node.lastInput; + + const parentHasInput = () => !!node.parents.find(p => quotientPathHasInputs(p, keystrokeDistributions)); + + // Actual reference match? 
Easy mode. + if(localInput == tailInput) { + return parentHasInput(); + } else if(localInput.length != tailInput.length) { + return false; + } else { + for(let entry of tailInput) { + const matchIndex = localInput.findIndex((x) => { + const s1 = x.sample; + const s2 = entry.sample; + // Check for equal reference first before the other checks; it makes a nice shortcut. + if(x == entry) { + return true; + } + + if(x.p == entry.p && s1.deleteLeft == s2.deleteLeft + && s1.id == s2.id && ((s1.deleteRight ?? 0) == (s2.deleteRight ?? 0)) && s1.insert == s2.insert + ) { + return true; + } + + return false; + }); + + if(matchIndex == -1) { + return false; + } else { + tailInput.splice(matchIndex, 1); + } + } + + return parentHasInput(); + } } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts index 5d00809df6c..4eeed530e77 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -89,54 +89,6 @@ export class SearchQuotientSpur implements SearchQuotientNode { } } - public hasInputs(keystrokeDistributions: Distribution[]): boolean { - if(this.inputCount == 0) { - return keystrokeDistributions.length == 0; - } else if(keystrokeDistributions.length != this.inputCount) { - return false; - } - - const tailInput = [...keystrokeDistributions[keystrokeDistributions.length - 1]]; - keystrokeDistributions = keystrokeDistributions.slice(0, keystrokeDistributions.length - 1); - const localInput = this.lastInput; - - const parentHasInput = () => !!this.parents.find(p => p.hasInputs(keystrokeDistributions)); - - // Actual reference match? Easy mode. 
- if(localInput == tailInput) { - return parentHasInput(); - } else if(localInput.length != tailInput.length) { - return false; - } else { - for(let entry of tailInput) { - const matchIndex = localInput.findIndex((x) => { - const s1 = x.sample; - const s2 = entry.sample; - // Check for equal reference first before the other checks; it makes a nice shortcut. - if(x == entry) { - return true; - } - - if(x.p == entry.p && s1.deleteLeft == s2.deleteLeft - && s1.id == s2.id && ((s1.deleteRight ?? 0) == (s2.deleteRight ?? 0)) && s1.insert == s2.insert - ) { - return true; - } - - return false; - }); - - if(matchIndex == -1) { - return false; - } else { - tailInput.splice(matchIndex, 1); - } - } - - return parentHasInput(); - } - } - public get lastInput(): Distribution> { // Shallow-copies the array to prevent external modification; the Transforms // are marked Readonly to prevent their modification as well. diff --git a/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts b/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts index 2c5c0214ada..231cc84815e 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts @@ -5,6 +5,7 @@ export * from './correction/context-tokenization.js'; export { ContextTracker } from './correction/context-tracker.js'; export { ContextTransition } from './correction/context-transition.js'; export * from './correction/distance-modeler.js'; +export * from './correction/search-quotient-node.js'; export * from './correction/search-quotient-spur.js'; export { ExtendedEditOperation, SegmentableDistanceCalculation } from './correction/segmentable-calculation.js'; export * from './correction/tokenization-subsets.js'; diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts index 
06dc12a0d77..013a1a85ce0 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts @@ -14,7 +14,7 @@ import { default as defaultBreaker } from '@keymanapp/models-wordbreakers'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; import { LexicalModelTypes } from '@keymanapp/common-types'; -import { ContextToken, correction, getBestMatches, models, preprocessInputSources, SearchQuotientSpur } from '@keymanapp/lm-worker/test-index'; +import { ContextToken, correction, getBestMatches, models, preprocessInputSources, quotientPathHasInputs, SearchQuotientSpur } from '@keymanapp/lm-worker/test-index'; import Distribution = LexicalModelTypes.Distribution; import ExecutionTimer = correction.ExecutionTimer; @@ -71,7 +71,8 @@ describe('ContextToken', function() { assert.equal(token.exampleInput, 'and'); assert.equal(token.searchModule.inputCount, 3); - assert.isTrue(token.searchModule.hasInputs([ + assert.isTrue(quotientPathHasInputs( + token.searchModule, [ [{sample: { insert: 'a', deleteLeft: 0 }, p: 1}], [{sample: { insert: 'n', deleteLeft: 0 }, p: 1}], [{sample: { insert: 'd', deleteLeft: 0 }, p: 1}] @@ -108,7 +109,8 @@ describe('ContextToken', function() { token2.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); token3.inputRange.forEach((entry) => assert.isTrue(merged.inputRange.indexOf(entry) > -1)); - assert.isTrue(merged.searchModule.hasInputs([ + assert.isTrue(quotientPathHasInputs( + merged.searchModule, [ [{sample: { insert: 'c', deleteLeft: 0 }, p: 1}], [{sample: { insert: 'a', deleteLeft: 0 }, p: 1}], [{sample: { insert: 'n', deleteLeft: 0 }, p: 1}], @@ -205,7 +207,8 @@ describe('ContextToken', function() { const merged = ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, "applesandsourgrapes"); 
assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) )); - assert.isTrue(merged.searchModule.hasInputs( + assert.isTrue(quotientPathHasInputs( + merged.searchModule, srcTransforms.map((t) => ([{sample: t, p: 1}])) )); }); @@ -266,7 +269,8 @@ describe('ContextToken', function() { const merged = ContextToken.merge(tokensToMerge, plainModel); assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes")); assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) )); - assert.isTrue(merged.searchModule.hasInputs( + assert.isTrue(quotientPathHasInputs( + merged.searchModule, srcTransforms.map((t) => ([{sample: t, p: 1}])) )); }); @@ -300,7 +304,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.sourceText, 'can\''); - tokenToSplit.searchModule.hasInputs(keystrokeDistributions); + assert.isTrue(quotientPathHasInputs(tokenToSplit.searchModule, keystrokeDistributions)); // And now for the "fun" part. 
const resultsOfSplit = tokenToSplit.split({ @@ -317,8 +321,8 @@ describe('ContextToken', function() { assert.equal(resultsOfSplit.length, 2); assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), ['can', '\'']); - assert.isTrue(resultsOfSplit[0].searchModule.hasInputs(keystrokeDistributions.slice(0, 3))); - assert.isTrue(resultsOfSplit[1].searchModule.hasInputs([keystrokeDistributions[3]])); + assert.isTrue(quotientPathHasInputs(resultsOfSplit[0].searchModule, keystrokeDistributions.slice(0, 3))); + assert.isTrue(quotientPathHasInputs(resultsOfSplit[1].searchModule, [keystrokeDistributions[3]])); }); it("handles mid-transform splits correctly", () => { @@ -336,7 +340,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.sourceText, 'biglargetransform'); - assert.isTrue(tokenToSplit.searchModule.hasInputs(keystrokeDistributions)); + assert.isTrue(quotientPathHasInputs(tokenToSplit.searchModule, keystrokeDistributions)); // And now for the "fun" part. const resultsOfSplit = tokenToSplit.split({ @@ -365,7 +369,8 @@ describe('ContextToken', function() { }))); for(let i = 0; i < resultsOfSplit.length; i++) { - assert.isTrue(resultsOfSplit[i].searchModule.hasInputs([ + assert.isTrue(quotientPathHasInputs( + resultsOfSplit[i].searchModule, [ [{sample: { insert: splitTextArray[i], deleteLeft: 0, deleteRight: 0 }, p: 1}] ])); } @@ -390,7 +395,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.exampleInput, 'largelongtransforms'); - tokenToSplit.searchModule.hasInputs(keystrokeDistributions); + assert.isTrue(quotientPathHasInputs(tokenToSplit.searchModule, keystrokeDistributions)); // And now for the "fun" part. 
const resultsOfSplit = tokenToSplit.split({ @@ -420,49 +425,55 @@ describe('ContextToken', function() { { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 } ]); - assert.isTrue(resultsOfSplit[0].searchModule.hasInputs([ - keystrokeDistributions[0], - keystrokeDistributions[1].map((entry) => { - return { - sample: { - ...entry.sample, - insert: entry.sample.insert.slice(0, 4) // gets the 'arge' portion & the deleteLefts. - }, p: entry.p - } - }), - ])); + assert.isTrue(quotientPathHasInputs( + resultsOfSplit[0].searchModule,[ + keystrokeDistributions[0], + keystrokeDistributions[1].map((entry) => { + return { + sample: { + ...entry.sample, + insert: entry.sample.insert.slice(0, 4) // gets the 'arge' portion & the deleteLefts. + }, p: entry.p + } + }) + ] + )); - assert.isTrue(resultsOfSplit[1].searchModule.hasInputs([ - keystrokeDistributions[1].map((entry) => { - return { - sample: { - ...entry.sample, - insert: entry.sample.insert.slice('arge'.length), - deleteLeft: 0 - }, p: entry.p - } - }), - keystrokeDistributions[2].map((entry) => { - return { - sample: { - ...entry.sample, - insert: entry.sample.insert.slice(0, 'ng'.length), // gets the 'ng' portion. - }, p: entry.p - } - }), - ])); + assert.isTrue(quotientPathHasInputs( + resultsOfSplit[1].searchModule, [ + keystrokeDistributions[1].map((entry) => { + return { + sample: { + ...entry.sample, + insert: entry.sample.insert.slice('arge'.length), + deleteLeft: 0 + }, p: entry.p + } + }), + keystrokeDistributions[2].map((entry) => { + return { + sample: { + ...entry.sample, + insert: entry.sample.insert.slice(0, 'ng'.length), // gets the 'ng' portion. + }, p: entry.p + } + }) + ] + )); - assert.isTrue(resultsOfSplit[2].searchModule.hasInputs([ - keystrokeDistributions[2].map((entry) => { - return { - sample: { - ...entry.sample, - insert: entry.sample.insert.slice('ng'.length), // drops the 'ng' portion. 
- deleteLeft: 0 - }, p: entry.p - } - }), - ])); + assert.isTrue(quotientPathHasInputs( + resultsOfSplit[2].searchModule, [ + keystrokeDistributions[2].map((entry) => { + return { + sample: { + ...entry.sample, + insert: entry.sample.insert.slice('ng'.length), // drops the 'ng' portion. + deleteLeft: 0 + }, p: entry.p + } + }), + ] + )); }); it("handles messy mid-transform splits correctly - non-BMP text", () => { @@ -484,7 +495,7 @@ describe('ContextToken', function() { }; assert.equal(tokenToSplit.exampleInput, toMathematicalSMP('largelongtransforms')); - tokenToSplit.searchModule.hasInputs(keystrokeDistributions); + assert.isTrue(quotientPathHasInputs(tokenToSplit.searchModule, keystrokeDistributions)); // And now for the "fun" part. const resultsOfSplit = tokenToSplit.split({ @@ -514,49 +525,55 @@ describe('ContextToken', function() { { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 } ]); - assert.isTrue(resultsOfSplit[0].searchModule.hasInputs([ - keystrokeDistributions[0], - keystrokeDistributions[1].map((entry) => { - return { - sample: { - ...entry.sample, - insert: KMWString.substring(entry.sample.insert, 0, 4) // gets the 'arge' portion & the deleteLefts. - }, p: entry.p - } - }), - ])); + assert.isTrue(quotientPathHasInputs( + resultsOfSplit[0].searchModule, [ + keystrokeDistributions[0], + keystrokeDistributions[1].map((entry) => { + return { + sample: { + ...entry.sample, + insert: KMWString.substring(entry.sample.insert, 0, 4) // gets the 'arge' portion & the deleteLefts. 
+ }, p: entry.p + } + }) + ] + )); - assert.isTrue(resultsOfSplit[1].searchModule.hasInputs([ - keystrokeDistributions[1].map((entry) => { - return { - sample: { - ...entry.sample, - insert: KMWString.substring(entry.sample.insert, 'arge'.length), - deleteLeft: 0 - }, p: entry.p - } - }), - keystrokeDistributions[2].map((entry) => { - return { - sample: { - ...entry.sample, - insert: KMWString.substring(entry.sample.insert, 0, 'ng'.length), // gets the 'ng' portion. - }, p: entry.p - } - }), - ])); + assert.isTrue(quotientPathHasInputs( + resultsOfSplit[1].searchModule, [ + keystrokeDistributions[1].map((entry) => { + return { + sample: { + ...entry.sample, + insert: KMWString.substring(entry.sample.insert, 'arge'.length), + deleteLeft: 0 + }, p: entry.p + } + }), + keystrokeDistributions[2].map((entry) => { + return { + sample: { + ...entry.sample, + insert: KMWString.substring(entry.sample.insert, 0, 'ng'.length), // gets the 'ng' portion. + }, p: entry.p + } + }) + ] + )); - assert.isTrue(resultsOfSplit[2].searchModule.hasInputs([ - keystrokeDistributions[2].map((entry) => { - return { - sample: { - ...entry.sample, - insert: KMWString.substring(entry.sample.insert, 'ng'.length), // drops the 'ng' portion. - deleteLeft: 0 - }, p: entry.p - } - }), - ])); + assert.isTrue(quotientPathHasInputs( + resultsOfSplit[2].searchModule, [ + keystrokeDistributions[2].map((entry) => { + return { + sample: { + ...entry.sample, + insert: KMWString.substring(entry.sample.insert, 'ng'.length), // drops the 'ng' portion. 
+ deleteLeft: 0 + }, p: entry.p + } + }) + ] + )); }); }); }); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-node.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-node.tests.ts new file mode 100644 index 00000000000..fb7e3471bd4 --- /dev/null +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-node.tests.ts @@ -0,0 +1,52 @@ +import { assert } from 'chai'; + +import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; +import { models, quotientPathHasInputs, SearchQuotientSpur } from '@keymanapp/lm-worker/test-index'; + +import { buildSimplePathSplitFixture } from './search-quotient-spur.tests.js'; + +import TrieModel = models.TrieModel; + +const testModel = new TrieModel(jsonFixture('models/tries/english-1000')); + +describe('quotientNodeHasParents()', () => { + it('matches an empty array on root SearchPaths', () => { + assert.isTrue(quotientPathHasInputs(new SearchQuotientSpur(testModel), [])); + }); + + it('matches all path inputs when provided in proper order', () => { + const { paths, distributions } = buildSimplePathSplitFixture(); + assert.isTrue(quotientPathHasInputs(paths[4], distributions)); + }); + + it('does not match when any path input component is missing', () => { + const { paths, distributions } = buildSimplePathSplitFixture(); + assert.isFalse(quotientPathHasInputs(paths[4], distributions.slice(1))); + assert.isFalse(quotientPathHasInputs(paths[4], distributions.slice(2))); + assert.isFalse(quotientPathHasInputs(paths[4], distributions.slice(3))); + assert.isFalse(quotientPathHasInputs(paths[4], distributions.slice(0, 3))); + assert.isFalse(quotientPathHasInputs(paths[4], distributions.slice(0, 1).concat(distributions.slice(2)))); + }); + + it('does not match when path inputs are not in proper order', () => { + const { paths, distributions } = 
buildSimplePathSplitFixture(); + assert.isFalse(quotientPathHasInputs(paths[4], distributions.slice().reverse())); + + // Random shuffle. + let shuffled: typeof distributions; + let isShuffled: boolean; + do { + shuffled = distributions.slice().sort(() => Math.random() * 2 - 1); + // Validate that we actually shuffled - that we didn't land on the original order! + isShuffled = false; + for(let i = 0; i < distributions.length; i++) { + if(distributions[i] != shuffled[i]) { + isShuffled = true; + break; + } + } + } while(!isShuffled); + assert.isFalse(quotientPathHasInputs(paths[4], shuffled)); + }); +}); + diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts index 55babb6765d..fd2ac8a15c5 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts @@ -10,7 +10,7 @@ import { assert } from 'chai'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; -import { models, SearchQuotientSpur } from '@keymanapp/lm-worker/test-index'; +import { models, quotientPathHasInputs, SearchQuotientSpur } from '@keymanapp/lm-worker/test-index'; import TrieModel = models.TrieModel; @@ -195,48 +195,7 @@ describe('SearchQuotientSpur', () => { (constructing, current) => ({text: constructing.text + current[0].sample.insert, p: constructing.p * current[0].p}), {text: '', p: 1}) ); - assert.isTrue(pathToSplit.hasInputs(distributions)); - }); - }); - - describe('hasInputs()', () => { - it('matches an empty array on root SearchPaths', () => { - assert.isTrue(new SearchQuotientSpur(testModel).hasInputs([])); - }); - - it('matches all path inputs when provided in proper order', () => { - const { paths, 
distributions } = buildSimplePathSplitFixture(); - assert.isTrue(paths[4].hasInputs(distributions)); - }); - - it('does not match when any path input component is missing', () => { - const { paths, distributions } = buildSimplePathSplitFixture(); - assert.isFalse(paths[4].hasInputs(distributions.slice(1))); - assert.isFalse(paths[4].hasInputs(distributions.slice(2))); - assert.isFalse(paths[4].hasInputs(distributions.slice(3))); - assert.isFalse(paths[4].hasInputs(distributions.slice(0, 3))); - assert.isFalse(paths[4].hasInputs(distributions.slice(0, 1).concat(distributions.slice(2)))); - }); - - it('does not match when path inputs are not in proper order', () => { - const { paths, distributions } = buildSimplePathSplitFixture(); - assert.isFalse(paths[4].hasInputs(distributions.slice().reverse())); - - // Random shuffle. - let shuffled: typeof distributions; - let isShuffled: boolean; - do { - shuffled = distributions.slice().sort(() => Math.random() * 2 - 1); - // Validate that we actually shuffled - that we didn't land on the original order! 
- isShuffled = false; - for(let i = 0; i < distributions.length; i++) { - if(distributions[i] != shuffled[i]) { - isShuffled = true; - break; - } - } - } while(!isShuffled); - assert.isFalse(paths[4].hasInputs(shuffled)); + assert.isTrue(quotientPathHasInputs(pathToSplit, distributions)); }); }); }); \ No newline at end of file From 2f4089a318cba87205801224f2f25fe40fe048ce Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Thu, 22 Jan 2026 09:35:45 -0600 Subject: [PATCH 5/5] fix(web): better hasInputs handling for root nodes --- .../worker-thread/src/main/correction/search-quotient-node.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts index 22e0b0b22a4..2335d6f31f9 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -142,7 +142,8 @@ export function quotientPathHasInputs(node: SearchQuotientNode, keystrokeDistrib return true; } } - return false; + + return node.parents.length == 0 && keystrokeDistributions.length == 0; } if(node.inputCount == 0) {