From 8a165d24b4b7dca3553b7887bdca631084b05dbb Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 22 Oct 2025 15:50:28 -0500 Subject: [PATCH 1/3] change(web): remove duplicate correction-search path filtering Upon investigation into the code being removed, this mostly triggered whenever an empty transform appeared in the input - during a context reset, at initialization, or when a deadkey is typed. (Deadkeys aren't sent to the pred-text engine.) This is generally not a common case within the engine, and there exists other filtering that helps prevent duplicating search results. Suppose a token begins tagged with an empty transform. When the search performs an 'insertion' to better look up words down a lexical path, the 'insertion' is identical whether before or after an empty transform. Similarly, insertions after a deletion appear no different than substitutions (or the same insertion before the deletion, then the deletion itself). The code likely didn't gain us much, and it carried some performance overhead - it built a large object that consumed memory and lookup time, constructing and processing strings for hashing. 
Build-bot: skip build:web Test-bot: skip --- .../src/main/correction/distance-modeler.ts | 64 ++++---- .../main/correction/search-quotient-spur.ts | 20 +-- .../distance-modeler.tests.ts | 139 +++++++++++++++--- 3 files changed, 153 insertions(+), 70 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts index fdaf0b4a28f..c2e55f953bb 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts @@ -13,6 +13,7 @@ import Distribution = LexicalModelTypes.Distribution; import LexiconTraversal = LexicalModelTypes.LexiconTraversal; import ProbabilityMass = LexicalModelTypes.ProbabilityMass; import Transform = LexicalModelTypes.Transform; +import TransformUtils from '#./transformUtils.js'; type RealizedInput = ProbabilityMass[]; // NOT Distribution - they're masses from separate distributions. @@ -136,9 +137,15 @@ export class SearchNode { */ readonly spaceId: number; + /** + * Notes if the most recent edit processed in the node's represented search path + * was not a deletion. Root nodes are considered to meet this condition. 
+ */ + readonly nonDeletion: boolean; + constructor(rootTraversal: LexiconTraversal, spaceId: number, toKey?: (arg0: string) => string); - constructor(node: SearchNode, spaceId?: number); - constructor(param1: LexiconTraversal | SearchNode, spaceId?: number, toKey?: ((arg0: string) => string)) { + constructor(node: SearchNode, spaceId?: number, isSubstitution?: boolean); + constructor(param1: LexiconTraversal | SearchNode, spaceId?: number, param2?: boolean | ((arg0: string) => string)) { if(param1 instanceof SearchNode) { const priorNode = param1; @@ -149,6 +156,8 @@ export class SearchNode { this.priorInput = priorNode.priorInput.slice(0); this.matchedTraversals = priorNode.matchedTraversals.slice(); + this.nonDeletion = param2 as boolean; + // Do NOT copy over _inputCost; this is a helper-constructor for methods // building new nodes... which will have a different cost. delete this._inputCost; @@ -160,8 +169,10 @@ export class SearchNode { this.calculation = new ClassicalDistanceCalculation(); this.matchedTraversals = [param1]; this.priorInput = []; + const toKey = param2 as ((arg0: string) => string); this.toKey = toKey || (x => x); this.spaceId = spaceId; + this.nonDeletion = true; } } @@ -250,12 +261,28 @@ export class SearchNode { throw new Error("Invalid state: will not take new input while still processing Transform subset"); } + // Do not create insertion nodes after an empty transform; only before. + // "Before" and "after" are identical for empty transforms - why duplicate? + // + // Also, do not create insertion nodes directly after deletions; that's + // essentially a substitution. We'll have an equivalent edge already built + // with higher-accuracy modeling. + // + // Following previous insertions is fine, as is following a proper + // match/substitution. 
+ if(this.priorInput.length > 0) { + const priorInput = this.priorInput[this.priorInput.length - 1].sample; + if(TransformUtils.isEmpty(priorInput) || !this.nonDeletion) { + return []; + } + } + let edges: SearchNode[] = []; for(let lexicalChild of this.currentTraversal.children()) { let childCalc = this.calculation.addMatchChar(lexicalChild.char); - let searchChild = new SearchNode(this); + let searchChild = new SearchNode(this, this.spaceId, true); searchChild.calculation = childCalc; searchChild.priorInput = this.priorInput; searchChild.matchedTraversals.push(lexicalChild.traversal()); @@ -367,7 +394,7 @@ export class SearchNode { keySet.add(char); - const node = new SearchNode(this); + const node = new SearchNode(this, this.spaceId, this.nonDeletion); node.calculation = calculation; node.partialEdge.subsetSubindex++; // Append the newly-processed char to the subset's `insert` string. @@ -415,7 +442,7 @@ export class SearchNode { // `insert` string. childSubset.insert += SENTINEL_CODE_UNIT; - const node = new SearchNode(this); + const node = new SearchNode(this, this.spaceId, this.nonDeletion); node.calculation = childCalc; node.matchedTraversals.push(childTraversal); node.partialEdge.subsetSubindex++; @@ -464,7 +491,7 @@ export class SearchNode { continue; } - const node = new SearchNode(this, edgeId); + const node = new SearchNode(this, edgeId, isSubstitution); node.calculation = edgeCalc; node.partialEdge = { doSubsetMatching: isSubstitution, @@ -514,31 +541,6 @@ export class SearchNode { return this.setupSubsetProcessing(dist, true, edgeId); } - /** - * A key uniquely identifying identical search path nodes. Replacement of a keystroke's - * text in a manner that results in identical path to a different keystroke should result - * in both path nodes sharing the same pathKey value. - * - * This mostly matters after re-use of a SearchSpace when a token is extended; children of - * completed paths are possible, and so children can be re-built in such a case. 
- */ - get pathKey(): string { - // Note: deleteLefts apply before inserts, so order them that way. - let inputString = this.priorInput.map((value) => '-' + value.sample.deleteLeft + '+' + value.sample.insert).join(''); - const partialEdge = this.partialEdge; - // Make sure the subset progress contributes to the key! - if(partialEdge) { - const subset = partialEdge.transformSubset; - // We need a copy of at least one insert string in the subset here. Without that, all subsets - // at the same level of the search, against the same match, look identical - not cool! - inputString += `-${subset.entries[0].sample.deleteLeft}+<${subset.key},${partialEdge.subsetSubindex},${subset.entries[0].sample.insert}>`; - } - let matchString = this.calculation.matchSequence.join(''); - - // TODO: might should also track diagonalWidth. - return inputString + ' @@ ' + matchString; - } - /** * A key uniquely identifying identical match sequences. Reaching the same net result * by different paths should result in identical `resultKey` values. diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts index 6f0e75c1a58..ee37a0f7938 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -52,16 +52,6 @@ export class SearchQuotientSpur implements SearchQuotientNode { */ public returnedValues?: {[resultKey: string]: SearchNode} = {}; // TODO: make it private again! - /** - * Acts as a Map that prevents duplicating a correction-search path if reached - * more than once. 
- */ - protected get processedEdgeSet(): {[pathKey: string]: boolean} { - return this._processedEdgeSet; - } - - private _processedEdgeSet?: {[pathKey: string]: boolean} = {}; - /** * Provides a heuristic for the base cost at each depth if the best * individual input were taken at that level. @@ -245,7 +235,7 @@ export class SearchQuotientSpur implements SearchQuotientNode { const result = this.parentPath.handleNextNode(); - if(result.type == 'complete' && !this.processedEdgeSet[result.finalNode.pathKey]) { + if(result.type == 'complete') { this.addEdgesForNodes(result.finalNode); } @@ -262,14 +252,6 @@ export class SearchQuotientSpur implements SearchQuotientNode { cost: currentNode.currentCost } - // Have we already processed a matching edge? If so, skip it. - // We already know the previous edge is of lower cost. - if(this.processedEdgeSet[currentNode.pathKey]) { - return unmatchedResult; - } else { - this.processedEdgeSet[currentNode.pathKey] = true; - } - // Stage 1: filter out nodes/edges we want to prune // Forbid a raw edit-distance of greater than 2. diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/distance-modeler.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/distance-modeler.tests.ts index abf9402f701..80e8a3b475c 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/distance-modeler.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/distance-modeler.tests.ts @@ -337,31 +337,130 @@ describe('Correction Distance Modeler', () => { }); }); - // Consider adding more, deeper? 
- it('builds insertion edges based on lexicon, from root', () => { - const rootTraversal = testModel.traverseFromRoot(); - assert.isNotEmpty(rootTraversal); + describe('buildInsertionEdges()', () => { + it('builds insertion edges from (empty) root', () => { + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); - const rootSeed = SEARCH_EDGE_SEED++; - const rootNode = new correction.SearchNode(rootTraversal, rootSeed); - assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); + const rootSeed = SEARCH_EDGE_SEED++; + const rootNode = new correction.SearchNode(rootTraversal, rootSeed); + assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); - const edges = rootNode.buildInsertionEdges(); - assert.isAbove(edges.length, 0); + const edges = rootNode.buildInsertionEdges(); + assert.isAbove(edges.length, 0); - let expectedChildCount = 0; - for(const child of rootTraversal.children()) { - expectedChildCount++; + let expectedChildCount = 0; + for(const child of rootTraversal.children()) { + expectedChildCount++; - const childEdge = edges.filter(value => value.calculation.lastMatchEntry == child.char)[0]; - assert.isOk(childEdge); - assert.isEmpty(childEdge.priorInput); - assert.isEmpty(childEdge.calculation.inputSequence); - assert.isAbove(childEdge.currentCost, 0); - assert.equal(childEdge.spaceId, rootSeed); - } + const childEdge = edges.filter(value => value.calculation.lastMatchEntry == child.char)[0]; + assert.isOk(childEdge); + assert.isEmpty(childEdge.priorInput); + assert.isEmpty(childEdge.calculation.inputSequence); + assert.isAbove(childEdge.currentCost, 0); + assert.equal(childEdge.spaceId, rootSeed); + } - assert.equal(edges.length, expectedChildCount); + assert.equal(edges.length, expectedChildCount); + }); + + it('builds insertion edges directly after prior insertion', () => { + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); + + const rootSeed = SEARCH_EDGE_SEED++; + 
const rootNode = new correction.SearchNode(rootTraversal, rootSeed); + assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); + + const edges = rootNode.buildInsertionEdges(); + assert.isAbove(edges.length, 0); + + const firstChild = edges[0]; + const childEdges = firstChild.buildInsertionEdges(); + + let expectedChildCount = 0; + for(const child of firstChild.currentTraversal.children()) { + expectedChildCount++; + + const childEdge = edges.filter(value => value.calculation.lastMatchEntry == child.char)[0]; + assert.isOk(childEdge); + assert.isEmpty(childEdge.priorInput); + assert.isEmpty(childEdge.calculation.inputSequence); + assert.isAbove(childEdge.currentCost, 0); + assert.equal(childEdge.spaceId, rootSeed); + } + + assert.equal(childEdges.length, expectedChildCount); + }); + + it('does not build insertion edges after a deletion', () => { + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); + + const rootSeed = SEARCH_EDGE_SEED++; + const rootNode = new correction.SearchNode(rootTraversal, rootSeed); + assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); + + const edges = rootNode.buildDeletionEdges([{ + sample: { insert: 'd', deleteLeft: 0 }, + p: 1 + }], SEARCH_EDGE_SEED++).flatMap(n => n.processSubsetEdge()); + assert.isAbove(edges.length, 0); + const firstChild = edges[0]; + + const insertEdgesAfterDelete = firstChild.buildInsertionEdges(); + assert.equal(insertEdgesAfterDelete.length, 0); + }); + + it('does not build insertion edges after receiving an empty transform as input', () => { + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); + + const rootSeed = SEARCH_EDGE_SEED++; + const rootNode = new correction.SearchNode(rootTraversal, rootSeed); + assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); + + const edges = rootNode.buildSubstitutionEdges([{ + // intentionally empty! 
+ sample: { insert: '', deleteLeft: 0 }, + p: 1 + }], SEARCH_EDGE_SEED++).flatMap(n => n.processSubsetEdge()); + assert.isAbove(edges.length, 0); + const firstChild = edges[0]; + const insertsAfterEmpty = firstChild.buildInsertionEdges(); + + assert.equal(insertsAfterEmpty.length, 0); + }); + + it('builds insertion edges after a substitution', () => { + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); + + const rootSeed = SEARCH_EDGE_SEED++; + const rootNode = new correction.SearchNode(rootTraversal, rootSeed); + assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); + + const subSeed = SEARCH_EDGE_SEED++; + const edges = rootNode.buildSubstitutionEdges([{ + sample: { insert: 't', deleteLeft: 0 }, + p: 1 + }], subSeed).flatMap(n => n.processSubsetEdge()); + assert.isAbove(edges.length, 0); + const firstChild = edges[0]; + const insertsAfterInput = firstChild.buildInsertionEdges(); + + let expectedChildCount = 0; + for(const child of firstChild.currentTraversal.children()) { + expectedChildCount++; + + const childEdge = edges.filter(value => value.calculation.lastMatchEntry == child.char)[0]; + assert.isOk(childEdge); + assert.equal(childEdge.priorInput.length, 1); + assert.equal(childEdge.spaceId, subSeed); + } + + assert.equal(insertsAfterInput.length, expectedChildCount); + }); }); describe('buildDeletionEdges @ token root', () => { From cf862439d54155b52bf3a18ab269c3a74b4e09b1 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Tue, 6 Jan 2026 11:49:50 -0600 Subject: [PATCH 2/3] change(web): track SearchNode last edit-edge type --- .../src/main/correction/distance-modeler.ts | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts index c2e55f953bb..ad744f9a590 100644 --- 
a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts @@ -34,6 +34,13 @@ enum TimedTaskTypes { CORRECTING = 2 } +enum PathEdge { + ROOT = 'root', + INSERTION = 'insertion', + DELETION = 'deletion', + SUBSTITUTION = 'substitution' +} + /** * This type models a partially-processed subset of Transforms to be processed * as a batch due to sharing similar properties. @@ -138,14 +145,14 @@ export class SearchNode { readonly spaceId: number; /** - * Notes if the most recent edit processed in the node's represented search path - * was not a deletion. Root nodes are considered to meet this condition. + * Notes the edit operation used for the most recent edge in the node's + * represented search path. */ - readonly nonDeletion: boolean; + private readonly lastEdgeType: PathEdge; constructor(rootTraversal: LexiconTraversal, spaceId: number, toKey?: (arg0: string) => string); - constructor(node: SearchNode, spaceId?: number, isSubstitution?: boolean); - constructor(param1: LexiconTraversal | SearchNode, spaceId?: number, param2?: boolean | ((arg0: string) => string)) { + constructor(node: SearchNode, spaceId?: number, edgeType?: PathEdge); + constructor(param1: LexiconTraversal | SearchNode, spaceId?: number, param2?: PathEdge | ((arg0: string) => string)) { if(param1 instanceof SearchNode) { const priorNode = param1; @@ -156,7 +163,7 @@ export class SearchNode { this.priorInput = priorNode.priorInput.slice(0); this.matchedTraversals = priorNode.matchedTraversals.slice(); - this.nonDeletion = param2 as boolean; + this.lastEdgeType = param2 as PathEdge; // Do NOT copy over _inputCost; this is a helper-constructor for methods // building new nodes... which will have a different cost. 
@@ -172,7 +179,7 @@ export class SearchNode { const toKey = param2 as ((arg0: string) => string); this.toKey = toKey || (x => x); this.spaceId = spaceId; - this.nonDeletion = true; + this.lastEdgeType = PathEdge.ROOT; } } @@ -272,7 +279,7 @@ export class SearchNode { // match/substitution. if(this.priorInput.length > 0) { const priorInput = this.priorInput[this.priorInput.length - 1].sample; - if(TransformUtils.isEmpty(priorInput) || !this.nonDeletion) { + if(TransformUtils.isEmpty(priorInput) || this.lastEdgeType == PathEdge.DELETION) { return []; } } @@ -282,7 +289,7 @@ export class SearchNode { for(let lexicalChild of this.currentTraversal.children()) { let childCalc = this.calculation.addMatchChar(lexicalChild.char); - let searchChild = new SearchNode(this, this.spaceId, true); + let searchChild = new SearchNode(this, this.spaceId, PathEdge.INSERTION); searchChild.calculation = childCalc; searchChild.priorInput = this.priorInput; searchChild.matchedTraversals.push(lexicalChild.traversal()); @@ -394,7 +401,7 @@ export class SearchNode { keySet.add(char); - const node = new SearchNode(this, this.spaceId, this.nonDeletion); + const node = new SearchNode(this, this.spaceId, this.lastEdgeType); node.calculation = calculation; node.partialEdge.subsetSubindex++; // Append the newly-processed char to the subset's `insert` string. @@ -442,7 +449,7 @@ export class SearchNode { // `insert` string. childSubset.insert += SENTINEL_CODE_UNIT; - const node = new SearchNode(this, this.spaceId, this.nonDeletion); + const node = new SearchNode(this, this.spaceId, this.lastEdgeType); node.calculation = childCalc; node.matchedTraversals.push(childTraversal); node.partialEdge.subsetSubindex++; @@ -491,7 +498,7 @@ export class SearchNode { continue; } - const node = new SearchNode(this, edgeId, isSubstitution); + const node = new SearchNode(this, edgeId, isSubstitution ? 
PathEdge.SUBSTITUTION : PathEdge.DELETION); node.calculation = edgeCalc; node.partialEdge = { doSubsetMatching: isSubstitution, From 4f98dbff7fb91a490142ec8f15a2cc09ceeff176 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Tue, 6 Jan 2026 12:00:54 -0600 Subject: [PATCH 3/3] change(web): avoid main-code internal-hash imports --- .../worker-thread/src/main/correction/distance-modeler.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts index ad744f9a590..53c735397e2 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts @@ -8,12 +8,12 @@ import { ExecutionTimer, STANDARD_TIME_BETWEEN_DEFERS } from './execution-timer. import { QUEUE_NODE_COMPARATOR, SearchQuotientSpur } from './search-quotient-spur.js'; import { PathResult } from './search-quotient-node.js'; import { subsetByChar, subsetByInterval, mergeSubset, TransformSubset } from '../transform-subsets.js'; +import TransformUtils from '../transformUtils.js'; import Distribution = LexicalModelTypes.Distribution; import LexiconTraversal = LexicalModelTypes.LexiconTraversal; import ProbabilityMass = LexicalModelTypes.ProbabilityMass; import Transform = LexicalModelTypes.Transform; -import TransformUtils from '#./transformUtils.js'; type RealizedInput = ProbabilityMass[]; // NOT Distribution - they're masses from separate distributions.