diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts index fdaf0b4a28f..53c735397e2 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts @@ -8,6 +8,7 @@ import { ExecutionTimer, STANDARD_TIME_BETWEEN_DEFERS } from './execution-timer. import { QUEUE_NODE_COMPARATOR, SearchQuotientSpur } from './search-quotient-spur.js'; import { PathResult } from './search-quotient-node.js'; import { subsetByChar, subsetByInterval, mergeSubset, TransformSubset } from '../transform-subsets.js'; +import TransformUtils from '../transformUtils.js'; import Distribution = LexicalModelTypes.Distribution; import LexiconTraversal = LexicalModelTypes.LexiconTraversal; @@ -33,6 +34,13 @@ enum TimedTaskTypes { CORRECTING = 2 } +enum PathEdge { + ROOT = 'root', + INSERTION = 'insertion', + DELETION = 'deletion', + SUBSTITUTION = 'substitution' +} + /** * This type models a partially-processed subset of Transforms to be processed * as a batch due to sharing similar properties. @@ -136,9 +144,15 @@ export class SearchNode { */ readonly spaceId: number; + /** + * Notes the edit operation used for the most recent edge in the node's + * represented search path. + */ + private readonly lastEdgeType: PathEdge; + constructor(rootTraversal: LexiconTraversal, spaceId: number, toKey?: (arg0: string) => string); - constructor(node: SearchNode, spaceId?: number); - constructor(param1: LexiconTraversal | SearchNode, spaceId?: number, toKey?: ((arg0: string) => string)) { + constructor(node: SearchNode, spaceId?: number, edgeType?: PathEdge); + constructor(param1: LexiconTraversal | SearchNode, spaceId?: number, param2?: PathEdge | ((arg0: string) => string)) { if(param1 instanceof SearchNode) { const priorNode = param1; @@ -149,6 +163,8 @@ export class SearchNode { this.priorInput = priorNode.priorInput.slice(0); this.matchedTraversals = priorNode.matchedTraversals.slice(); + this.lastEdgeType = param2 as PathEdge; + // Do NOT copy over _inputCost; this is a helper-constructor for methods // building new nodes... which will have a different cost. delete this._inputCost; @@ -160,8 +176,10 @@ export class SearchNode { this.calculation = new ClassicalDistanceCalculation(); this.matchedTraversals = [param1]; this.priorInput = []; + const toKey = param2 as ((arg0: string) => string); this.toKey = toKey || (x => x); this.spaceId = spaceId; + this.lastEdgeType = PathEdge.ROOT; } } @@ -250,12 +268,28 @@ export class SearchNode { throw new Error("Invalid state: will not take new input while still processing Transform subset"); } + // Do not create insertion nodes after an empty transform; only before. + // "Before" and "after" are identical for empty transforms - why duplicate? + // + // Also, do not create insertion nodes directly after deletions; that's + // essentially a substitution. We'll have an equivalent edge already built + // with higher-accuracy modeling. + // + // Following previous insertions is fine, as is following a proper + // match/substitution. + if(this.priorInput.length > 0) { + const priorInput = this.priorInput[this.priorInput.length - 1].sample; + if(TransformUtils.isEmpty(priorInput) || this.lastEdgeType == PathEdge.DELETION) { + return []; + } + } + let edges: SearchNode[] = []; for(let lexicalChild of this.currentTraversal.children()) { let childCalc = this.calculation.addMatchChar(lexicalChild.char); - let searchChild = new SearchNode(this); + let searchChild = new SearchNode(this, this.spaceId, PathEdge.INSERTION); searchChild.calculation = childCalc; searchChild.priorInput = this.priorInput; searchChild.matchedTraversals.push(lexicalChild.traversal()); @@ -367,7 +401,7 @@ export class SearchNode { keySet.add(char); - const node = new SearchNode(this); + const node = new SearchNode(this, this.spaceId, this.lastEdgeType); node.calculation = calculation; node.partialEdge.subsetSubindex++; // Append the newly-processed char to the subset's `insert` string. @@ -415,7 +449,7 @@ export class SearchNode { // `insert` string. childSubset.insert += SENTINEL_CODE_UNIT; - const node = new SearchNode(this); + const node = new SearchNode(this, this.spaceId, this.lastEdgeType); node.calculation = childCalc; node.matchedTraversals.push(childTraversal); node.partialEdge.subsetSubindex++; @@ -464,7 +498,7 @@ export class SearchNode { continue; } - const node = new SearchNode(this, edgeId); + const node = new SearchNode(this, edgeId, isSubstitution ? PathEdge.SUBSTITUTION : PathEdge.DELETION); node.calculation = edgeCalc; node.partialEdge = { doSubsetMatching: isSubstitution, @@ -514,31 +548,6 @@ export class SearchNode { return this.setupSubsetProcessing(dist, true, edgeId); } - /** - * A key uniquely identifying identical search path nodes. Replacement of a keystroke's - * text in a manner that results in identical path to a different keystroke should result - * in both path nodes sharing the same pathKey value. - * - * This mostly matters after re-use of a SearchSpace when a token is extended; children of - * completed paths are possible, and so children can be re-built in such a case. - */ - get pathKey(): string { - // Note: deleteLefts apply before inserts, so order them that way. - let inputString = this.priorInput.map((value) => '-' + value.sample.deleteLeft + '+' + value.sample.insert).join(''); - const partialEdge = this.partialEdge; - // Make sure the subset progress contributes to the key! - if(partialEdge) { - const subset = partialEdge.transformSubset; - // We need a copy of at least one insert string in the subset here. Without that, all subsets - // at the same level of the search, against the same match, look identical - not cool! - inputString += `-${subset.entries[0].sample.deleteLeft}+<${subset.key},${partialEdge.subsetSubindex},${subset.entries[0].sample.insert}>`; - } - let matchString = this.calculation.matchSequence.join(''); - - // TODO: might should also track diagonalWidth. - return inputString + ' @@ ' + matchString; - } - /** * A key uniquely identifying identical match sequences. Reaching the same net result * by different paths should result in identical `resultKey` values. diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts index 6f0e75c1a58..ee37a0f7938 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -52,16 +52,6 @@ export class SearchQuotientSpur implements SearchQuotientNode { */ public returnedValues?: {[resultKey: string]: SearchNode} = {}; // TODO: make it private again! - /** - * Acts as a Map that prevents duplicating a correction-search path if reached - * more than once. - */ - protected get processedEdgeSet(): {[pathKey: string]: boolean} { - return this._processedEdgeSet; - } - - private _processedEdgeSet?: {[pathKey: string]: boolean} = {}; - /** * Provides a heuristic for the base cost at each depth if the best * individual input were taken at that level. @@ -245,7 +235,7 @@ export class SearchQuotientSpur implements SearchQuotientNode { const result = this.parentPath.handleNextNode(); - if(result.type == 'complete' && !this.processedEdgeSet[result.finalNode.pathKey]) { + if(result.type == 'complete') { this.addEdgesForNodes(result.finalNode); } @@ -262,14 +252,6 @@ export class SearchQuotientSpur implements SearchQuotientNode { cost: currentNode.currentCost } - // Have we already processed a matching edge? If so, skip it. - // We already know the previous edge is of lower cost. - if(this.processedEdgeSet[currentNode.pathKey]) { - return unmatchedResult; - } else { - this.processedEdgeSet[currentNode.pathKey] = true; - } - // Stage 1: filter out nodes/edges we want to prune // Forbid a raw edit-distance of greater than 2. diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/distance-modeler.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/distance-modeler.tests.ts index d3aca5662f3..a3a7c8db016 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/distance-modeler.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/distance-modeler.tests.ts @@ -341,31 +341,130 @@ describe('Correction Distance Modeler', () => { }); }); - // Consider adding more, deeper? - it('builds insertion edges based on lexicon, from root', () => { - const rootTraversal = testModel.traverseFromRoot(); - assert.isNotEmpty(rootTraversal); + describe('buildInsertionEdges()', () => { + it('builds insertion edges from (empty) root', () => { + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); - const rootSeed = SEARCH_EDGE_SEED++; - const rootNode = new correction.SearchNode(rootTraversal, rootSeed); - assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); + const rootSeed = SEARCH_EDGE_SEED++; + const rootNode = new correction.SearchNode(rootTraversal, rootSeed); + assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); - const edges = rootNode.buildInsertionEdges(); - assert.isAbove(edges.length, 0); + const edges = rootNode.buildInsertionEdges(); + assert.isAbove(edges.length, 0); - let expectedChildCount = 0; - for(const child of rootTraversal.children()) { - expectedChildCount++; + let expectedChildCount = 0; + for(const child of rootTraversal.children()) { + expectedChildCount++; - let childEdge = edges.filter(value => lastEntry(value.calculation.matchSequence) == child.char)[0]; - assert.isOk(childEdge); - assert.isEmpty(childEdge.priorInput); - assert.isEmpty(childEdge.calculation.inputSequence); - assert.isAbove(childEdge.currentCost, 0); - assert.equal(childEdge.spaceId, rootSeed); - } + let childEdge = edges.filter(value => lastEntry(value.calculation.matchSequence) == child.char)[0]; + assert.isOk(childEdge); + assert.isEmpty(childEdge.priorInput); + assert.isEmpty(childEdge.calculation.inputSequence); + assert.isAbove(childEdge.currentCost, 0); + assert.equal(childEdge.spaceId, rootSeed); + } - assert.equal(edges.length, expectedChildCount); + assert.equal(edges.length, expectedChildCount); + }); + + it('builds insertion edges directly after prior insertion', () => { + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); + + const rootSeed = SEARCH_EDGE_SEED++; + const rootNode = new correction.SearchNode(rootTraversal, rootSeed); + assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); + + const edges = rootNode.buildInsertionEdges(); + assert.isAbove(edges.length, 0); + + const firstChild = edges[0]; + const childEdges = firstChild.buildInsertionEdges(); + + let expectedChildCount = 0; + for(const child of firstChild.currentTraversal.children()) { + expectedChildCount++; + + let childEdge = edges.filter(value => lastEntry(value.calculation.matchSequence) == child.char)[0]; + assert.isOk(childEdge); + assert.isEmpty(childEdge.priorInput); + assert.isEmpty(childEdge.calculation.inputSequence); + assert.isAbove(childEdge.currentCost, 0); + assert.equal(childEdge.spaceId, rootSeed); + } + + assert.equal(childEdges.length, expectedChildCount); + }); + + it('does not build insertion edges after a deletion', () => { + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); + + const rootSeed = SEARCH_EDGE_SEED++; + const rootNode = new correction.SearchNode(rootTraversal, rootSeed); + assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); + + const edges = rootNode.buildDeletionEdges([{ + sample: { insert: 'd', deleteLeft: 0 }, + p: 1 + }], SEARCH_EDGE_SEED++).flatMap(n => n.processSubsetEdge()); + assert.isAbove(edges.length, 0); + const firstChild = edges[0]; + + const insertEdgesAfterDelete = firstChild.buildInsertionEdges(); + assert.equal(insertEdgesAfterDelete.length, 0); + }); + + it('does not build insertion edges after receiving an empty transform as input', () => { + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); + + const rootSeed = SEARCH_EDGE_SEED++; + const rootNode = new correction.SearchNode(rootTraversal, rootSeed); + assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); + + const edges = rootNode.buildSubstitutionEdges([{ + // intentionally empty! + sample: { insert: '', deleteLeft: 0 }, + p: 1 + }], SEARCH_EDGE_SEED++).flatMap(n => n.processSubsetEdge()); + assert.isAbove(edges.length, 0); + const firstChild = edges[0]; + const insertsAfterEmpty = firstChild.buildInsertionEdges(); + + assert.equal(insertsAfterEmpty.length, 0); + }); + + it('builds insertion edges after a substitution', () => { + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); + + const rootSeed = SEARCH_EDGE_SEED++; + const rootNode = new correction.SearchNode(rootTraversal, rootSeed); + assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0); + + const subSeed = SEARCH_EDGE_SEED++; + const edges = rootNode.buildSubstitutionEdges([{ + sample: { insert: 't', deleteLeft: 0 }, + p: 1 + }], subSeed).flatMap(n => n.processSubsetEdge()); + assert.isAbove(edges.length, 0); + const firstChild = edges[0]; + const insertsAfterInput = firstChild.buildInsertionEdges(); + + let expectedChildCount = 0; + for(const child of firstChild.currentTraversal.children()) { + expectedChildCount++; + + let childEdge = edges.filter(value => lastEntry(value.calculation.matchSequence) == child.char)[0]; + assert.isOk(childEdge); + assert.equal(childEdge.priorInput.length, 1); + assert.equal(childEdge.spaceId, subSeed); + } + + assert.equal(insertsAfterInput.length, expectedChildCount); + }); }); describe('buildDeletionEdges @ token root', () => {