Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ export class ContextToken {
* Contains all relevant correction-search data for use in generating
* corrections for this ContextToken instance.
*/
public get searchSpace(): SearchQuotientSpur {
return this._searchSpace;
public get searchModule(): SearchQuotientSpur {
return this._searchModule;
}
private _searchSpace: SearchQuotientSpur;
private _searchModule: SearchQuotientSpur;

isPartial: boolean;

Expand Down Expand Up @@ -107,7 +107,7 @@ export class ContextToken {
//
// In case we are unable to perfectly track context (say, due to multitaps)
// we need to ensure that only fully-utilized keystrokes are considered.
this._searchSpace = priorToken.searchSpace;
this._searchModule = priorToken.searchModule;
this._inputRange = priorToken._inputRange.slice();

// Preserve any annotated applied-suggestion transition ID data; it's useful
Expand Down Expand Up @@ -138,7 +138,7 @@ export class ContextToken {
searchSpace = searchSpace.addInput([{sample: transform, p: BASE_PROBABILITY}], 1);
});

this._searchSpace = searchSpace;
this._searchModule = searchSpace;
}
}

Expand All @@ -148,7 +148,7 @@ export class ContextToken {
*/
addInput(inputSource: TokenInputSource, distribution: Distribution<Transform>) {
this._inputRange.push(inputSource);
this._searchSpace = this._searchSpace.addInput(distribution, inputSource.bestProbFromSet);
this._searchModule = this._searchModule.addInput(distribution, inputSource.bestProbFromSet);
}

/**
Expand Down Expand Up @@ -201,18 +201,7 @@ export class ContextToken {
* received that can correspond to the current instance.
*/
get exampleInput(): string {
/*
* TODO: with clear limits (strict cost minimization?) / prior calculation
* attempts, return the best _suggestion_ for this token. This is
* especially relevant for epic/dict-breaker - we want to best model the token
* as it would apply within the word-breaking algorithm.
*
* If not possible, find the best of the deepest search paths and append the
* most likely keystroke data afterward.
*/
const transforms = this.searchSpace.inputSequence.map((dist) => dist[0].sample)
const composite = transforms.reduce((accum, current) => buildMergedTransform(accum, current), {insert: '', deleteLeft: 0});
return composite.insert;
return this.searchModule.bestExample.text;
}

/**
Expand Down Expand Up @@ -248,7 +237,7 @@ export class ContextToken {

lastInputDistrib = lastInputDistrib?.map((entry, index) => {
return {
sample: buildMergedTransform(entry.sample, token.searchSpace.inputSequence[0][index].sample),
sample: buildMergedTransform(entry.sample, token.searchModule.inputSequence[0][index].sample),
p: entry.p
}
});
Expand All @@ -267,10 +256,10 @@ export class ContextToken {
// Ignore the last entry for now - it may need to merge with a matching
// entry in the next token!
for(let i = startIndex; i < inputCount - 1; i++) {
resultToken.addInput(token.inputRange[i], token.searchSpace.inputSequence[i]);
resultToken.addInput(token.inputRange[i], token.searchModule.inputSequence[i]);
}
lastSourceInput = token.inputRange[inputCount-1];
lastInputDistrib = token.searchSpace.inputSequence[inputCount-1];
lastInputDistrib = token.searchModule.inputSequence[inputCount-1];
}

resultToken.addInput(lastSourceInput, lastInputDistrib);
Expand Down Expand Up @@ -326,7 +315,7 @@ export class ContextToken {
const totalLenBeforeLastApply = committedLen + lenBeforeLastApply;
// We read the start position for the NEXT token to know the split position.
const extraCharsAdded = splitSpecs[1].textOffset - totalLenBeforeLastApply;
const tokenSequence = overextendedToken.searchSpace.inputSequence;
const tokenSequence = overextendedToken.searchModule.inputSequence;
const lastInputIndex = tokenSequence.length - 1;
const inputDistribution = tokenSequence[lastInputIndex];
const headDistribution = inputDistribution.map((m) => {
Expand Down Expand Up @@ -374,7 +363,7 @@ export class ContextToken {
backupToken = new ContextToken(constructingToken);
lenBeforeLastApply = KMWString.length(currentText.left);
currentText = applyTransform(alteredSources[transformIndex].trueTransform, currentText);
constructingToken.addInput(this.inputRange[transformIndex], this.searchSpace.inputSequence[transformIndex]);
constructingToken.addInput(this.inputRange[transformIndex], this.searchModule.inputSequence[transformIndex]);
transformIndex++;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types';
import { ClassicalDistanceCalculation } from './classical-calculation.js';
import { ExecutionTimer, STANDARD_TIME_BETWEEN_DEFERS } from './execution-timer.js';
import { QUEUE_NODE_COMPARATOR, SearchQuotientSpur } from './search-quotient-spur.js';
import { PathResult } from './search-quotient-node.js';
import { subsetByChar, subsetByInterval, mergeSubset, TransformSubset } from '../transform-subsets.js';

import Distribution = LexicalModelTypes.Distribution;
Expand Down Expand Up @@ -599,23 +600,6 @@ export class SearchResult {
}
}

type NullPath = {
type: 'none'
}

type IntermediateSearchPath = {
type: 'intermediate',
cost: number
}

type CompleteSearchPath = {
type: 'complete',
cost: number,
finalNode: SearchNode
}

export type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath;

// Current best guesstimate of how compositor will retrieve ideal corrections.
export async function *getBestMatches(searchSpace: SearchQuotientSpur, timer: ExecutionTimer): AsyncGenerator<SearchResult> {
let currentReturns: {[resultKey: string]: SearchNode} = {};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Keyman is copyright (C) SIL Global. MIT License.
*
* Created by jahorton on 2025-10-09
*
* This file defines the predictive-text engine's SearchSpace class, which is used to
* manage the search-space(s) for text corrections within the engine.
*/

import { SearchNode, SearchResult } from "./distance-modeler.js";

/** Module-private monotonic counter backing {@link generateSpaceSeed}. */
let SPACE_ID_SEED = 0;

/**
 * Produces a unique numeric identifier for a newly-created search space.
 *
 * Identifiers start at zero and increase by one per call; uniqueness holds
 * per module load (the counter is not persisted).
 *
 * @returns The next unused search-space ID.
 */
export function generateSpaceSeed(): number {
  const seed = SPACE_ID_SEED;
  SPACE_ID_SEED = seed + 1;
  return seed;
}

/**
 * Indicates that no usable path was found for the processed edge — e.g. the
 * destination node did not correspond to a valid correction.
 */
type NullPath = {
type: 'none'
}

/**
 * Indicates that the processed edge reached a valid node that does not yet
 * complete a correction; `cost` is the accumulated path cost so far.
 */
type IntermediateSearchPath = {
type: 'intermediate',
cost: number
}

/**
 * Indicates that the processed edge completed a correction path; `finalNode`
 * is the terminal node and `cost` the total path cost.
 */
type CompleteSearchPath = {
type: 'complete',
cost: number,
finalNode: SearchNode
}

/**
 * Discriminated union (on `type`) describing the outcome of processing one
 * edge of the correction-search graph.
 */
export type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath;

/**
 * Represents all or a portion of the dynamically-generated graph used to search
 * for predictive-text corrections.
 */
export interface SearchQuotientNode {
/**
 * Retrieves the lowest-cost / lowest-distance edge from this node's search
 * area, checks its validity as a correction to the input text, and reports on
 * what sort of result the edge's destination node represents.
 * @returns A {@link PathResult} discriminated on `type`: `'none'` when the
 * edge yields no valid correction, `'intermediate'` when the path is valid
 * but incomplete, or `'complete'` (with its final node) when a full
 * correction path has been found.
 */
handleNextNode(): PathResult;

/**
 * Reports the cost of the lowest-cost / lowest-distance edge held within
 * this node's search area.
 * @returns The cost of the cheapest unprocessed edge.
 */
readonly currentCost: number;

/**
 * Returns the set of previously-processed results under this node's domain.
 */
readonly previousResults: SearchResult[];

/**
 * When true, this indicates that the currently-represented portion of context
 * has fat-finger data available, which itself indicates that the user has
 * corrections enabled.
 */
readonly correctionsEnabled: boolean;

/**
 * Reports the total number of input keystrokes represented by this
 * graph/subgraph.
 *
 * (Their fat-finger alternates, when provided, do not influence this count -
 * they're associated with the original keystroke that affected the context.)
 */
readonly inputCount: number;

/**
 * Determines the best example text representable by this node's portion of
 * the correction-search graph and its paths, paired with the probability
 * `p` of the input sequence that produces it.
 */
readonly bestExample: { text: string, p: number };
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
* engine.
*/

import { QueueComparator as Comparator, PriorityQueue } from '@keymanapp/web-utils';
import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keymanapp/web-utils';
import { LexicalModelTypes } from '@keymanapp/common-types';

import { EDIT_DISTANCE_COST_SCALE, PathResult, SearchNode, SearchResult } from './distance-modeler.js';
import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js';
import { PathResult, SearchQuotientNode } from './search-quotient-node.js';

import Distribution = LexicalModelTypes.Distribution;
import LexicalModel = LexicalModelTypes.LexicalModel;
Expand All @@ -25,7 +26,7 @@ export const QUEUE_NODE_COMPARATOR: Comparator<SearchNode> = function(arg1, arg2

// The set of search spaces corresponding to the same 'context' for search.
// Whenever a wordbreak boundary is crossed, a new instance should be made.
export class SearchQuotientSpur {
export class SearchQuotientSpur implements SearchQuotientNode {
private selectionQueue: PriorityQueue<SearchNode> = new PriorityQueue(QUEUE_NODE_COMPARATOR);
private inputs: Distribution<Transform>;

Expand Down Expand Up @@ -114,6 +115,20 @@ export class SearchQuotientSpur {
}
}

/**
 * Reports the total number of input keystrokes represented by this
 * graph/subgraph: the parent path's count (0 when this is the root)
 * plus one for this node's own input distribution, when one exists.
 */
public get inputCount(): number {
return (this.parentPath?.inputCount ?? 0) + (this.inputs ? 1 : 0);
}

/**
 * Determines the best example text representable by this node's portion of
 * the correction-search graph: the parent path's best example with this
 * node's most-probable input transform applied on top.
 *
 * The returned `p` is the product of the prefix's probability and the local
 * input's probability.
 *
 * NOTE(review): `reduce` without an initial value throws on an empty array —
 * assumes `this.inputs`, when defined, is never empty; confirm with callers.
 * Also assumes `deleteLeft` never exceeds the prefix's length — TODO confirm.
 */
public get bestExample(): {text: string, p: number} {
// Best example of the context preceding this node; empty/certain at the root.
const bestPrefix = this.parentPath?.bestExample ?? { text: '', p: 1 };
// Highest-probability transform in this node's own input distribution;
// falls back to a no-op transform when this node carries no input.
const bestLocalInput = this.inputs?.reduce((max, curr) => max.p < curr.p ? curr : max) ?? { sample: { insert: '', deleteLeft: 0 }, p: 1};

// Apply the local transform: trim `deleteLeft` code points (KMWString =
// code-point-aware ops, not UTF-16 units) then append the insert text.
return {
text: KMWString.substring(bestPrefix.text, 0, KMWString.length(bestPrefix.text) - bestLocalInput.sample.deleteLeft) + bestLocalInput.sample.insert,
p: bestPrefix.p * bestLocalInput.p
}
}

increaseMaxEditDistance() {
this.parentPath.increaseMaxEditDistance();

Expand Down Expand Up @@ -308,7 +323,7 @@ export class SearchQuotientSpur {
};
}

public previousResults(): SearchResult[] {
public get previousResults(): SearchResult[] {
return Object.values(this.returnedValues).map(v => new SearchResult(v));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ export async function correctAndEnumerate(
// Ideally, the answer (in the future) will be no, but leaving it in right now may pose an issue.

// The 'eventual' logic will be significantly more complex, though still manageable.
const searchSpace = transition.final.tokenization.tail.searchSpace;
const searchSpace = transition.final.tokenization.tail.searchModule;

// If corrections are not enabled, bypass the correction search aspect
// entirely. No need to 'search' - just do a direct lookup.
Expand Down Expand Up @@ -532,7 +532,7 @@ export async function correctAndEnumerate(
* Worst-case, it's possible to temporarily add normalization if a code deep-dive
* is needed in the future.
*/
if(searchSpace.inputSequence.length <= 1) {
if(searchSpace.inputCount <= 1) {
/* Suppose a key distribution: most likely with p=0.5, second-most with 0.4 - a pretty
* ambiguous case that would only arise very near the center of the boundary between two keys.
* Raising (0.5/0.4)^16 ~= 35.53. (At time of writing, SINGLE_CHAR_KEY_PROB_EXPONENT = 16.)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,9 @@ describe('ContextState', () => {

// The 'wordbreak' transform
let state = newContextMatch?.final;
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence);
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence);
assert.sameDeepMembers(
state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence,
state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence,
[[{sample: { insert: '', deleteLeft: 0 }, p: 1}]]
);

Expand Down Expand Up @@ -284,9 +284,9 @@ describe('ContextState', () => {

// The 'wordbreak' transform
let state = newContextMatch?.final;
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence);
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence);
assert.deepEqual(
state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence,
state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence,
[[{ sample: {insert: '', deleteLeft: 0}, p: 1 }]]
);

Expand Down Expand Up @@ -337,8 +337,8 @@ describe('ContextState', () => {

// The 'wordbreak' transform
let state = newContextMatch.final;
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence);
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence);
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence);
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence);

// if(!newContextMatch.final.tokenization.alignment.canAlign) {
// assert.fail("context alignment failed");
Expand Down Expand Up @@ -370,9 +370,9 @@ describe('ContextState', () => {

// The 'wordbreak' transform
let state = newContextMatch.final;
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence);
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence);
assert.deepEqual(
state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence,
state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence,
[[{sample: {insert: '', deleteLeft: 0}, p: 1}]]
);

Expand Down Expand Up @@ -402,9 +402,9 @@ describe('ContextState', () => {

// The 'wordbreak' transform
let state = newContextMatch.final;
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence);
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence);
assert.deepEqual(
state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence,
state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence,
[[{sample: {insert: '', deleteLeft: 0}, p: 1}]]
);

Expand Down Expand Up @@ -434,8 +434,8 @@ describe('ContextState', () => {

// The 'wordbreak' transform
let state = newContextMatch.final;
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputSequence);
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputSequence);
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputSequence);
assert.isNotEmpty(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputSequence);

// if(!newContextMatch.final.tokenization.alignment.canAlign) {
// assert.fail("context alignment failed");
Expand Down
Loading