@@ -11,7 +11,7 @@ import { applyTransform, buildMergedTransform } from "@keymanapp/models-template
import { LexicalModelTypes } from '@keymanapp/common-types';
import { deepCopy, KMWString } from "@keymanapp/web-utils";

import { SearchQuotientNode, TokenInputSource } from "./search-quotient-node.js";
import { SearchQuotientNode, PathInputProperties } from "./search-quotient-node.js";
import { TokenSplitMap } from "./context-tokenization.js";
import { LegacyQuotientSpur } from "./legacy-quotient-spur.js";
import { LegacyQuotientRoot } from "./legacy-quotient-root.js";
@@ -107,9 +107,12 @@ export class ContextToken {
let searchModule: SearchQuotientNode = new LegacyQuotientRoot(model);
const BASE_PROBABILITY = 1;
textToCharTransforms(rawText).forEach((transform) => {
let inputMetadata: TokenInputSource = {
trueTransform: transform,
inputStartIndex: 0,
let inputMetadata: PathInputProperties = {
segment: {
trueTransform: transform,
start: 0,
transitionId: undefined
},
bestProbFromSet: BASE_PROBABILITY
};
searchModule = new LegacyQuotientSpur(searchModule, [{sample: transform, p: BASE_PROBABILITY}], inputMetadata);
@@ -123,7 +126,7 @@
* Call this to record the original keystroke Transforms for the context range
* corresponding to this token.
*/
addInput(inputSource: TokenInputSource, distribution: Distribution<Transform>) {
addInput(inputSource: PathInputProperties, distribution: Distribution<Transform>) {
this._searchModule = new LegacyQuotientSpur(this._searchModule, distribution, inputSource);
}

@@ -142,8 +145,8 @@
* Denotes the original keystroke Transforms comprising the range corresponding
* to this token.
*/
get inputRange(): TokenInputSource[] {
return this.searchModule.sourceIdentifiers;
get inputSegments() {
return this.searchModule.inputSegments;
}

/**
@@ -161,9 +164,9 @@
get sourceRangeKey(): string {
const components: string[] = [];

for(const source of this.inputRange) {
const i = source.inputStartIndex;
components.push(`T${source.trueTransform.id}${i != 0 ? '@' + i : ''}`);
for(const source of this.inputSegments) {
const i = source.segment.start;
components.push(`T${source.segment.transitionId}${i != 0 ? `@${i}` : ''}`);
}

return components.join('+');
@@ -189,7 +192,7 @@
// Thus, we don't set the .isWhitespace flag field.
const resultToken = new ContextToken(lexicalModel);

let lastSourceInput: TokenInputSource;
let lastSourceInput: PathInputProperties;
let lastInputDistrib: Distribution<Transform>;
for(const token of tokensToMerge) {
const inputCount = token.inputCount;
@@ -200,7 +203,7 @@
}

// Are we re-merging on a previously split transform?
if(lastSourceInput?.trueTransform != token.inputRange[0].trueTransform) {
if(lastSourceInput?.segment.trueTransform != token.inputSegments[0].segment.trueTransform) {
if(lastSourceInput) {
resultToken.addInput(lastSourceInput, lastInputDistrib);
} // else: there's nothing to add as input
@@ -229,9 +232,9 @@
// Ignore the last entry for now - it may need to merge with a matching
// entry in the next token!
for(let i = startIndex; i < inputCount - 1; i++) {
resultToken.addInput(token.inputRange[i], token.searchModule.inputSequence[i]);
resultToken.addInput(token.inputSegments[i], token.searchModule.inputSequence[i]);
}
lastSourceInput = token.inputRange[inputCount-1];
lastSourceInput = token.inputSegments[inputCount-1];
lastInputDistrib = token.searchModule.inputSequence[inputCount-1];
}

@@ -254,7 +257,7 @@

// Build an alternate version of the transforms: if we preprocess all deleteLefts,
// what text remains from each?
const alteredSources = preprocessInputSources(this.inputRange);
const alteredSources = preprocessInputSources(this.inputSegments);

const blankContext = { left: '', startOfBuffer: true, endOfBuffer: true };
const splitSpecs = split.matches.slice();
@@ -310,15 +313,17 @@
};
});

const priorSourceInput = overextendedToken.inputRange[lastInputIndex];
const priorSourceInput = overextendedToken.inputSegments[lastInputIndex];
constructingToken.addInput(priorSourceInput, headDistribution);
tokensFromSplit.push(constructingToken);

constructingToken = new ContextToken(lexicalModel);
backupToken = new ContextToken(constructingToken);
constructingToken.addInput({
trueTransform: priorSourceInput.trueTransform,
inputStartIndex: priorSourceInput.inputStartIndex + extraCharsAdded,
segment: {
...priorSourceInput.segment,
start: priorSourceInput.segment.start + extraCharsAdded
},
bestProbFromSet: priorSourceInput.bestProbFromSet
}, tailDistribution);

@@ -335,34 +340,34 @@

backupToken = new ContextToken(constructingToken);
lenBeforeLastApply = KMWString.length(currentText.left);
currentText = applyTransform(alteredSources[transformIndex].trueTransform, currentText);
constructingToken.addInput(this.inputRange[transformIndex], this.searchModule.inputSequence[transformIndex]);
currentText = applyTransform(alteredSources[transformIndex].segment.trueTransform, currentText);
constructingToken.addInput(this.inputSegments[transformIndex], this.searchModule.inputSequence[transformIndex]);
transformIndex++;
}

return tokensFromSplit;
}
}

export function preprocessInputSources(inputSources: ReadonlyArray<TokenInputSource>) {
export function preprocessInputSources(inputSources: ReadonlyArray<PathInputProperties>) {
const alteredSources = deepCopy(inputSources);
let trickledDeleteLeft = 0;
for(let i = alteredSources.length - 1; i >= 0; i--) {
const source = alteredSources[i];
if(trickledDeleteLeft) {
const insLen = KMWString.length(source.trueTransform.insert);
const insLen = KMWString.length(source.segment.trueTransform.insert);
if(insLen <= trickledDeleteLeft) {
source.trueTransform.insert = '';
source.segment.trueTransform.insert = '';
trickledDeleteLeft -= insLen;
} else {
source.trueTransform.insert = KMWString.substring(source.trueTransform.insert, 0, insLen - trickledDeleteLeft);
source.segment.trueTransform.insert = KMWString.substring(source.segment.trueTransform.insert, 0, insLen - trickledDeleteLeft);
trickledDeleteLeft = 0;
}
}
trickledDeleteLeft += source.trueTransform.deleteLeft;
source.trueTransform.deleteLeft = 0;
trickledDeleteLeft += source.segment.trueTransform.deleteLeft;
source.segment.trueTransform.deleteLeft = 0;
}

alteredSources[0].trueTransform.deleteLeft = trickledDeleteLeft;
alteredSources[0].segment.trueTransform.deleteLeft = trickledDeleteLeft;
return alteredSources;
}
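For reference, here is a standalone sketch of the deleteLeft-trickling that `preprocessInputSources` performs. It is illustrative only: it uses a simplified `MiniTransform` shape and plain string operations in place of the repo's `KMWString` helpers (which handle supplementary-plane characters), but the backward pass follows the same idea.

```typescript
// Illustrative stand-in for the Transform fields used here.
interface MiniTransform { insert: string; deleteLeft: number; }

// Push each transform's deleteLeft backward onto earlier inserts; any amount
// that cannot be absorbed lands on the first transform as a leading deleteLeft.
function trickleDeleteLefts(transforms: MiniTransform[]): MiniTransform[] {
  const altered = transforms.map(t => ({ ...t }));  // don't mutate the originals
  let trickled = 0;
  for (let i = altered.length - 1; i >= 0; i--) {
    const t = altered[i];
    if (trickled) {
      if (t.insert.length <= trickled) {
        trickled -= t.insert.length;
        t.insert = '';
      } else {
        t.insert = t.insert.substring(0, t.insert.length - trickled);
        trickled = 0;
      }
    }
    // A transform's own deleteLeft only affects text inserted before it.
    trickled += t.deleteLeft;
    t.deleteLeft = 0;
  }
  altered[0].deleteLeft = trickled;
  return altered;
}

// 'ca' + 'r' + (backspace + 't') types "cat"; after preprocessing, the 'r' is gone:
console.log(trickleDeleteLefts([
  { insert: 'ca', deleteLeft: 0 },
  { insert: 'r',  deleteLeft: 0 },
  { insert: 't',  deleteLeft: 1 },
]));
// → [ { insert: 'ca', deleteLeft: 0 }, { insert: '', deleteLeft: 0 }, { insert: 't', deleteLeft: 0 } ]
```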
@@ -585,7 +585,14 @@ export class ContextTokenization {
if(affectedToken.inputCount == 0 && distribution[0].sample.deleteLeft != 0) {
distribution = distribution.map((mass) => ({sample: { ...mass.sample, deleteLeft: 0 }, p: mass.p }));
}
affectedToken.addInput({trueTransform: sourceInput, inputStartIndex: appliedLength, bestProbFromSet}, distribution);
affectedToken.addInput({
segment: {
trueTransform: sourceInput,
transitionId: sourceInput.id,
start: appliedLength
},
bestProbFromSet: bestProbFromSet
}, distribution);
appliedLength += KMWString.length(distribution[0].sample.insert);

const tokenize = determineModelTokenizer(lexicalModel);
@@ -11,7 +11,7 @@
import { LexicalModelTypes } from '@keymanapp/common-types';

import { SearchNode } from './distance-modeler.js';
import { PathResult, SearchQuotientNode, TokenInputSource } from './search-quotient-node.js';
import { PathResult, SearchQuotientNode, PathInputProperties } from './search-quotient-node.js';
import { SearchQuotientSpur } from './search-quotient-spur.js';

import Distribution = LexicalModelTypes.Distribution;
@@ -28,7 +28,7 @@ export class LegacyQuotientSpur extends SearchQuotientSpur {
* @param inputs
* @param bestProbFromSet
*/
constructor(space: SearchQuotientNode, inputs: Distribution<Transform>, inputSource: TokenInputSource | ProbabilityMass<Transform>) {
constructor(space: SearchQuotientNode, inputs: Distribution<Transform>, inputSource: PathInputProperties | ProbabilityMass<Transform>) {
super(space, inputs, inputSource);
this.queueNodes(this.buildEdgesForNodes(space.previousResults.map(r => r.node)));
return;
@@ -39,26 +39,38 @@ type CompleteSearchPath = {

export type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath;

/**
* Models the properties and portion of an input event applied by a SearchSpace for
* correction-search purposes.
*/
export interface TokenInputSource {
export interface InputSegment {
/**
* The Transform corresponding to the keystroke applied to the true context
* for this input event.
*
* NOTE: outside of use for .sourceText / .likeliestSourceText, the only part
* that should actually be referenced is the Transform / transition ID.
* @deprecated Slated for removal within epic/autocorrect.
*/
trueTransform: Transform;

/**
* The transform / transition ID of the corresponding input event.
*/
transitionId: number | undefined,

/**
* Marks the initial index (inclusive) within the insert strings for the
* corresponding transitions' Transforms that is applied by the corresponding
* corresponding transitions' Transforms that are applied by the corresponding
* tokenized correction-search input.
*/
inputStartIndex: number;
start: number
}

/**
* Models the properties and portion of an input event applied by a SearchSpace for
* correction-search purposes.
*/
export interface PathInputProperties {
/**
* Denotes the portion of the ongoing input stream represented by the corresponding
* input distribution(s) of a SearchSpace.
*/
segment: InputSegment;

/**
* Notes the highest probability found in the input event's transform
@@ -154,8 +166,10 @@ export interface SearchQuotientNode {
/**
* Gets components useful for building a string-based representation of the
* keystroke range corrected by this search space.
*
* TODO: will return only the `inputSegment` part of each entry in the future.
*/
readonly sourceIdentifiers: TokenInputSource[];
readonly inputSegments: PathInputProperties[];
}

/**
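To make the new nesting concrete: each entry pairs an InputSegment (which transform, which transition ID, and where within its insert string the segment begins) with the best probability from that input's distribution. The sketch below uses simplified local stand-ins for these types (not the real imports) and mirrors how ContextToken.sourceRangeKey, shown earlier, reduces such entries to `T<transitionId>` components, adding `@<start>` when a segment begins partway through its transform.

```typescript
// Simplified local stand-ins for the shapes above (illustrative only).
interface MiniTransform { insert: string; deleteLeft: number; id?: number; }

interface InputSegment {
  trueTransform: MiniTransform;     // deprecated in the real interface
  transitionId: number | undefined;
  start: number;
}

interface PathInputProperties {
  segment: InputSegment;
  bestProbFromSet: number;
}

// One component per segment: "T<transitionId>", plus "@<start>" when the
// segment does not begin at the transform's first inserted character.
function sourceRangeKey(entries: PathInputProperties[]): string {
  return entries
    .map(({ segment }) => `T${segment.transitionId}${segment.start != 0 ? `@${segment.start}` : ''}`)
    .join('+');
}

const seven: MiniTransform = { insert: 'a b', deleteLeft: 0, id: 7 };
const eight: MiniTransform = { insert: 'c',   deleteLeft: 0, id: 8 };

// A token that starts mid-transform (transform 7 was split at index 2),
// then continues with all of transform 8:
console.log(sourceRangeKey([
  { segment: { trueTransform: seven, transitionId: 7, start: 2 }, bestProbFromSet: 1 },
  { segment: { trueTransform: eight, transitionId: 8, start: 0 }, bestProbFromSet: 0.9 },
]));  // "T7@2+T8"
```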
@@ -2,7 +2,7 @@
import { LexicalModelTypes } from '@keymanapp/common-types';

import { SearchNode, SearchResult } from './distance-modeler.js';
import { generateSpaceSeed, PathResult, SearchQuotientNode, TokenInputSource } from './search-quotient-node.js';
import { generateSpaceSeed, PathInputProperties, PathResult, SearchQuotientNode } from './search-quotient-node.js';

import LexicalModel = LexicalModelTypes.LexicalModel;

@@ -90,7 +90,7 @@ export class SearchQuotientRoot implements SearchQuotientNode {
}
}

get sourceIdentifiers(): TokenInputSource[] {
get inputSegments(): PathInputProperties[] {
return [];
}
}
@@ -12,7 +12,7 @@ import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keyman
import { LexicalModelTypes } from '@keymanapp/common-types';

import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js';
import { generateSpaceSeed, PathResult, SearchQuotientNode, TokenInputSource } from './search-quotient-node.js';
import { generateSpaceSeed, PathResult, SearchQuotientNode, PathInputProperties } from './search-quotient-node.js';

import Distribution = LexicalModelTypes.Distribution;
import ProbabilityMass = LexicalModelTypes.ProbabilityMass;
@@ -27,7 +27,7 @@ export const QUEUE_NODE_COMPARATOR: Comparator<SearchNode> = function(arg1, arg2
export abstract class SearchQuotientSpur implements SearchQuotientNode {
private selectionQueue: PriorityQueue<SearchNode> = new PriorityQueue(QUEUE_NODE_COMPARATOR);
readonly inputs?: Distribution<Transform>;
readonly inputSource?: TokenInputSource;
readonly inputSource?: PathInputProperties;

private parentNode: SearchQuotientNode;
readonly spaceId: number;
@@ -61,23 +61,26 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode {
constructor(
parentNode: SearchQuotientNode,
inputs: Distribution<Readonly<Transform>>,
inputSource: TokenInputSource | ProbabilityMass<Transform>
inputSource: PathInputProperties | ProbabilityMass<Transform>
) {
this.spaceId = generateSpaceSeed();

// Coerce inputSource to TokenInputSource format.
if(inputSource && (inputSource as TokenInputSource).trueTransform == undefined) {
if(inputSource && (inputSource as ProbabilityMass<Transform>).sample != undefined) {
const keystroke = inputSource as ProbabilityMass<Transform>;
inputSource = {
trueTransform: keystroke.sample,
bestProbFromSet: keystroke.p,
inputStartIndex: 0
segment: {
trueTransform: keystroke.sample,
transitionId: keystroke.sample.id,
start: 0
},
bestProbFromSet: keystroke.p
}
};
const inputSrc = inputSource as TokenInputSource;
const inputSrc = inputSource as PathInputProperties;

const transitionId = (inputs?.[0].sample.id);
if(transitionId !== undefined && inputSrc?.trueTransform.id != transitionId) {
if(transitionId !== undefined && inputSrc?.segment.transitionId != transitionId) {
throw new Error("Input distribution and input-source transition IDs must match");
}

@@ -249,15 +252,15 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode {
return Object.values(this.returnedValues ?? {}).map(v => new SearchResult(v));
}

public get sourceIdentifiers(): TokenInputSource[] {
public get inputSegments(): PathInputProperties[] {
if(!this.parentNode) {
return [];
}

const parentSources = this.parentNode.sourceIdentifiers;
const parentSources = this.parentNode.inputSegments;
if(this.inputSource) {
const inputId = this.inputSource.trueTransform.id;
if(inputId !== undefined && parentSources.length > 0 && parentSources[parentSources.length - 1].trueTransform.id == inputId) {
const inputId = this.inputSource.segment.transitionId;
if(inputId !== undefined && parentSources.length > 0 && parentSources[parentSources.length - 1].segment.transitionId == inputId) {
return parentSources;
}

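The coercion near the top of that constructor can be summarized with the following standalone sketch. The types are simplified stand-ins and `normalizeInputSource` is a hypothetical helper, not part of the codebase: a bare ProbabilityMass<Transform> keystroke is wrapped into the PathInputProperties shape, and the distribution's transition ID is then checked against it, mirroring the guard that throws above.

```typescript
// Simplified stand-ins for the LexicalModelTypes shapes used above (illustrative only).
interface Transform { insert: string; deleteLeft: number; id?: number; }
interface ProbabilityMass<T> { sample: T; p: number; }
type Distribution<T> = ProbabilityMass<T>[];

interface PathInputProperties {
  segment: { trueTransform: Transform; transitionId: number | undefined; start: number };
  bestProbFromSet: number;
}

// Hypothetical helper: accept either input-source form, normalize to
// PathInputProperties, then verify transition IDs agree with the distribution.
function normalizeInputSource(
  inputs: Distribution<Transform>,
  inputSource: PathInputProperties | ProbabilityMass<Transform>
): PathInputProperties {
  let normalized: PathInputProperties;
  if ((inputSource as ProbabilityMass<Transform>).sample !== undefined) {
    const keystroke = inputSource as ProbabilityMass<Transform>;
    normalized = {
      segment: {
        trueTransform: keystroke.sample,
        transitionId: keystroke.sample.id,
        start: 0   // a raw keystroke always starts at its first inserted character
      },
      bestProbFromSet: keystroke.p
    };
  } else {
    normalized = inputSource as PathInputProperties;
  }

  const transitionId = inputs?.[0]?.sample.id;
  if (transitionId !== undefined && normalized.segment.transitionId != transitionId) {
    throw new Error("Input distribution and input-source transition IDs must match");
  }
  return normalized;
}
```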