Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ export class ContextState {
*/
analyzeTransition(
context: Context,
transformDistribution?: Distribution<Transform>,
transformDistribution: Distribution<Transform>,
// overrides checks for token substitution that can fail for large applied suggestions.
isApplyingSuggestion?: boolean
): ContextTransition {
Expand Down Expand Up @@ -245,8 +245,11 @@ export class ContextState {
// and then fold all resulting search spaces (on the final token) into one.
const tokenizationAnalysis = trueInputSubset.pendingSet.get(baseTokenization);

// Determine the best probability from among ALL available inputs, before they're split
// into subsets.
const bestProb = transformDistribution.reduce((best, curr) => Math.max(best, curr.p), 0);
// Should gain one per subsetBuilder.subsets entry.
const resultTokenization = baseTokenization.evaluateTransition(tokenizationAnalysis, lexicalModel, trueInput);
const resultTokenization = baseTokenization.evaluateTransition(tokenizationAnalysis, lexicalModel, trueInput, bestProb);

// ------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import Transform = LexicalModelTypes.Transform;
/**
 * Describes the origin of one input (keystroke transform) applied to a
 * context token.
 */
export interface TokenInputSource {
// The Transform actually applied for the keystroke that produced this input.
trueTransform: Transform;
// Character offset within the accumulated input at which this token's
// portion begins; 0 when the token receives the input from its start.
// NOTE(review): inferred from callers that pass `appliedLength` /
// `priorSourceInput.inputStartIndex + extraCharsAdded` — confirm.
inputStartIndex: number;
// Probability of the single most likely transform in the overall
// transformDistribution for the triggering keystroke; forwarded as the
// second argument to `searchSpace.addInput(...)`.  1.0 for unambiguous
// (raw-text) inputs.
bestProbFromSet: number;
}

/**
Expand Down Expand Up @@ -123,15 +124,15 @@ export class ContextToken {
rawText ||= '';

// Supports the old pathway for: updateWithBackspace(tokenText: string, transformId: number)
const rawTransformDistributions: Distribution<Transform>[] = textToCharTransforms(rawText).map(function(transform) {
return [{sample: transform, p: 1.0}];
});
rawTransformDistributions.forEach((entry) => {
// Build a token that represents the current text with no ambiguity - probability at max (1.0)
const BASE_PROBABILITY = 1;
textToCharTransforms(rawText).forEach((transform) => {
this._inputRange.push({
trueTransform: entry[0].sample,
inputStartIndex: 0
trueTransform: transform,
inputStartIndex: 0,
bestProbFromSet: BASE_PROBABILITY
});
this.searchSpace.addInput(entry);
this.searchSpace.addInput([{sample: transform, p: BASE_PROBABILITY}], 1);
});
}
}
Expand All @@ -142,7 +143,7 @@ export class ContextToken {
*/
/**
 * Appends a new input source to this token and forwards its transform
 * distribution to the token's correction-search space.
 *
 * @param inputSource   Metadata for the keystroke: the true applied Transform,
 *                      its start offset within this token's input, and the
 *                      best probability from the keystroke's overall
 *                      distribution.
 * @param distribution  The distribution of possible Transforms for the
 *                      keystroke, as apportioned to this token.
 */
addInput(inputSource: TokenInputSource, distribution: Distribution<Transform>) {
this._inputRange.push(inputSource);
// Pass the set-wide best probability alongside the distribution, matching
// the updated two-argument searchSpace.addInput signature.  (The scraped
// diff retained both the old one-argument call and this replacement; only
// the merged, post-change call is kept here.)
this.searchSpace.addInput(distribution, inputSource.bestProbFromSet);
}

/**
Expand Down Expand Up @@ -350,7 +351,8 @@ export class ContextToken {
backupToken = new ContextToken(constructingToken);
constructingToken.addInput({
trueTransform: priorSourceInput.trueTransform,
inputStartIndex: priorSourceInput.inputStartIndex + extraCharsAdded
inputStartIndex: priorSourceInput.inputStartIndex + extraCharsAdded,
bestProbFromSet: priorSourceInput.bestProbFromSet
}, tailDistribution);

const lenToCommit = lenBeforeLastApply + extraCharsAdded;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -497,12 +497,17 @@ export class ContextTokenization {
* @param lexicalModel The active lexical model
* @param sourceInput The Transform associated with the keystroke triggering
* the transition.
* @param bestProbFromSet The probability of the single most likely input
* transform in the overall transformDistribution associated with the
* keystroke triggering the transition. It need not be represented by the
* pendingTokenization to be built.
* @returns
*/
evaluateTransition(
pendingTokenization: PendingTokenization,
lexicalModel: LexicalModel,
sourceInput: Transform
sourceInput: Transform,
bestProbFromSet: number
): ContextTokenization {
const { alignment: alignment, inputs } = pendingTokenization;
const sliceIndex = alignment.edgeWindow.sliceIndex;
Expand Down Expand Up @@ -581,7 +586,7 @@ export class ContextTokenization {
if(affectedToken.inputRange.length == 0 && distribution[0].sample.deleteLeft != 0) {
distribution = distribution.map((mass) => ({sample: { ...mass.sample, deleteLeft: 0 }, p: mass.p }));
}
affectedToken.addInput({trueTransform: sourceInput, inputStartIndex: appliedLength}, distribution);
affectedToken.addInput({trueTransform: sourceInput, inputStartIndex: appliedLength, bestProbFromSet}, distribution);
appliedLength += KMWString.length(distribution[0].sample.insert);

const tokenize = determineModelTokenizer(lexicalModel);
Expand Down
Loading