-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcontext.ts
More file actions
29 lines (24 loc) · 784 Bytes
/
context.ts
File metadata and controls
29 lines (24 loc) · 784 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import type { TokenId } from './vocabulary'
/**
 * One training example: a run of context tokens paired with the token that
 * comes directly after that run.
 */
export type TrainingSample = {
/** Tokens that make up the context for this sample. */
contextTokens: TokenId[]
/** Token that follows `contextTokens`. */
nextToken: TokenId
}
/**
 * Creates training samples from a sequence of tokens using a sliding window approach.
 *
 * Each sample pairs `contextWindowSize` consecutive tokens with the token that
 * immediately follows them. The window then advances by one position.
 *
 * @param tokenSequence - Tokens to slide the window over. The array is not mutated.
 * @param contextWindowSize - Number of tokens in each sample's context. A size
 *   of 0 yields one sample per token, each with an empty context.
 * @returns The samples in sequence order. The result is empty when the sequence
 *   holds no more than `contextWindowSize` tokens, or when `contextWindowSize`
 *   is negative or not an integer.
 */
export const buildTrainingSamples = (
  tokenSequence: TokenId[],
  contextWindowSize: number,
): TrainingSample[] => {
  // A negative size would pair empty contexts with unrelated tokens, and a
  // non-integer size can never address a token, so neither produces samples.
  if (!Number.isInteger(contextWindowSize) || contextWindowSize < 0) {
    return []
  }

  const trainingSamples: TrainingSample[] = []

  for (let position = 0; position + contextWindowSize < tokenSequence.length; position++) {
    const nextToken: TokenId | undefined = tokenSequence[position + contextWindowSize]

    // The loop bound keeps the index in range; this check narrows the type and
    // skips entries that are `undefined` (e.g. holes in a sparse array).
    if (nextToken !== undefined) {
      trainingSamples.push({
        contextTokens: tokenSequence.slice(position, position + contextWindowSize),
        nextToken,
      })
    }
  }

  return trainingSamples
}