// train.js — 60 lines (46 loc) · 1.72 KB
let tokenize = require("./tokenize.js");
let fs = require("fs");
/**
 * Builds a next-token frequency model from a list of messages and writes it,
 * JSON-encoded, to "model.json" (relative to the current working directory).
 *
 * For each message, every run of 1 to 5 consecutive tokens (joined with no
 * separator) becomes a context key, and the token immediately following that
 * run is counted: `model[context][nextToken]` is the number of occurrences.
 *
 * Messages whose `length` exceeds 100000 are skipped and are not counted in
 * the progress output. One progress line is logged per processed message.
 *
 * @param {Array} data - Messages to train on; each one is passed to
 *   `tokenize`. Any non-array value is treated as an empty list, so an empty
 *   model is still written instead of throwing.
 * @returns {void}
 */
function train(data) {
  const MAX_MESSAGE_LENGTH = 100000;
  const MAX_CONTEXT_TOKENS = 5;

  // Null-prototype dictionaries: tokens come from arbitrary text, so keys such
  // as "__proto__", "constructor" or "toString" must be stored as ordinary own
  // properties instead of resolving to (and mutating) Object.prototype members.
  const model = Object.create(null);
  let totalTokens = 0;

  // Non-array input would otherwise crash in the for...of loop below, right
  // after the "no data" warning had already been printed.
  const messages = Array.isArray(data) ? data : [];
  const totalMessages = messages.length;
  if (totalMessages === 0) {
    console.log("[!] No training data provided.");
  }
  let processedMessages = 0;

  // Counts, for every window of `context` tokens in `array`, the token that
  // follows the window. The window is capped at array.length - 1 so that there
  // is always at least one following token.
  function parseTokens(array, context) {
    const contextLayer = Math.min(context, array.length - 1);
    if (contextLayer <= 0) {
      return;
    }
    const tokenNumber = array.length - contextLayer;
    for (let i = 0; i < tokenNumber; i++) {
      const contextString = array.slice(i, i + contextLayer).join("");
      const afterString = array[i + contextLayer];
      if (model[contextString] === undefined) {
        model[contextString] = Object.create(null);
      }
      const followers = model[contextString];
      followers[afterString] = (followers[afterString] || 0) + 1;
    }
  }

  for (const message of messages) {
    if (message.length > MAX_MESSAGE_LENGTH) continue;
    // NOTE(review): assumes tokenize returns an array of string tokens — confirm in tokenize.js.
    const sentence = tokenize(message);
    totalTokens += sentence.length;
    const wordMaxContext = Math.min(sentence.length - 1, MAX_CONTEXT_TOKENS);
    for (let size = 1; size <= wordMaxContext; size++) {
      parseTokens(sentence, size);
    }
    processedMessages++;
    const pct = totalMessages > 0 ? Math.round((processedMessages / totalMessages) * 100) : 100;
    console.log(`[.] Training progress: ${processedMessages}/${totalMessages} (${pct}%)`);
  }

  console.log(`[+] Training successful, ${totalTokens} tokens generated and trained upon.`);
  fs.writeFileSync("model.json", JSON.stringify(model), "utf8");
}
// Expose `train` as this CommonJS module's sole export.
module.exports = train;