-
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathweb.html
More file actions
119 lines (94 loc) · 3.48 KB
/
web.html
File metadata and controls
119 lines (94 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
<!-- Example showing how to use kitoken/web without a bundler. -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Kitoken Web Example</title>
</head>
<body>
<h1>Kitoken Web Example</h1>
<!-- A placeholder is not an accessible name, so the textarea is labelled explicitly. -->
<textarea id="input" aria-label="Text to tokenize" placeholder="Type your input here.">Let's tokenize this example text!</textarea>
<button id="submit">Tokenize</button>
<!-- Results area: the module script writes the token count into "#count span" and the token ids into "#tokens". -->
<div id="output">
  <div id="count">Token count: <span>0</span></div>
  <div id="tokens"></div>
</div>
<script type="module">
// Import from the web entry point "kitoken/web".
// Without a bundler: copy web.js and web_bg.wasm from the published package,
// or run the build:web script to generate them.
import init, { Kitoken } from "../dist/web.js";

// init() returns a promise resolving to the wasm instance.
// Note: only required for the "kitoken/web" export.
const wasm_ = init();

// Load your tokenizer definition.
// fetch() only rejects on network failures, so the HTTP status is checked
// explicitly; otherwise the body of an error page (e.g. a 404) would be handed
// to the tokenizer as if it were a model.
const MODEL_URL = '../../../tests/models/llama2.kit';
const model_ = fetch(MODEL_URL).then((res) => {
  if (!res.ok) {
    throw new Error(`Failed to load tokenizer model from ${MODEL_URL}: ${res.status} ${res.statusText}`);
  }
  return res.arrayBuffer();
});
// Wire up the UI once the document has been parsed.
document.addEventListener('DOMContentLoaded', () => {
  const input = document.querySelector('#input');
  const submit = document.querySelector('#submit');
  const count = document.querySelector('#count span');
  const output = document.querySelector('#tokens');

  // The Kitoken instance is created on the first successful run and reused afterwards.
  let encoder = null;

  /**
   * Tokenizes the current textarea content and renders the token count and
   * the comma-separated token ids. Any failure (wasm initialization, model
   * loading, encoding, decoding) is logged and shown in the output area.
   */
  async function tokenize() {
    try {
      // Wait for initialization and loading to complete. Awaiting both at
      // once attaches a rejection handler to each promise immediately, so a
      // failed model download is not reported as unhandled while the wasm
      // module is still initializing.
      const [, model] = await Promise.all([wasm_, model_]);

      // Create the Kitoken instance when it doesn't exist:
      if (!encoder) {
        encoder = new Kitoken(new Uint8Array(model));
      }

      // Call encode or encode_all to tokenize.
      const tokens = encoder.encode(input.value);
      console.log("Encoded:", tokens);
      count.textContent = tokens.length;
      output.textContent = tokens.join(', ');

      // Call decode or decode_all to detokenize.
      const decoded = new TextDecoder().decode(encoder.decode(tokens));
      console.log("Decoded:", decoded);
    } catch (error) {
      console.error(error);
      // Do not leave the count of a previous run next to the error message.
      count.textContent = 0;
      // Thrown values are not guaranteed to be Error objects (a plain string
      // has no .message), so fall back to the stringified value.
      output.textContent = 'Error: ' + (error?.message ?? String(error));
    }
  }

  submit.addEventListener('click', tokenize);
  // Tokenize the pre-filled example text right away.
  tokenize();
});
</script>
<style>
/* Universal rule: the same font size and system-UI font stack for every element. */
* {
  font-size: 14px;
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
}

/* Vertical stacks: the page body and the results area both lay out their
   children in a single column separated by an 8px gap. */
body,
#output {
  display: flex;
  flex-direction: column;
  gap: 8px;
}

/* Fixed-width page column, centered horizontally; children span the full width. */
body {
  align-items: stretch;
  line-height: 1.5rem;
  margin: 0 auto;
  width: 600px;
}

/* Text input: fixed starting height, resizable by the user in the vertical direction only. */
#input {
  resize: vertical;
  height: 96px;
}

/* Tokenize button: extra space below it before the results. */
#submit {
  padding: 2px;
  margin-bottom: 12px;
}

/* Token counter is rendered dimmed. */
#count {
  opacity: 0.6;
}

/* Token ids: monospace (the id selector outranks the universal rule above),
   with whitespace preserved and long lines wrapped. */
#tokens {
  white-space: pre-wrap;
  font-family: monospace;
}
</style>
</body>
</html>