-
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest.py
More file actions
48 lines (37 loc) · 1.17 KB
/
test.py
File metadata and controls
48 lines (37 loc) · 1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from time import time
from kitoken import Kitoken
# Smoke test: load the tokenizer from the SentencePiece Llama 2 model file and
# round-trip a short string through encode/decode.
print(Kitoken)
encoder = Kitoken.from_file("../../tests/models/sentencepiece/llama2.model")
print(encoder)

# Every encode call in this script passes True as the second positional
# argument; its meaning is defined by the kitoken binding.
tokens = encoder.encode("hello world!", True)
print(tokens)

# The decode result is converted with .decode("utf-8"), so it is presumably
# bytes-like — confirm against the binding.
decoded_text = encoder.decode(tokens).decode("utf-8")
print(decoded_text)
assert decoded_text == "hello world!"
# Rough timing check: encode the benchmark corpus 100 times and report the
# elapsed wall-clock time in milliseconds.
#
# The file is read as raw bytes and decoded explicitly, so no newline
# translation is applied. The context manager guarantees the handle is closed
# instead of leaving that to the garbage collector.
with open("../../benches/data/wagahai.txt", "rb") as corpus_file:
    text = corpus_file.read().decode("utf-8")

now = time()
for _ in range(100):
    # Only the elapsed time matters here; the encoded result is discarded.
    encoder.encode(text, True)
print(f"100 iterations in {(time() - now) * 1000:.3f}ms")
# Round-trip the tokenizer definition: read it, show its "meta" entry, and
# write the same object back to the encoder.
tokenizer_definition = encoder.definition()
print(tokenizer_definition["meta"])
encoder.set_definition(tokenizer_definition)

# Same get-then-set round trip for the configuration. It runs after the
# definition has been re-applied, so the order of these two sections is kept.
tokenizer_config = encoder.config()
print(tokenizer_config)
encoder.set_config(tokenizer_config)
# Batch API: encode two identical inputs, decode the batch, and check that
# both results come back unchanged.
batch_tokens = encoder.encode_all(["hello world!", "hello world!"], True)
batch_decoded = encoder.decode_all(batch_tokens)
for position in (0, 1):
    assert batch_decoded[position].decode("utf-8") == "hello world!"

# Encode a longer sample, print its tokens and the decoded text.
sample_tokens = encoder.encode("Kitoken. Tokenize Everything!", True)
print(sample_tokens)
sample_decoded = encoder.decode(sample_tokens)
print(sample_decoded.decode("utf-8"))

# Decode each token on its own by wrapping it in a single-element sequence.
single_token_batches = [[token] for token in sample_tokens]
print(encoder.decode_all(single_token_batches))

# Exercise serialization; the returned value is intentionally unused.
encoder.to_bytes()
print("OK")
# Optional check: load a tokenizer through Kitoken.from_web. Any failure is
# reported on stdout rather than aborting the script, so the checks above
# still count as passed when this step cannot complete.
try:
    encoder = Kitoken.from_web("hf:Qwen/Qwen3.5-9B")
    print(encoder)
    print("Web OK")
except Exception as web_error:
    print(f"Web ERR: {web_error}")