-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest.py
More file actions
26 lines (19 loc) · 905 Bytes
/
test.py
File metadata and controls
26 lines (19 loc) · 905 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from mbr.Chunker import Chunker
from mbr.Compiler import Compiler
from mbr.Logger import Logger
import pickle
import glob
def main():
    """Run the chunker on the reference text and write its outputs.

    Reads ``test/texte_ref.txt``, builds a ``Chunker`` from the tokenizer
    ruleset and the chunker pass rulesets found under ``test/rulesets``,
    then writes ``chunks.to_xml()`` to ``test/out.xml`` and
    ``chunks.list_tokens()`` to ``test/tokens.tsv``. A ``Logger`` wrapping
    ``test/output.log`` is handed to the chunker.
    """
    # The log file stays open for the whole run: the chunker keeps a
    # reference to the logger and may still use it while serialising
    # (assumption -- TODO confirm against Chunker).
    with open("test/output.log", mode="w", encoding="utf-8") as log_file:
        log = Logger(log_file)

        # Load the tokenizer ruleset.
        tokenizer = Compiler.load("test/rulesets/tokenizer.mbr")

        # Load the successive rule passes. glob.glob() returns matches in
        # arbitrary order, so sort the paths to get a deterministic pass
        # sequence (chunker_pass_1, chunker_pass_2, ...).
        pass_paths = sorted(glob.glob("test/rulesets/chunker_pass_?.mbr"))
        passes = [Compiler.load(path) for path in pass_paths]

        # The input file is only needed long enough to read its content.
        with open("test/texte_ref.txt", mode="r", encoding="utf-8") as f:
            text = f.read()

        chunks = Chunker(tokenizer, passes, text, log)

        with open("test/out.xml", mode="w", encoding="utf-8") as out:
            out.write(chunks.to_xml())

        with open("test/tokens.tsv", mode="w", encoding="utf-8") as out:
            out.write(chunks.list_tokens())


if __name__ == "__main__":
    main()