-
Notifications
You must be signed in to change notification settings - Fork 37
Expand file tree
/
Copy pathtest_pdf2zh.py
More file actions
54 lines (43 loc) · 1.15 KB
/
test_pdf2zh.py
File metadata and controls
54 lines (43 loc) · 1.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import json
from pdf2zh import translate
from pdf2zh.config import ConfigManager
from pdf2zh.doclayout import OnnxModel
# Load the configuration. JSON text is UTF-8 by specification, so the encoding
# is pinned explicitly rather than left to the platform's locale default,
# which may mis-decode non-ASCII values such as font or model paths.
with open('pdf2zh_config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# Apply the configuration: every top-level entry is forwarded to ConfigManager
# except the 'models' and 'fonts' sections, which are read individually below.
for key, value in config.items():
    if key not in ('models', 'fonts'):
        ConfigManager.set(key, value)

# Set up the model from the path stored at config['models']['doclayout_path'].
model_path = config['models']['doclayout_path']
model = OnnxModel(model_path)

# Set up the font: config['fonts']['zh'] is registered as NOTO_FONT_PATH.
font_path = config['fonts']['zh']
ConfigManager.set("NOTO_FONT_PATH", font_path)
# Example `envs` values for other translation services:
#
# service = silicon
# envs = {
#     "SILICON_API_KEY": "your own api-key",
#     "SILICON_MODEL": "Qwen/Qwen2.5-7B-Instruct",
# }
#
# service = ollama
# envs = {
#     "OLLAMA_HOST": "http://127.0.0.1:11434",
#     "OLLAMA_MODEL": "deepseek-r1:1.5b",
# }
#
# service = google/bing
envs = {}

# Page numbers as the user would enter them (1-based).
user_pages = [11, 12, 13]

# The same pages shifted down by one (0-based), as passed to translate().
zero_based_pages = [page - 1 for page in user_pages]

# Keyword arguments forwarded to translate().
params = dict(
    model=model,
    lang_in="en",
    lang_out="zh",
    service="bing",
    thread=4,
    vfont=font_path,
    envs=envs,
    pages=zero_based_pages,
)

# translate() is given a single input file; its first result is unpacked into
# the two output values.
translated = translate(files=["test.pdf"], **params)
file_mono, file_dual = translated[0]