-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathai_translator.py
More file actions
executable file
·150 lines (109 loc) · 4.81 KB
/
ai_translator.py
File metadata and controls
executable file
·150 lines (109 loc) · 4.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
import os
import shutil
import pathlib
import openai
class AITranslator:
    """Translate a tree of Markdown documentation from English to Czech.

    Markdown files are sent to the OpenAI chat completions API together with
    a project vocabulary; every other file is copied to the target tree
    unchanged.
    """

    def __init__(self, api_key: str):
        """Create the OpenAI client and load the translation vocabulary.

        Args:
            api_key: OpenAI API key used to construct the client.

        Raises:
            OSError: If ``cs/vocabulary.txt`` (resolved against the current
                working directory) cannot be read.
        """
        self.OpenAIClient = openai.OpenAI(api_key=api_key)
        with open('cs/vocabulary.txt', 'r', encoding='utf-8') as f:
            self.Vocabulary = f.read()

    def _prepare_translation_prompt(self, filename: pathlib.Path, previous_translation: str) -> str:
        """Prepare the prompt for the LLM.

        Args:
            filename: Path of the English Markdown file to translate.
            previous_translation: Text of an earlier translation of the same
                file. When it is ``None`` or empty, no reference section is
                appended to the prompt.

        Returns:
            The complete user prompt: instructions, vocabulary, the file
            content between the START/END markers and, optionally, the
            previous translation.
        """
        with open(filename, 'r', encoding='utf-8') as f:
            content = f.read()

        # The prompt body is deliberately flush-left: any indentation inside
        # the literal would become part of the text sent to the model.
        prompt = f"""Translate the Markdown content from English to Czech.
The content is part of technical documentation using Material for MkDocs framework.
The content starts with the markdown header "--- START OF CONTENT ---" and ends with the marker "--- END OF CONTENT ---".
Don't translate or include markers in the output.
Please maintain:
1. All Markdown formatting
2. Code blocks and their language specifiers
3. Material for MkDocs specific features (admonitions, tabs, etc.)
4. Proper Czech technical terminology
5. Don't change any links or references to other files, they must stay in English
6. Don't translate any configuration, JSON, YAML, etc.; the only exception are comments.
Translate the content while preserving all formatting and special features.
Vocabulary:
{self.Vocabulary}
--- START OF CONTENT ---
{content}
--- END OF CONTENT ---
"""
        if previous_translation:
            prompt += f"""
Previous translation, use it as a reference:
{previous_translation}
"""
        return prompt

    def translate_content(self, filename: pathlib.Path, previous_translation: str) -> str:
        """Translate content using OpenAI API.

        Args:
            filename: Path of the English Markdown file to translate.
            previous_translation: Earlier translation used as a reference,
                or ``None`` when there is none.

        Returns:
            The translated Markdown text returned by the model.

        Raises:
            RuntimeError: If the API response carries no text content.
            Exception: Any error raised by the API call is printed and then
                re-raised unchanged.
        """
        prompt = self._prepare_translation_prompt(filename, previous_translation)
        try:
            # NOTE(review): the reply is capped at 4000 tokens, so a long page
            # may come back truncated - confirm against real document sizes.
            response = self.OpenAIClient.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are a professional technical translator specializing in software documentation."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=4000
            )
        except Exception as e:
            print(f"Error during translation: {e}")
            raise

        translation = response.choices[0].message.content
        if translation is None:
            # Fail here, before the caller opens the target file for writing;
            # otherwise an existing translation would be truncated to empty.
            raise RuntimeError(f"Translation API returned no content for {filename}")
        return translation

    def process_file(self, source_path: pathlib.Path, target_path: pathlib.Path):
        """Process a single markdown file.

        Translates ``source_path`` and writes the result to ``target_path``.
        If the target already exists, its current text is passed to the model
        as a reference translation.

        Args:
            source_path: English Markdown file to translate.
            target_path: Destination of the translated file; parent
                directories are created when missing.
        """
        print(f"Processing {source_path}...")

        # read_text() closes the file handle, unlike a bare open(...).read().
        if target_path.exists():
            previous_translation = target_path.read_text(encoding='utf-8')
        else:
            previous_translation = None

        # Translate first so that a failed request never touches the target.
        translated_content = self.translate_content(source_path, previous_translation)

        target_path.parent.mkdir(parents=True, exist_ok=True)
        target_path.write_text(translated_content, encoding='utf-8')

    def process_directory(self, source_dir: pathlib.Path, target_dir: pathlib.Path):
        """Process all markdown files in the directory.

        ``*.md`` files are translated, all other files are copied with
        ``shutil.copy2``, and subdirectories are processed recursively after
        the files of the current directory.

        Args:
            source_dir: Directory holding the English sources.
            target_dir: Directory receiving the translated tree; created if
                it does not exist.
        """
        source_dir = pathlib.Path(source_dir)
        target_dir = pathlib.Path(target_dir)
        target_dir.mkdir(parents=True, exist_ok=True)

        # Entries whose name ends with 'index.md' sort first (False < True);
        # the remaining entries follow in path order.
        entries = sorted(
            source_dir.iterdir(),
            key=lambda x: (not x.name.endswith('index.md'), x),
        )

        subdirs = []
        for source_path in entries:
            if source_path.is_dir():
                # Deferred so that every file of this level is handled first.
                subdirs.append(source_path)
                continue

            # iterdir() yields direct children only, so the path relative to
            # source_dir is simply the entry name.
            target_path = target_dir / source_path.name
            if source_path.suffix == '.md':
                self.process_file(source_path, target_path)
            else:
                shutil.copy2(source_path, target_path)

        for source_path in subdirs:
            self.process_directory(source_path, target_dir / source_path.name)
def main():
    """Translate everything under ``docs`` into ``cs/docs``.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable; a missing variable raises ``KeyError``.
    """
    translator = AITranslator(os.environ['OPENAI_API_KEY'])
    translator.process_directory(pathlib.Path('docs'), pathlib.Path('cs/docs'))
    print("Translation completed successfully!")


# Run the translation only when executed as a script, not on import.
if __name__ == '__main__':
    main()