-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcorrections.py
More file actions
82 lines (73 loc) · 2.49 KB
/
corrections.py
File metadata and controls
82 lines (73 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
import json
import os
import re
import tempfile
MAX_ENTRIES = 200  # upper bound on stored corrections; oldest are evicted first


class CorrectionStore:
    """JSON-backed mapping of "wrong" phrases to their "right" replacements.

    The mapping is stored at ``path`` as a single JSON object whose keys are
    the lower-cased wrong phrases. It is read lazily on first use and kept in
    memory afterwards; every successful ``save`` rewrites the file.
    """

    def __init__(self, path: str):
        self.path = path
        self._cache = None  # populated by _get() on first access

    def _get(self):
        """Return the in-memory mapping, loading it from disk the first time."""
        if self._cache is None:
            self._cache = self.load()
        return self._cache

    def load(self):
        """Read the mapping from ``self.path``.

        Returns:
            A dict of ``{wrong_phrase: replacement}``. An empty dict is
            returned when the file is missing, unreadable, not valid JSON, or
            does not contain a JSON object. Entries with an empty key or a
            non-string value are dropped, because ``apply`` can only
            substitute strings and an empty key would match at almost every
            position in the text.
        """
        try:
            with open(self.path, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError, RecursionError):
            # Missing file, permission problem, bad encoding or malformed
            # JSON: treat all of them as "no corrections stored".
            return {}
        if not isinstance(data, dict):
            return {}
        return {k: v for k, v in data.items() if k and isinstance(v, str)}

    def save(self, wrong: str, right: str):
        """Remember that ``wrong`` should be replaced by ``right``.

        Both strings are stripped first. Nothing is stored when either is
        empty, when they differ only by case, or when ``wrong`` is a single
        word shorter than six characters (too likely to cause false
        replacements). The key is the lower-cased ``wrong``.

        When the store grows past ``MAX_ENTRIES`` the oldest entries (by
        insertion order) are evicted, then the file is rewritten.
        """
        wrong = (wrong or "").strip()
        right = (right or "").strip()
        if not wrong or not right:
            return
        # Only store if the strings differ beyond case.
        if wrong.lower() == right.lower():
            return
        # Skip very short strings that are too likely to cause false replacements.
        if len(wrong.split()) < 2 and len(wrong) < 6:
            return

        key = wrong.lower()
        store = self._get()
        # Already stored with the same mapping: nothing to do.
        if store.get(key) == right:
            return

        # Re-insert rather than overwrite in place so an updated entry counts
        # as the newest one and is not the first candidate for eviction.
        store.pop(key, None)
        store[key] = right

        # Evict the oldest entries beyond the limit (dicts keep insertion order).
        excess = len(store) - MAX_ENTRIES
        if excess > 0:
            for old_key in list(store)[:excess]:
                del store[old_key]
        self._write(store)

    def apply(self, text: str) -> str:
        """Return ``text`` with every stored correction applied.

        Matching is case-insensitive and only on word/phrase boundaries (the
        match must not be preceded or followed by a word character). Longer
        keys are tried first so that phrases win over their sub-phrases.
        Corrections are applied one after another, so the output of an
        earlier replacement can be matched by a later, shorter key.
        """
        if not text:
            return text
        store = self._get()
        if not store:
            return text
        # Sort by key length descending so longer phrases match first.
        for key in sorted(store, key=len, reverse=True):
            replacement = store[key]
            pattern = re.compile(
                r"(?<!\w)" + re.escape(key) + r"(?!\w)", re.IGNORECASE
            )
            # Use a function as the replacement so the stored text is inserted
            # literally. A plain string would be parsed as a regex template,
            # which corrupts or rejects replacements containing backslashes.
            text = pattern.sub(lambda _match, _repl=replacement: _repl, text)
        return text

    def _write(self, store):
        """Atomically write ``store`` as JSON to ``self.path`` (best effort).

        The data is written to a temporary file in the same directory and then
        moved over the target with ``os.replace``. Failures are swallowed so a
        write problem never breaks the caller, but the temporary file is
        always removed when it was not moved into place.
        """
        dir_path = os.path.dirname(self.path) or "."
        tmp = None
        try:
            fd, tmp = tempfile.mkstemp(dir=dir_path, suffix=".tmp")
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(store, f, ensure_ascii=False, indent=2)
            os.replace(tmp, self.path)
            tmp = None  # moved into place; nothing left to clean up
        except (OSError, TypeError, ValueError):
            # Persistence is best effort: the in-memory mapping stays usable.
            pass
        finally:
            if tmp is not None:
                try:
                    os.remove(tmp)
                except OSError:
                    pass