-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfix_mojibake.py
More file actions
78 lines (64 loc) · 2.26 KB
/
fix_mojibake.py
File metadata and controls
78 lines (64 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Fix mojibake characters in HTML files
Replace Windows-1252 mojibake with correct UTF-8 Spanish characters
"""
import os
from pathlib import Path
# Root path: the directory that is searched recursively for HTML files and
# against which reported file names are made relative. It is a hard-coded
# absolute Windows path, so it must be edited before running anywhere else.
root_path = Path(r"c:\Users\ablma\Documents\Cerezo\Web\condadodecastilla")
# Mojibake to correct character mapping
def _cp1252_mojibake(char):
    """Return *char* as it looks after its UTF-8 bytes are misread as Windows-1252."""
    pieces = []
    for byte in char.encode('utf-8'):
        single = bytes([byte])
        try:
            pieces.append(single.decode('cp1252'))
        except UnicodeDecodeError:
            # Python's cp1252 codec leaves 0x81, 0x8D, 0x8F, 0x90 and 0x9D
            # undefined. Fall back to the same-numbered C1 control character,
            # which is presumably what the damaged files contain -- TODO confirm
            # against a real sample (needed for I-acute and A-acute).
            pieces.append(single.decode('latin-1'))
    return ''.join(pieces)


# The keys are computed from the target characters instead of being typed in:
# hand-written mojibake literals contain soft hyphens, control characters and
# curly quotes that editors and copy/paste silently alter, which leaves
# duplicate or no-op keys (or a syntax error) behind.
# Entry order is kept as-is because the replacements are applied sequentially.
replacements = {
    _cp1252_mojibake(char): char
    for char in (
        '\u00ed',  # i with acute
        '\u00f3',  # o with acute
        '\u00e1',  # a with acute
        '\u00e9',  # e with acute
        '\u00fa',  # u with acute
        '\u00f1',  # n with tilde
        '\u00fc',  # u with diaeresis
        '\u00cd',  # I with acute
        '\u00d3',  # O with acute
        '\u00c1',  # A with acute
        '\u00c9',  # E with acute
        '\u00da',  # U with acute
        '\u00d1',  # N with tilde
        '\u00dc',  # U with diaeresis
        '\u00bf',  # inverted question mark
        '\u00a1',  # inverted exclamation mark
    )
}
# Gather every *.html file below the root, leaving out any file whose full
# path text mentions the backup directory or condadodecastilla.com
html_files = [
    candidate
    for candidate in root_path.rglob("*.html")
    if not (
        "_encoding_backup" in str(candidate)
        or "condadodecastilla.com" in str(candidate)
    )
]
# Counters for the final report: every file lands in exactly one of
# fixed / unchanged / error, so the three always add up to total_files.
total_files = len(html_files)
fixed_count = 0
unchanged_count = 0
error_count = 0

print(f"\nFixing mojibake in {total_files} HTML files...\n")

for file_path in html_files:
    relative_path = file_path.relative_to(root_path)

    try:
        # Read file as UTF-8 (a file that is not valid UTF-8 raises and is
        # reported below instead of being rewritten)
        with open(file_path, 'r', encoding='utf-8') as f:
            original_content = f.read()

        # Apply all replacements, one after another in the mapping's order
        content = original_content
        for mojibake, correct in replacements.items():
            content = content.replace(mojibake, correct)

        # Only write if content changed
        if content != original_content:
            # Write back as UTF-8.
            # NOTE: the read above uses universal newlines and this write
            # uses newline='\r\n', so a rewritten file always ends up with
            # CRLF line endings, whatever it had before.
            with open(file_path, 'w', encoding='utf-8', newline='\r\n') as f:
                f.write(content)
            fixed_count += 1
            print(f"[FIXED] {relative_path}")
        else:
            unchanged_count += 1
    except (OSError, UnicodeError) as e:
        # Unreadable, unwritable or wrongly encoded file: report it, count
        # it, and carry on with the remaining files.
        error_count += 1
        print(f"[ERROR] {relative_path}: {e}")

print("\n=== Fix Summary ===")
print(f"Total files: {total_files}")
print(f"Fixed: {fixed_count}")
print(f"Unchanged: {unchanged_count}")
print(f"Errors: {error_count}")