-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDescription update based on File name.py
More file actions
117 lines (96 loc) · 4.96 KB
/
Description update based on File name.py
File metadata and controls
117 lines (96 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""
=========================================================
Description to Filename Matcher for Wikimedia Commons
=========================================================
[ SETUP & USAGE ]
1. Install reqs: pip install pywikibot
2. Login config: Ensure 'user-config.py' is set up in your directory.
3. Purpose : Extracts the filename (without extension) and
replaces the existing English description
(|description={{en|...}}) with that exact filename.
4. Run : Set TARGET_CATEGORIES below, then run the script.
=========================================================
"""
import pywikibot
from pywikibot import pagegenerators
import re
import time
# ================= CONFIGURATION =================
# Categories to process, one entry per category. Each entry is handed to
# pywikibot.Category as-is, so write the full title including "Category:".
TARGET_CATEGORIES = [
    "Category:YOUR_FIRST_CATEGORY_NAME_HERE",
    "Category:YOUR_SECOND_CATEGORY_NAME_HERE",
]

# Passed as `recurse=` to the category page generator: True also walks the
# subcategories of each target category, False stays at the top level.
PROCESS_SUBCATEGORIES = False

# Pause, in seconds, after every successful save.
SLEEP_TIME = 1

# True: only report what would be changed, never save to the wiki.
DRY_RUN = False
# =================================================
# Matches "|description = {{en|<content>}}" on a single line:
#   group 1 - everything up to and including "{{en|" (original spacing kept)
#   group 2 - the current description content (lazy, stops at the first "}}")
#   group 3 - the closing "}}"
_EN_DESCRIPTION_RE = re.compile(r'(\|description\s*=\s*\{\{en\|)(.*?)(\}\})')


def main():
    """Set the English description of every file in the target categories to its filename.

    Logs in to Wikimedia Commons, walks each category in TARGET_CATEGORIES
    (recursing when PROCESS_SUBCATEGORIES is true) and, for each file page,
    replaces the content of ``|description={{en|...}}`` with the file's title
    minus namespace and extension. With DRY_RUN set, nothing is saved and the
    intended change is only printed.

    Pages are skipped (with a message) when the pattern is absent, when the
    description already equals the filename, or when the existing description
    contains a nested template and cannot be replaced safely by the regex.
    Save errors are reported and the run continues with the next page.
    """
    try:
        site = pywikibot.Site('commons', 'commons')
        site.login()
        print(f"Logged in as: {site.user()}")
    except Exception as e:
        # Top-level boundary: without a session nothing else can run.
        print(f"Login Failed: {e}")
        return

    for cat_name in TARGET_CATEGORIES:
        print(f"\n[{'DRY RUN' if DRY_RUN else 'LIVE'}] Processing Category: {cat_name}...")
        try:
            cat = pywikibot.Category(site, cat_name)
            gen = pagegenerators.CategorizedPageGenerator(cat, recurse=PROCESS_SUBCATEGORIES)
        except pywikibot.exceptions.InvalidTitleError:
            print(f"[ERROR] Invalid category name: {cat_name}")
            continue

        for page in gen:
            # Categories also contain subcategories and text pages; only files apply.
            if not page.is_filepage():
                continue

            current_text = page.text

            # "File:Noto Serif Sinhala.svg" -> "Noto Serif Sinhala"
            file_title_full = page.title(with_ns=False)
            clean_title = file_title_full.rsplit('.', 1)[0]

            matches = list(_EN_DESCRIPTION_RE.finditer(current_text))
            if not matches:
                # Braces are doubled twice so the message shows literal "{{en|...}}".
                print(f"[SKIP] {file_title_full} (Pattern |description={{{{en|...}}}} not found)")
                continue

            # The lazy match ends at the first "}}". If the captured content
            # holds an opening "{{", that "}}" closed an inner template, and
            # replacing the match would leave the rest of the old description
            # (and a stray "}}") behind in the page.
            if any('{{' in m.group(2) for m in matches):
                print(f"[SKIP] {file_title_full} (Description contains a nested template; edit manually)")
                continue

            # MediaWiki reads "a=b" in a positional template argument as a
            # named parameter, so a title containing "=" must be passed as 1=.
            description_value = f"1={clean_title}" if '=' in clean_title else clean_title

            # A callable replacement inserts the title verbatim. Building a
            # replacement template by string concatenation would let a title
            # starting with a digit fuse with "\1" into a different group or
            # octal escape, and would interpret backslashes in the title.
            new_text = _EN_DESCRIPTION_RE.sub(
                lambda m: m.group(1) + description_value + m.group(3),
                current_text,
            )

            if new_text == current_text:
                print(f"[SKIP] {file_title_full} (Description already matches)")
                continue

            if DRY_RUN:
                print(f"[-] DRY RUN: Would have updated description of '{file_title_full}' to '{clean_title}'")
                continue

            print(f"[\u2713] Updating: {file_title_full}")
            print(f" New Description: {clean_title}")
            page.text = new_text
            try:
                edit_summary = f"Updated description to match filename: {clean_title}"
                page.save(summary=edit_summary)
            except pywikibot.exceptions.LockedPageError:
                print(f"[SKIP] Page is locked: {file_title_full}")
            except pywikibot.exceptions.OtherPageSaveError as e:
                print(f"[ERROR] Saving {file_title_full} failed: {e}")
            except Exception as e:
                # Deliberate best-effort: one failing page must not abort the batch.
                print(f"[ERROR] Generic error on {file_title_full}: {e}")
            else:
                # Pause only after a successful edit.
                time.sleep(SLEEP_TIME)

    if DRY_RUN:
        print("\n\u2705 DRY RUN complete. No files were modified.")
    else:
        print("\n\u2705 Batch processing completed across all categories.")
# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()