diff --git a/scripts/delete_ngu.py b/scripts/delete_ngu.py new file mode 100644 index 0000000000..8ca88a22cb --- /dev/null +++ b/scripts/delete_ngu.py @@ -0,0 +1,298 @@ +#!/usr/bin/env python3 +"""This script can be used to delete and undelete pages en masse. + +Of course, you will need an admin account on the relevant wiki. + +These command line parameters can be used to specify which pages to work on: + +¶ms; + +Furthermore, the following command line parameters are supported: + +-always Don't prompt to delete pages, just do it. + +-summary:XYZ Set the summary message text for the edit to XYZ. + +-undelete Actually undelete pages instead of deleting. Obviously + makes sense only with -page and -file. + +-isorphan Alert if there are pages that link to page to be deleted + (check 'What links here'). By default it is active and + only the summary per namespace is be given. If given as + ``-isorphan:n``, n pages per namespace will be shown. If + given as ``-isorphan:0``, only the summary per namespace + will be shown. If given as ``-isorphan:n``, with n < 0, + the option is disabled. This option is disregarded if + ``-always`` is set. + +-orphansonly: Specified namespaces. Separate multiple namespace numbers + or names with commas. Examples: + + .. code:: shell + + -orphansonly:0,2,4 + -orphansonly:Help,MediaWiki + + Note that Main ns can be indicated either with a 0 or a ',': + + .. code:: shell + + -orphansonly:0,1 + -orphansonly:,Talk + +Usage: + + python pwb.py delete [-category categoryName] + +Examples +-------- + +Delete everything in the category "To delete" without prompting: + + python pwb.py delete -cat:"To delete" -always +""" +# +# (C) Pywikibot team, 2013-2024 +# +# Distributed under the terms of the MIT license. +# +from __future__ import annotations + +import collections + +import pywikibot +from pywikibot import i18n, pagegenerators +from pywikibot.backports import DefaultDict +from pywikibot.bot import CurrentPageBot +from pywikibot.page import Page +from pywikibot.site import Namespace +from pywikibot.tools.itertools import islice_with_ellipsis + + +# This is required for the text that is shown when you run this script +# with the parameter -help. +docuReplacements = {'¶ms;': pagegenerators.parameterHelp} # noqa: N816 + +RefTable = DefaultDict[Namespace, Page] + + +class PageWithRefs(Page): + + """A subclass of Page with convenience methods for reference checking. + + Supports the same interface as Page, with some added methods. + """ + + def __init__(self, source, title: str = '', ns=0) -> None: + """Initializer.""" + super().__init__(source, title, ns) + _cache_attrs = list(super()._cache_attrs) + _cache_attrs = tuple(_cache_attrs + ['_ref_table']) + + def get_ref_table(self, *args, **kwargs) -> RefTable: + """Build mapping table with pages which links the current page.""" + ref_table = collections.defaultdict(list) + for page in self.getReferences(*args, **kwargs): + ref_table[page.namespace()].append(page) + return ref_table + + @property + def ref_table(self) -> RefTable: + """Build link reference table lazily. + + This property gives a default table without any parameter set for + getReferences(), whereas self.get_ref_table() is able to accept + parameters. + """ + if not hasattr(self, '_ref_table'): + self._ref_table = self.get_ref_table() + return self._ref_table + + def namespaces_with_ref_to_page(self, namespaces=None) -> set[Namespace]: + """Check if current page has links from pages in namepaces. + + If namespaces is None, all namespaces are checked. + Returns a set with namespaces where a ref to page is present. + + :param namespaces: Namespace to check + :type namespaces: iterable of Namespace objects + """ + if namespaces is None: + namespaces = self.site.namespaces() + + return set(namespaces) & set(self.ref_table) + + +class DeletionRobot(CurrentPageBot): + + """This robot allows deletion of pages en masse.""" + + update_options = { + 'undelete': False, + 'isorphan': 0, + 'orphansonly': [], + } + + def __init__(self, summary: str, **kwargs) -> None: + """Initializer. + + :param summary: the reason for the (un)deletion + """ + super().__init__(**kwargs) + + self.summary = summary + # Upcast pages to PageWithRefs() + self.generator = (PageWithRefs(p) for p in self.generator) + + def display_references(self) -> None: + """Display pages that link to the current page, sorted per namespace. + + Number of pages to display per namespace is provided by: + - self.opt.isorphan + """ + refs = self.current_page.ref_table + if not refs: + return + + total = sum(len(v) for v in refs.values()) + if total > 1: + pywikibot.warning( + f'There are {total} pages that link to {self.current_page}.') + else: + pywikibot.warning( + f'There is a page that links to {self.current_page}.') + + show_n_pages = self.opt.isorphan + width = len(max((ns.canonical_prefix() for ns in refs), key=len)) + for ns in sorted(refs): + n_pages_in_ns = len(refs[ns]) + plural = '' if n_pages_in_ns == 1 else 's' + ns_name = ns.canonical_prefix() if ns != ns.MAIN else 'Main:' + ns_id = f'[{ns.id}]' + pywikibot.info( + ' {0!s:<{width}} {1:>6} {2:>10} page{pl}'.format( + ns_name, ns_id, n_pages_in_ns, width=width, pl=plural)) + if show_n_pages: # do not show marker if 0 pages are requested. + for page in islice_with_ellipsis(refs[ns], show_n_pages): + pywikibot.info(f' {page.title()!s}') + + def skip_page(self, page) -> bool: + """Skip files with global usage""" + if not self.opt.undelete and page.exists() and page.namespace() == Namespace.FILE: + global_usage = page.site.simple_request(action='query', prop='globalusage', titles=page.title()).submit() + if len(global_usage['query']['pages'][str(page.pageid)]['globalusage']) > 0: + pywikibot.info(f'Skipping: {page} has global usage.') + return True + """Skip the page under some conditions.""" + if self.opt.undelete and page.exists(): + pywikibot.info(f'Skipping: {page} already exists.') + return True + if not self.opt.undelete and not page.exists(): + pywikibot.info(f'Skipping: {page} does not exist.') + return True + return super().skip_page(page) + + def treat_page(self) -> None: + """Process one page from the generator.""" + if self.opt.undelete: + self.current_page.undelete(self.summary) + self.counter['undelete'] += 1 + else: + if (self.opt.isorphan is not False + and not self.opt.always): + self.display_references() + + if self.opt.orphansonly: + namespaces = self.opt.orphansonly + ns_with_ref = self.current_page.namespaces_with_ref_to_page( + namespaces) + ns_with_ref = sorted(ns_with_ref) + if ns_with_ref: + ns_names = ', '.join(str(ns.id) for ns in ns_with_ref) + pywikibot.info(f'Skipping: {self.current_page} is not ' + f'orphan in ns: {ns_names}.') + return # Not an orphan, do not delete. + + if self.current_page.site.user() is None: + self.current_page.site.login() + res = self.current_page.delete(self.summary, + not self.opt.always, + self.opt.always, + automatic_quit=True) + if res > 0: + self.counter['delete'] += 1 + elif res < 0: + self.counter['marked-for-deletion'] += 1 + else: + self.counter['no-action'] += 1 + + +def main(*args: str) -> None: + """Process command line arguments and invoke bot. + + If args is an empty list, sys.argv is used. + + :param args: command line arguments + """ + page_name = '' + summary = None + options = {} + pg_args = [] + + # read command line parameters + local_args = pywikibot.handle_args(args) + gen_factory = pagegenerators.GeneratorFactory() + mysite = pywikibot.Site() + + for arg in local_args: + opt, _, value = arg.partition(':') + if opt in ('-always', '-undelete'): + options[opt[1:]] = True + elif opt == '-summary': + summary = value or pywikibot.input( + 'Enter a reason for the deletion:') + elif opt == '-isorphan': + value = int(value or 0) + options[opt[1:]] = value if value >= 0 else False + elif opt == '-orphansonly': + if value: + namespaces = mysite.namespaces.resolve(value.split(',')) + else: + namespaces = mysite.namespaces + options[opt[1:]] = namespaces + else: + pg_args.append(arg) + + un = 'un' if 'undelete' in options else '' + for arg in pg_args: + *_, page_name = arg.partition(':') + if gen_factory.handle_arg(arg) and not summary: + if arg.startswith('-file'): + summary = i18n.twtranslate(mysite, un + 'delete-from-file') + elif page_name: + if arg.startswith(('-cat', '-subcats')): + summary = i18n.twtranslate(mysite, 'delete-from-category', + {'page': page_name}) + elif arg.startswith('-links'): + summary = i18n.twtranslate(mysite, + un + 'delete-linked-pages', + {'page': page_name}) + elif arg.startswith('-ref'): + summary = i18n.twtranslate( + mysite, 'delete-referring-pages', {'page': page_name}) + elif arg.startswith('-imageused'): + summary = i18n.twtranslate(mysite, un + 'delete-images', + {'page': page_name}) + + # We are just deleting pages, so we have no need of using a preloading + # page generator to actually get the text of those pages. + generator = gen_factory.getCombinedGenerator() + if not pywikibot.bot.suggest_help(missing_generator=not generator): + if summary is None: + summary = pywikibot.input(f'Enter a reason for the {un}deletion:') + bot = DeletionRobot(summary, generator=generator, **options) + bot.run() + + +if __name__ == '__main__': + main()