diff --git a/.codecov.yml b/.codecov.yml index 322dcd039f..71869ffe4f 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -19,13 +19,9 @@ coverage: default: enabled: true ignore: - - Users - pywikibot/daemonize.py - pywikibot/families/__init__.py - - pywikibot/scripts/preload_sites.py - - pywikibot/scripts/version.py - - scripts/maintenance/colors.py - - scripts/maintenance/make_i18n_dict.py + - scripts/create_isbn_edition.py - scripts/userscripts/ - tests/pwb/ notify: diff --git a/.github/workflows/doctest.yml b/.github/workflows/doctest.yml index dee414e153..419bb238b2 100644 --- a/.github/workflows/doctest.yml +++ b/.github/workflows/doctest.yml @@ -22,18 +22,17 @@ jobs: fail-fast: false max-parallel: 17 matrix: - python-version: [pypy3.8, pypy3.10, '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: [pypy3.8, pypy3.11, '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] os: ['windows-latest', 'macOS-latest', 'ubuntu-latest'] include: - - python-version: 3.14-dev - python-version: 3.15-dev steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 if: "!endsWith(matrix.python-version, '-dev')" with: python-version: ${{ matrix.python-version }} @@ -68,6 +67,7 @@ jobs: coverage run -m pytest pywikibot --doctest-modules --ignore-glob="*gui.py" --ignore-glob="*memento.py" - name: Show coverage statistics run: | + coverage combine || true coverage report - name: Upload coverage to Codecov uses: codecov/codecov-action@v5 diff --git a/.github/workflows/login_tests-ci.yml b/.github/workflows/login_tests-ci.yml index f167462166..ea6d5959ab 100644 --- a/.github/workflows/login_tests-ci.yml +++ b/.github/workflows/login_tests-ci.yml @@ -6,8 +6,7 @@ on: workflow_run: workflows: [Pywikibot CI] branches: [master] - types: - - completed + types: [completed] env: PYWIKIBOT_TEST_RUNNING: 1 @@ -15,14 
+14,35 @@ env: PYWIKIBOT_USERNAME: Pywikibot-test jobs: - build: + wait_for_all: + name: Wait for other workflows to finish + runs-on: ubuntu-latest + continue-on-error: true + steps: + - name: Wait for all workflows to complete excluding this one + uses: kachick/wait-other-jobs@v3.8.1 + with: + skip-same-workflow: true + skip-list: | + [ + { + "workflowFile": "login_tests-ci.yml", + "jobName": "Wait for other workflows to finish" + } + ] + warmup-delay: PT1M + minimum-interval: PT5M + + run_tests: + name: Run Login/Logout Tests runs-on: ${{ matrix.os || 'ubuntu-latest' }} + needs: wait_for_all timeout-minutes: 30 strategy: fail-fast: false max-parallel: 1 matrix: - python-version: [pypy3.8, pypy3.10, '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', 3.14-dev, 3.15-dev] + python-version: [pypy3.8, pypy3.11, '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', '3.14', 3.15-dev] site: ['wikipedia:en', 'wikisource:zh', 'wikipedia:test'] include: - python-version: '3.8' @@ -52,11 +72,11 @@ jobs: os: macOS-latest steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 if: "!endsWith(matrix.python-version, '-dev')" with: python-version: ${{ matrix.python-version }} @@ -80,10 +100,10 @@ jobs: - name: Generate family files run: | if [ ${{matrix.family || 0}} == wpbeta ]; then - python pwb.py generate_family_file http://${{matrix.code}}.wikipedia.beta.wmflabs.org/ wpbeta y + python pwb.py generate_family_file http://${{matrix.code}}.wikipedia.beta.wmcloud.org/ wpbeta y fi if [ ${{matrix.site || 0}} == 'wsbeta:en' ]; then - python pwb.py generate_family_file http://en.wikisource.beta.wmflabs.org/ wsbeta y + python pwb.py generate_family_file http://en.wikisource.beta.wmcloud.org/ wsbeta n fi - name: Generate user files run: | @@ -111,6 +131,7 @@ jobs: coverage run -m unittest -vv tests/site_login_logout_tests.py - 
name: Show coverage statistics run: | + coverage combine || true coverage report - name: Upload coverage to Codecov uses: codecov/codecov-action@v5 diff --git a/.github/workflows/oauth_tests-ci.yml b/.github/workflows/oauth_tests-ci.yml index 8fe4cd8321..2f880af3e9 100644 --- a/.github/workflows/oauth_tests-ci.yml +++ b/.github/workflows/oauth_tests-ci.yml @@ -18,11 +18,11 @@ jobs: build: runs-on: ${{ matrix.os || 'ubuntu-latest' }} continue-on-error: ${{ matrix.experimental || false }} - timeout-minutes: 5 + timeout-minutes: 10 strategy: fail-fast: false matrix: - python-version: [pypy3.8, pypy3.10, '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', 3.14-dev, 3.15-dev] + python-version: [pypy3.8, pypy3.11, '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', '3.14', 3.15-dev] family: [wikipedia] code: [test] domain: [test.wikipedia.org] @@ -30,18 +30,20 @@ jobs: - python-version: '3.8' family: wpbeta code: en - domain: en.wikipedia.beta.wmflabs.org + domain: en.wikipedia.beta.wmcloud.org + experimental: true - python-version: '3.8' family: wpbeta code: zh - domain: zh.wikipedia.beta.wmflabs.org + domain: zh.wikipedia.beta.wmcloud.org + experimental: true steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 if: "!endsWith(matrix.python-version, '-dev')" with: python-version: ${{ matrix.python-version }} @@ -82,26 +84,28 @@ jobs: - name: Generate family files if: ${{ matrix.family == 'wpbeta' }} run: | - python pwb.py generate_family_file http://${{matrix.code}}.wikipedia.beta.wmflabs.org/ wpbeta y + python pwb.py generate_family_file http://${{matrix.code}}.wikipedia.beta.wmcloud.org/ wpbeta y - name: Generate user files run: | python -Werror::UserWarning -m pwb generate_user_files -family:${{matrix.family}} -lang:${{matrix.code}} -user:${{ env.PYWIKIBOT_USERNAME }} -v -debug; echo 
"usernames['commons']['beta'] = '${{ env.PYWIKIBOT_USERNAME }}'" >> user-config.py + echo "usernames['meta']['meta'] = '${{ env.PYWIKIBOT_USERNAME }}'" >> user-config.py echo "authenticate['${{ matrix.domain }}'] = ('${{ steps.split.outputs._0 }}', '${{ steps.split.outputs._1 }}', '${{ steps.split.outputs._2 }}', '${{ steps.split.outputs._3 }}')" >> user-config.py echo "noisysleep = float('inf')" >> user-config.py echo "maximum_GET_length = 5000" >> user-config.py echo "console_encoding = 'utf8'" >> user-config.py - name: Oauth tests with unittest - timeout-minutes: 2 + timeout-minutes: 8 env: PYWIKIBOT_TEST_WRITE: 1 PYWIKIBOT_TEST_OAUTH: ${{ secrets[format('{0}', steps.token.outputs.uppercase)] }} - PYWIKIBOT_TEST_MODULES: edit_failure,file,oauth + PYWIKIBOT_TEST_MODULES: edit_failure,file,oauth,superset run: | python pwb.py version coverage run -m unittest -vv - name: Show coverage statistics run: | + coverage combine || true coverage report - name: Upload coverage to Codecov uses: codecov/codecov-action@v5 diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index cc99546c98..9372a93f92 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -17,18 +17,27 @@ env: jobs: pre-commit: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os || 'ubuntu-latest' }} + continue-on-error: ${{ matrix.experimental || false }} strategy: fail-fast: false matrix: python-version: + - '3.9' - '3.13' - - 3.14-dev - - 3.15-dev + - '3.14' + os: + - windows-latest + - macOS-latest + include: + - python-version: '3.14' + os: ubuntu-latest + - python-version: 3.15-dev + experimental: true steps: - name: set up python ${{ matrix.python-version }} if: "!endsWith(matrix.python-version, '-dev')" - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: set up development python ${{ matrix.python-version }} @@ -37,10 +46,12 @@ jobs: with: python-version: ${{ 
matrix.python-version }} - name: checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: submodules: true - name: run pre-commit uses: pre-commit/action@v3.0.1 + env: + SKIP: copyright timeout-minutes: 5 timeout-minutes: 100 diff --git a/.github/workflows/pywikibot-ci.yml b/.github/workflows/pywikibot-ci.yml index 6130c339ed..9635b67845 100644 --- a/.github/workflows/pywikibot-ci.yml +++ b/.github/workflows/pywikibot-ci.yml @@ -25,7 +25,7 @@ jobs: fail-fast: false max-parallel: 19 matrix: - python-version: [pypy3.10, '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: [pypy3.10, pypy3.11, '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] site: ['wikipedia:en', 'wikisource:zh'] include: - python-version: '3.8' @@ -59,12 +59,6 @@ jobs: - python-version: pypy3.8 site: wikisource:zh os: ubuntu-22.04 - - python-version: 3.14-dev - site: wikipedia:en - os: ubuntu-22.04 - - python-version: 3.14-dev - site: wikisource:zh - os: ubuntu-22.04 - python-version: 3.15-dev site: wikipedia:en os: ubuntu-22.04 @@ -73,11 +67,11 @@ jobs: os: ubuntu-22.04 steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 if: "!endsWith(matrix.python-version, '-dev')" with: python-version: ${{ matrix.python-version }} @@ -101,10 +95,10 @@ jobs: - name: Generate family files run: | if [ ${{matrix.family || 0}} == wpbeta ]; then - python pwb.py generate_family_file http://${{matrix.code}}.wikipedia.beta.wmflabs.org/ wpbeta y + python pwb.py generate_family_file http://${{matrix.code}}.wikipedia.beta.wmcloud.org/ wpbeta y fi if [ ${{matrix.site || 0}} == 'wsbeta:en' ]; then - python pwb.py generate_family_file http://en.wikisource.beta.wmflabs.org/ wsbeta y + python pwb.py generate_family_file http://en.wikisource.beta.wmcloud.org/ wsbeta n fi - name: Generate user files run: | @@ -140,6 +134,7 @@ 
jobs: fi - name: Show coverage statistics run: | + coverage combine || true coverage report - name: Upload coverage to Codecov uses: codecov/codecov-action@v5 @@ -147,4 +142,7 @@ jobs: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - name: Check on failure if: steps.ci_test.outcome == 'failure' - run: exit 1 + run: | + # Print public IP of runner + curl -s https://api.ipify.org + exit 1 diff --git a/.github/workflows/sysop_write_tests-ci.yml b/.github/workflows/sysop_write_tests-ci.yml index a08b124a08..8993ab60e2 100644 --- a/.github/workflows/sysop_write_tests-ci.yml +++ b/.github/workflows/sysop_write_tests-ci.yml @@ -27,11 +27,11 @@ jobs: attr: [write and not rights, write and rights, rights and not write] steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -62,6 +62,7 @@ jobs: coverage run -m pytest -s -r A -a "${{ matrix.attr }}" - name: Show coverage statistics run: | + coverage combine || true coverage report - name: Upload coverage to Codecov uses: codecov/codecov-action@v5 diff --git a/.github/workflows/windows_tests.yml b/.github/workflows/windows_tests.yml index 13a24057f5..9c54f80e61 100644 --- a/.github/workflows/windows_tests.yml +++ b/.github/workflows/windows_tests.yml @@ -22,16 +22,16 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8.0, '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: [3.8.0, '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] python-arch: [x64, x86] site: ['wikipedia:en'] steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version 
}} architecture: ${{ matrix.python-arch }} @@ -75,6 +75,7 @@ jobs: coverage run -m unittest discover -vv -p \"*_tests.py\"; - name: Show coverage statistics run: | + coverage combine || true coverage report - name: Upload coverage to Codecov uses: codecov/codecov-action@v5 @@ -82,4 +83,7 @@ jobs: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - name: Check on failure if: steps.ci_test.outcome == 'failure' - run: exit 1 + run: | + # Print public IP of runner + curl -s https://api.ipify.org + exit 1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 04b0fe62da..a150f20aad 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ repos: hooks: - id: commit-message-validator - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-added-large-files args: @@ -63,15 +63,16 @@ repos: entry: tests/hooks/copyright_fixer.py files: .+\.py$ language: python + require_serial: true - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.1 + rev: v0.14.1 hooks: - id: ruff-check alias: ruff args: - --fix - repo: https://github.com/asottile/pyupgrade - rev: v3.20.0 + rev: v3.21.0 hooks: - id: pyupgrade args: @@ -88,12 +89,12 @@ repos: - --remove-unused-variables exclude: ^pywikibot/backports\.py$ - repo: https://github.com/PyCQA/isort - rev: 6.0.1 + rev: 6.1.0 hooks: - id: isort exclude: ^pwb\.py$ - repo: https://github.com/jshwi/docsig - rev: v0.69.4 + rev: v0.71.0 hooks: - id: docsig exclude: ^(tests|scripts) @@ -103,6 +104,7 @@ repos: - id: flake8 args: - --doctests + - --config=tox.ini additional_dependencies: # Due to incompatibilities between packages the order matters. 
- flake8-bugbear>=24.12.12 @@ -110,3 +112,29 @@ repos: - flake8-print>=5.0.0 - flake8-tuple>=0.4.1 - pep8-naming>=0.15.1 + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.18.2 + hooks: + - id: mypy + args: + - --config-file=pyproject.toml + - --follow-imports=silent + additional_dependencies: + - types-PyMySQL + - types-requests + # Test for files which already passed in past. + # They should be also used in conftest.py to exclude them from non-voting mypy test. + files: | + (?x)^pywikibot/( + (__metadata__|backports|config|cosmetic_changes|daemonize|diff|echo|exceptions|fixes|logging|plural|time|titletranslate)| + (comms|data|families|specialbots)/__init__| + comms/eventstreams| + data/(api/(__init__|_optionset)|citoid|memento|wikistats)| + families/[a-z][a-z\d]+_family| + page/(__init__|_decorators|_page|_revision)| + pagegenerators/(__init__|_filters)| + scripts/(?:i18n/)?__init__| + site/(__init__|_basesite|_decorators|_interwikimap|_tokenwallet|_upload)| + tools/(_deprecate|_logging|_unidata|chars|formatter)| + userinterfaces/(__init__|_interface_base|buffer_interface|terminal_interface|transliteration) + )\.py$ diff --git a/AUTHORS.rst b/AUTHORS.rst index 3e29b4eae8..61bd9d25e7 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -191,6 +191,7 @@ L :: + Lars G Legoktm Leonardo Gregianin Lewis Cawte @@ -294,6 +295,7 @@ S :: + Sanjai Siddharthan Serio Santoro Scot Wilcoxon Shardul C @@ -317,6 +319,7 @@ T Tacsipacsi + Tejashxv Tgr TheRogueMule theopolisme @@ -381,7 +384,6 @@ Y Yrithinnd Yuri Astrakhan Yusuke Matsubara - Zaher Kadour Z - @@ -389,5 +391,7 @@ Z :: + Zabe + Zaher Kadour zhuyifei1999 Zoran Dori diff --git a/CONTENT.rst b/CONTENT.rst index 50936affff..5ffc998160 100644 --- a/CONTENT.rst +++ b/CONTENT.rst @@ -24,6 +24,8 @@ The contents of the package +---------------------------+-----------------------------------------------------------+ | ROADMAP.rst | PyPI version roadmap file | 
+---------------------------+-----------------------------------------------------------+ + | conftest.py | Local per-directory plugin for pytest-mypy | + +---------------------------+-----------------------------------------------------------+ | dev-requirements.txt | PIP requirements file for development dependencies | +---------------------------+-----------------------------------------------------------+ | make_dist.py | Script to create a Pywikibot distribution | diff --git a/HISTORY.rst b/HISTORY.rst index 3b45a1d030..23feeedd6d 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,134 @@ Release History =============== +10.6.0 +------ +*23 October 2025* + +* Fix :meth:`ItemPage.get_value_at_timestamp()`; + keep the rank of the claims in the loop. (:phab:`T407701`) +* :meth:`Family.isPublic()` is deprecated (:phab:`T407049`) +* Added :func:`tools.itertools.union_generators` for sorted merging of pre-sorted iterables. +* **Support for Python 3.8 will be discontinued**; + this is likely the last Pywikibot version to support it. +* Added a Citoid Query interface with the :mod:`data.citoid` module. +* Updated localization (L10N) files. +* :meth:`Family.interwiki_replacements` is deprecated; + use :attr:`Family.code_aliases` instead. +* The first parameter of :meth:`Transliterator.transliterate + ` is positional only + whereas *prev* and *succ* parameters are keyword only. The :class:`Transliterator + ` was improved. +* Show user-agent with :mod:`version` script (:phab:`T406458`) +* Positional arguments of :func:`daemonize()` are deprecated and must + be given as keyword arguments. +* i18n updates. +* Return :meth:`bot.BaseBot.userPut` result from :meth:`AutomaticTWSummaryBot.put_current() + ` method + + +10.5.0 +------ +*21 September 2025* + +* :class:`Siteinfo` query is made with formatversion 2. Several + boolean values are available now. Note that '*' keys for some data are kept for backward + compatibility but may be removed later. 
(:phab:`T404301`) +* A new property :attr:`APISite.restrictions` was + added. It replaces the methods :meth:`APISite.protection_types() + ` and :meth:`APISite.protection_levels() + ` which are deprecated now. +* Support for mswikiquote was added (:phab:`T404702`) +* :meth:`APISite.rollbackpage()` supports *pageid* + argument as alternative to *page*. *markbot* defaults to True if the rollbacker is a bot and not + explicitly given. The method now returns a dictionary with rollback information. The version + history no longer has to be preloaded. (:phab:`T403425`) +* :meth:`BasePage.rollback()` was implemented (:phab:`T403425`) +* The first parameter of :exc:`exceptions.PageRelatedError` may now be a pageid (:phab:`T403425`) +* i18n Updates +* Use 'login' token from API response in :meth:`login.ClientLoginManager.login_to_site` + (:phab:`T328814`) +* Always use *fallback_prompt* in :func:`i18n.twtranslate` whenever no + translation is found, including unknown keys in existing packages (:phab:`T326470`) + + +10.4.0 +------ +*31 August 2025* + +* Apply client-side filtering for *maxsize* in misermode in + :meth:`Site.allpages()` (:phab:`T402995`) +* Add :attr:`filter_func()` and :meth:`filter_item() + ` filter function in :class:`APIGeneratorBase + ` and modify `generator` property to implement filtering in + `APIGeneratorBase` subclasses (:phab:`T402995`) +* All parameters of :meth:`Site.allpages()` + except *start* must be given as keyword arguments. 
+* Add support for bewwiktionary (:phab:`T402136`) +* Add user-agent header to :mod:`eventstreams` requests (:phab:`T402796`) +* Update i18n +* Save global options in :attr:`bot.global_args` (:phab:`T250034`) +* Update :mod:`plural` forms from unicode.org (:phab:`T114978`) +* Add :class:`textlib.SectionList` to hold :attr:`textlib.Content.sections` (:phab:`T401464`) +* :class:`pywikibot.Coordinate` parameters are keyword only +* Add *strict* parameter to :meth:`Site.unconnected_pages() + ` and :func:`pagegenerators.UnconnectedPageGenerator` + (:phab:`T401699`) +* Raise ValueError if a VAR_POSITIONAL parameter like *\*args* is used with + :class:`tools.deprecate_positionals` decorator +* Add :meth:`get_value_at_timestamp()` API + to :class:`pywikibot.ItemPage` (:phab:`T400612`) +* Clean up :mod:`setup` module (:phab:`T396356`) +* Implement :meth:`pywikibot.ItemPage.get_best_claim` (:phab:`T400610`) +* Add *expiry* parameter to :meth:`BasePage.watch()` and + :meth:`Site.watch()`; fix the methods to return False if + page is missing and no expiry is set (:phab:`T330839`) + + +10.3.2 +------ +*12 August 2025* + +* Add support for zghwiktionary, madwikisource, rkiwiki, minwikibooks + (:phab:`T391769`, :phab:`T392501`, :phab:`T395501`, :phab:`T399787`) +* i18n updates + + +10.3.1 +------ +*08 August 2025* + +* Add support for tlwikisource (:phab:`T388656`) +* i18n updates + + +10.3.0 +------ +*03 August 2025* + +* :attr:`Site.articlepath` may raise a ValueError + instead of AttributeError if ``$1`` placeholder is missing from API +* Refactor the :class:`throttle.Throttle` class (:phab:`T289318`) +* L10N-Updates: add language aliases for ``gsw``, ``sgs``, ``vro``, ``rup`` and ``lzh`` + to :class:`family.WikimediaFamily` family class + (:phab:`T399411`, :phab:`T399438`, :phab:`T399444`, :phab:`T399693`, :phab:`T399697` ) +* Refactor HTML removal logic in :func:`textlib.removeHTMLParts` using :class:`textlib.GetDataHTML` + parser; *removetags* parameter was introduced to 
remove specified tag blocks (:phab:`T399378`) +* Refactor :class:`echo.Notification` and fix :meth:`mark_as_read()` + method (:phab:`T398770`) +* Update beta domains in family files from beta.wmflabs.org to beta.wmcloud.org (:phab:`T289318`) +* ``textlib.to_latin_digits()`` was renamed to :func:`textlib.to_ascii_digits` (:phab:`T398146#10958283`), + ``NON_LATIN_DIGITS`` of :mod:`userinterfaces.transliteration` was renamed to ``NON_ASCII_DIGITS`` +* Add -cookies option to the :mod:`login` script to log in with cookies + files only +* Create a Site using the :func:`pywikibot.Site` constructor with a given URL, even + if it ends with a slash (:phab:`T396592`) +* Remove hard-coded error messages from :meth:`login.LoginManager.login` and use API response instead +* Add additional information to :meth:`Site.login()` + error message (:phab:`T395670`) +* i18n updates + + 10.2.0 ------ *14 June 2025* @@ -90,7 +218,7 @@ Release History * ``APISite.article_path`` was removed. :attr:`APISite.articlepath ` can be used instead. * ``fix_digits`` method of :class:`textlib.TimeStripper` was removed; - :func:`textlib.to_latin_digits` can be used instead. + :func:`textlib.to_ascii_digits` can be used instead. * :mod:`textlib`.tzoneFixedOffset class was removed in favour of :class:`time.TZoneFixedOffse`. * A boolean *watch* parameter in :meth:`page.BasePage.save` is desupported.
@@ -929,9 +1057,9 @@ Release History **Improvements** * i18n updates for date.py -* Add number transliteration of 'lo', 'ml', 'pa', 'te' to NON_LATIN_DIGITS +* Add number transliteration of 'lo', 'ml', 'pa', 'te' to NON_ASCII_DIGITS * Detect range blocks with Page.is_blocked() method (:phab:`T301282`) -* to_latin_digits() function was added to textlib as counterpart of to_local_digits() function +* to_ascii_digits() function was added to textlib as counterpart of to_local_digits() function * api.Request.submit now handles search-title-disabled and search-text-disabled API Errors * A show_diff parameter was added to Page.put() and Page.change_category() * Allow categories when saving IndexPage (:phab:`T299806`) @@ -1956,7 +2084,7 @@ Release History * UploadBot got a filename prefix parameter (:phab:`T170123`) * cosmetic_changes is able to remove empty sections (:phab:`T140570`) * Pywikibot is following :pep:`396` versioning -* pagegenerators AllpagesPageGenerator, CombinedPageGenerator, UnconnectedPageGenerator are deprecated +* CombinedPageGenerator is deprecated, itertools.chain can be used instead * Some DayPageGenerator parameters has been renamed * unicodedata2, httpbin and Flask dependency was removed (:phab:`T102461`, :phab:`T108068`, :phab:`T178864`, :phab:`T193383`) diff --git a/ROADMAP.rst b/ROADMAP.rst index fb6a0e942d..8e546d9a7d 100644 --- a/ROADMAP.rst +++ b/ROADMAP.rst @@ -1,80 +1,154 @@ Current Release Changes ======================= -* Add -cookies option to :mod:`login` script to login with cookies files only -* Create a Site using :func:`pywikibot.Site` constructor with a given url even if the url ends with - a slash (:phab:`T396592`) -* Remove hard-coded error messages from :meth:`login.LoginManager.login` and use API response instead -* Add additional informations to :meth:`Site.login()` error message (:phab:`T395670`) +* Add support for pcmwikiquote and minwikisource. 
(:phab:`T408345`, :phab:`T408353`) +* Deprecate dysfunctional :meth:`APISite.alllinks() + `. (:phab:`T359427`, :phab:`T407708`) +* Refactor ``replace_magicwords`` in + :meth:`cosmetic_changes.CosmeticChangesToolkit.translateMagicWords`. (:phab:`T396715`) +* Deprecate old ``(type, value, traceback)`` signature in + :meth:`tools.collections.GeneratorWrapper.throw`. (:phab:`T340641`) +* Replace default timetravel.mementoweb.org with web.archive.org in :mod:`data.memento` module. + (:phab:`T400570`, :phab:`T407694`) * i18n updates -Current Deprecations -==================== -* 10.2.0: :mod:`tools.threading.RLock` is deprecated and moved to :mod:`backports` - module. The :meth:`backports.RLock.count` method is also deprecated. For Python 3.14+ use ``RLock`` - from Python library ``threading`` instead. (:phab:`T395182`) -* 10.1.0: *revid* and *date* parameters of :meth:`Page.authorship() - ` were dropped -* 10.0.0: *last_id* of :class:`comms.eventstreams.EventStreams` was renamed to *last_event_id* (:phab:`T309380`) -* 10.0.0: 'millenia' argument for *precision* parameter of :class:`pywikibot.WbTime` is deprecated; - 'millennium' must be used instead. -* 10.0.0: *includeredirects* parameter of :func:`pagegenerators.AllpagesPageGenerator` and - :func:`pagegenerators.PrefixingPageGenerator` is deprecated and should be replaced by *filterredir* +Deprecations +============ + +This section lists features, methods, parameters, or attributes that are deprecated +and scheduled for removal in future Pywikibot releases. + +Deprecated items may still work in the current release but are no longer recommended for use. +Users should update their code according to the recommended alternatives. + +Pywikibot follows a clear deprecation policy: features are typically deprecated in one release and +removed in the third subsequent major release, remaining available for the two releases in between.
+ + +Pending removal in Pywikibot 11 +------------------------------- + +* 10.7.0: Dysfunctional :meth:`APISite.alllinks() + ` will be removed. + (:phab:`T359427`, :phab:`T407708`) +* 10.6.0: Python 3.8 support is deprecated and will be dropped soon +* 8.4.0: :attr:`data.api.QueryGenerator.continuekey` will be removed in favour of + :attr:`data.api.QueryGenerator.modules` +* 8.4.0: The *modules_only_mode* parameter in the :class:`data.api.ParamInfo` class, its + *paraminfo_keys* class attribute, and its ``preloaded_modules`` property will be removed +* 8.4.0: The *dropdelay* and *releasepid* attributes of the :class:`throttle.Throttle` class will be + removed in favour of the *expiry* class attribute +* 8.2.0: The :func:`tools.itertools.itergroup` function will be removed in favour of the + :func:`backports.batched` function +* 8.2.0: The *normalize* parameter in the :meth:`pywikibot.WbTime.toTimestr` and + :meth:`pywikibot.WbTime.toWikibase` methods will be removed +* 8.1.0: The inheritance of the :exc:`exceptions.NoSiteLinkError` exception from + :exc:`exceptions.NoPageError` will be removed +* 8.1.0: The ``exceptions.Server414Error`` exception is deprecated in favour of the + :exc:`exceptions.Client414Error` exception +* 8.0.0: The :meth:`Timestamp.clone()` method is deprecated in + favour of the ``Timestamp.replace()`` method +* 8.0.0: The :meth:`family.Family.maximum_GET_length` method is deprecated in favour of the + :ref:`config.maximum_GET_length` configuration option (:phab:`T325957`) +* 8.0.0: The ``addOnly`` parameter in the :func:`textlib.replaceLanguageLinks` and + :func:`textlib.replaceCategoryLinks` functions is deprecated in favour of ``add_only`` +* 8.0.0: The regex attributes ``ptimeR``, ``ptimeznR``, ``pyearR``, ``pmonthR``, and ``pdayR`` of + the :class:`textlib.TimeStripper` class are deprecated in favour of the ``patterns`` attribute, + which is a :class:`textlib.TimeStripperPatterns` object +* 8.0.0: The ``groups`` attribute of the 
:class:`textlib.TimeStripper` class is deprecated in favour + of the :data:`textlib.TIMEGROUPS` constant +* 8.0.0: The :meth:`LoginManager.get_login_token` method + has been replaced by ``login.ClientLoginManager.site.tokens['login']`` +* 8.0.0: The ``data.api.LoginManager()`` constructor is deprecated in favour of the + :class:`login.ClientLoginManager` class +* 8.0.0: The :meth:`APISite.messages()` method is + deprecated in favour of the :attr:`userinfo['messages']` + attribute +* 8.0.0: The :meth:`Page.editTime()` method is deprecated and should be + replaced by the :attr:`Page.latest_revision.timestamp` attribute + + +Pending removal in Pywikibot 12 +------------------------------- + * 9.6.0: :meth:`BaseSite.languages()` will be removed in favour of :attr:`BaseSite.codes` * 9.5.0: :meth:`DataSite.getPropertyType()` will be removed in favour of :meth:`DataSite.get_property_type()` * 9.3.0: :meth:`page.BasePage.userName` and :meth:`page.BasePage.isIpEdit` are deprecated in favour of ``user`` or ``anon`` attributes of :attr:`page.BasePage.latest_revision` property -* 9.2.0: Imports of :mod:`logging` functions from :mod:`bot` module is deprecated and will be desupported +* 9.3.0: *botflag* parameter of :meth:`Page.save()`, :meth:`Page.put() + `, :meth:`Page.touch()` and + :meth:`Page.set_redirect_target()` was renamed to *bot* +* 9.2.0: All parameters of :meth:`Page.templates` and + :meth:`Page.itertemplates()` must be given as keyworded arguments +* 9.2.0: Imports of :mod:`logging` functions from the :mod:`bot` module are deprecated and will be desupported * 9.2.0: *total* argument in ``-logevents`` pagegenerators option is deprecated; use ``-limit`` instead (:phab:`T128981`) * 9.0.0: The *content* parameter of :meth:`proofreadpage.IndexPage.page_gen` is deprecated and will be ignored (:phab:`T358635`) -* 9.0.0: ``userinterfaces.transliteration.transliterator`` was renamed to :class:`Transliterator - ` -* 9.0.0: ``next`` parameter of 
:meth:`userinterfaces.transliteration.transliterator.transliterate` was +* 9.0.0: ``next`` parameter of :meth:`userinterfaces.transliteration.Transliterator.transliterate` was renamed to ``succ`` -* 9.0.0: ``type`` parameter of :meth:`site.APISite.protectedpages() +* 9.0.0: ``userinterfaces.transliteration.transliterator`` object was renamed to :class:`Transliterator + ` +* 9.0.0: The ``type`` parameter of :meth:`site.APISite.protectedpages() ` was renamed to ``protect_type`` -* 9.0.0: ``all`` parameter of :meth:`site.APISite.namespace()` - was renamed to ``all_ns`` +* 9.0.0: The ``all`` parameter of :meth:`site.APISite.namespace() + ` was renamed to ``all_ns`` * 9.0.0: ``filter`` parameter of :func:`date.dh` was renamed to ``filter_func`` * 9.0.0: ``dict`` parameter of :class:`data.api.OptionSet` was renamed to ``data`` -* 9.0.0: ``pywikibot.version.get_toolforge_hostname()`` is deprecated without replacement +* 9.0.0: :func:`pywikibot.version.get_toolforge_hostname` is deprecated with no replacement * 9.0.0: ``allrevisions`` parameter of :class:`xmlreader.XmpDump` is deprecated, use ``revisions`` instead (:phab:`T340804`) * 9.0.0: ``iteritems`` method of :class:`data.api.Request` will be removed in favour of ``items`` -* 9.0.0: ``SequenceOutputter.output()`` is deprecated in favour of :attr:`tools.formatter.SequenceOutputter.out` - property +* 9.0.0: ``SequenceOutputter.output()`` is deprecated in favour of the + :attr:`tools.formatter.SequenceOutputter.out` property -Pending removal in Pywikibot 11 +Pending removal in Pywikibot 13 ------------------------------- -* 8.4.0: *modules_only_mode* parameter of :class:`data.api.ParamInfo`, its *paraminfo_keys* class attribute - and its preloaded_modules property will be removed -* 8.4.0: *dropdelay* and *releasepid* attributes of :class:`throttle.Throttle` will be removed - in favour of *expiry* class attribute -* 8.2.0: :func:`tools.itertools.itergroup` will be removed in favour of :func:`backports.batched` -* 8.2.0: 
*normalize* parameter of :meth:`WbTime.toTimestr` and :meth:`WbTime.toWikibase` will be removed -* 8.1.0: Dependency of :exc:`exceptions.NoSiteLinkError` from :exc:`exceptions.NoPageError` will be removed -* 8.1.0: ``exceptions.Server414Error`` is deprecated in favour of :exc:`exceptions.Client414Error` -* 8.0.0: :meth:`Timestamp.clone()` method is deprecated - in favour of ``Timestamp.replace()`` method. -* 8.0.0: :meth:`family.Family.maximum_GET_length` method is deprecated in favour of - :ref:`config.maximum_GET_length` (:phab:`T325957`) -* 8.0.0: ``addOnly`` parameter of :func:`textlib.replaceLanguageLinks` and - :func:`textlib.replaceCategoryLinks` are deprecated in favour of ``add_only`` -* 8.0.0: :class:`textlib.TimeStripper` regex attributes ``ptimeR``, ``ptimeznR``, ``pyearR``, ``pmonthR``, - ``pdayR`` are deprecated in favour of ``patterns`` attribute which is a - :class:`textlib.TimeStripperPatterns`. -* 8.0.0: :class:`textlib.TimeStripper` ``groups`` attribute is deprecated in favour of ``textlib.TIMEGROUPS`` -* 8.0.0: :meth:`LoginManager.get_login_token` was - replaced by ``login.ClientLoginManager.site.tokens['login']`` -* 8.0.0: ``data.api.LoginManager()`` is deprecated in favour of :class:`login.ClientLoginManager` -* 8.0.0: :meth:`APISite.messages()` method is deprecated in - favour of :attr:`userinfo['messages']` -* 8.0.0: :meth:`Page.editTime()` method is deprecated and should be replaced by - :attr:`Page.latest_revision.timestamp` +* 10.6.0: The old ``(type, value, traceback)`` signature in + :meth:`tools.collections.GeneratorWrapper.throw` will be removed in Pywikibot 13, or earlier if it + is dropped from a future Python release. (:phab:`T340641`) +* 10.6.0: :meth:`Family.isPublic()` will be removed (:phab:`T407049`) +* 10.6.0: :meth:`Family.interwiki_replacements` is deprecated; + use :attr:`Family.code_aliases` instead. 
+* Keyword argument for *char* parameter of :meth:`Transliterator.transliterate + ` and + positional arguments for *prev* and *succ* parameters are deprecated. +* 10.6.0: Positional arguments of :func:`daemonize()` are deprecated and must + be given as keyword arguments. +* 10.5.0: Accessing the fallback '*' keys in 'languages', 'namespaces', 'namespacealiases', and + 'skins' properties of :attr:`APISite.siteinfo` are + deprecated and will be removed. +* 10.5.0: The methods :meth:`APISite.protection_types() + ` and :meth:`APISite.protection_levels() + ` are deprecated. + :attr:`APISite.restrictions` should be used instead. +* 10.4.0: Require all parameters of :meth:`Site.allpages() + ` except *start* to be keyword arguments. +* 10.4.0: Positional arguments of :class:`pywikibot.Coordinate` are deprecated and must be given as + keyword arguments. +* 10.3.0: :meth:`throttle.Throttle.getDelay` and :meth:`throttle.Throttle.setDelays` were renamed to + :meth:`get_delay()` and :meth:`set_delays() + `; the old methods will be removed (:phab:`T289318`) +* 10.3.0: :attr:`throttle.Throttle.next_multiplicity` attribute is unused and will be removed + (:phab:`T289318`) +* 10.3.0: *requestsize* parameter of :class:`throttle.Throttle` call is deprecated and will be + dropped (:phab:`T289318`) +* 10.3.0: :func:`textlib.to_latin_digits` will be removed in favour of + :func:`textlib.to_ascii_digits`, ``NON_LATIN_DIGITS`` of :mod:`userinterfaces.transliteration` + will be removed in favour of ``NON_ASCII_DIGITS`` (:phab:`T398146#10958283`) +* 10.2.0: :mod:`tools.threading.RLock` is deprecated and moved to :mod:`backports` + module. The :meth:`backports.RLock.count` method is also deprecated. For Python 3.14+ use ``RLock`` + from Python library ``threading`` instead. 
(:phab:`T395182`) +* 10.1.0: *revid* and *date* parameters of :meth:`Page.authorship() + ` were dropped +* 10.0.0: *last_id* of :class:`comms.eventstreams.EventStreams` was renamed to *last_event_id* + (:phab:`T309380`) +* 10.0.0: 'millenia' argument for *precision* parameter of :class:`pywikibot.WbTime` is deprecated; + 'millennium' must be used instead +* 10.0.0: *includeredirects* parameter of :func:`pagegenerators.AllpagesPageGenerator` and + :func:`pagegenerators.PrefixingPageGenerator` is deprecated and should be replaced by *filterredir* diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000000..2f93dee192 --- /dev/null +++ b/conftest.py @@ -0,0 +1,60 @@ +"""Configuration file for pytest. + +.. versionadded:: 10.3 +""" +# +# (C) Pywikibot team, 2025 +# +# Distributed under the terms of the MIT license. +# +from __future__ import annotations + +import re +from pathlib import Path +from typing import Literal + + +EXCLUDE_PATTERN = re.compile( + r'(?:' + r'(__metadata__|backports|config|cosmetic_changes|daemonize|diff|echo|' + r'exceptions|fixes|logging|plural|time|titletranslate)|' + r'(comms|data|families|specialbots)/__init__|' + r'comms/eventstreams|' + r'data/(api/(__init__|_optionset)|citoid|memento|wikistats)|' + r'families/[a-z][a-z\d]+_family|' + r'page/(__init__|_decorators|_page|_revision)|' + r'pagegenerators/(__init__|_filters)|' + r'scripts/(i18n/)?__init__|' + r'site/(__init__|_basesite|_decorators|_interwikimap|' + r'_tokenwallet|_upload)|' + r'tools/(_deprecate|_logging|_unidata|chars|formatter)|' + r'userinterfaces/(__init__|_interface_base|buffer_interface|' + r'terminal_interface|transliteration)' + r')\.py' +) + + +def pytest_ignore_collect(collection_path: Path, + config) -> Literal[True] | None: + """Ignore files matching EXCLUDE_PATTERN when pytest-mypy is loaded.""" + # Check if any plugin name includes 'mypy' + plugin_names = {p.__class__.__name__.lower() + for p in config.pluginmanager.get_plugins()} + if not 
any('mypy' in name for name in plugin_names): + return None + + # no cover: start + project_root = Path(__file__).parent / 'pywikibot' + try: + rel_path = collection_path.relative_to(project_root) + except ValueError: + # Ignore files outside project root + return None + + norm_path = rel_path.as_posix() + if EXCLUDE_PATTERN.fullmatch(norm_path): + print(f'Ignoring file in mypy: {norm_path}') # noqa: T201 + return True + + return None + # no cover: stop diff --git a/dev-requirements.txt b/dev-requirements.txt index c5465e1f12..823b1fbbfe 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,20 +1,23 @@ # This is a PIP 6+ requirements file for development dependencies # -pytest >= 8.3.4 -pytest-subtests >= 0.14.1; python_version > "3.8" +pytest >= 8.4.2; python_version > "3.8" +pytest == 8.3.5; python_version < "3.9" +pytest-subtests >= 0.14.2; python_version > "3.8" pytest-subtests == 0.13.1; python_version < "3.9" pytest-attrib>=0.1.3 -pytest-xvfb>=3.0.0 +pytest-xvfb>=3.1.1; python_version > "3.8" +pytest-xvfb==3.0.0; python_version < "3.9" -pre-commit >= 4.2.0; python_version > "3.8" +pre-commit >= 4.3.0; python_version > "3.8" pre-commit == 3.5.0; python_version < "3.9" +coverage>=7.11.0; python_version > "3.9" +coverage==7.10.7; python_version == "3.9" coverage==7.6.1; python_version < "3.9" -coverage>=7.6.12; python_version > "3.8" # required for coverage (T380697) -tomli>=2.2.1; python_version < "3.11" +tomli>=2.3.0; python_version < "3.11" # optional but needed for tests -fake-useragent >= 2.0.3; python_version > "3.8" +fake-useragent >= 2.2.0; python_version > "3.8" fake-useragent == 1.5.1; python_version < "3.9" diff --git a/docs/api_ref/pywikibot.data.rst b/docs/api_ref/pywikibot.data.rst index 0caf1b9734..4a5693d7ca 100644 --- a/docs/api_ref/pywikibot.data.rst +++ b/docs/api_ref/pywikibot.data.rst @@ -11,12 +11,20 @@ .. 
automodule:: data.api :synopsis: Module providing several layers of data access to the wiki +:mod:`data.citoid` --- Citoid Requests +====================================== + +.. automodule:: data.citoid + :synopsis: Citoid Query interface + :mod:`data.memento` --- Memento Requests ======================================== .. automodule:: data.memento :synopsis: Fix ups for memento-client package version 0.6.1 +.. autodata:: data.memento.DEFAULT_TIMEGATE_BASE_URI + :mod:`data.mysql` --- Mysql Requests ==================================== diff --git a/docs/api_ref/pywikibot.page.rst b/docs/api_ref/pywikibot.page.rst index 7d38815dbc..0ea0b77390 100644 --- a/docs/api_ref/pywikibot.page.rst +++ b/docs/api_ref/pywikibot.page.rst @@ -32,6 +32,8 @@ .. automodule:: pywikibot.page._collections :synopsis: Structures holding data for Wikibase entities +.. autoclass:: BaseDataDict + :mod:`page.\_decorators` --- Page Decorators ============================================ diff --git a/docs/changelog.rst b/docs/changelog.rst index de64526e5a..5a6a5bacb1 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -2,7 +2,7 @@ Change log ********** -What is new with Pywikibot |release|? What are the main changes of older version? +New features, improvements, and fixes in Pywikibot |release|. .. include:: ../ROADMAP.rst diff --git a/docs/conf.py b/docs/conf.py index 1da84b8b74..63801d7d6a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,16 +24,12 @@ import os import re import sys +import tomllib import warnings +from itertools import pairwise from pathlib import Path -try: - import tomllib -except ImportError: - import tomli as tomllib - - # Deprecated classes will generate warnings as Sphinx processes them. # Ignoring them. @@ -50,7 +46,7 @@ # If your documentation needs a minimal Sphinx version, state it here. # -needs_sphinx = '8.2.1' +needs_sphinx = '8.2.3' # Add any Sphinx extension module names here, as strings. 
They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -529,55 +525,75 @@ def pywikibot_docstring_fixups(app, what, name, obj, options, lines) -> None: def pywikibot_script_docstring_fixups(app, what, name, obj, options, lines) -> None: - """Pywikibot specific conversions.""" + """Pywikibot-specific docstring conversions for scripts.""" from scripts.cosmetic_changes import warning if what != 'module' or 'scripts.' not in name: return - length = 0 - desc = '' - for index, line in enumerate(lines): - # highlight the first line - if index == 0: # highlight the first line - lines[0] = f"**{line.strip('.')}**" + if not lines: + return + + nextline = None + # highlight the first line + newlines = [f"**{lines[0].strip('.')}**"] + + for previous, line in pairwise(lines): # add link for pagegenerators options - elif line == '¶ms;': - lines[index] = ('This script supports use of ' - ':py:mod:`pagegenerators` arguments.') + if line == '¶ms;': + newlines.append( + 'This script supports use of :mod:`pagegenerators` arguments.') + continue # add link for fixes - elif name == 'scripts.replace' and line == '&fixes-help;': - lines[index] = (' The available fixes are listed ' - 'in :py:mod:`pywikibot.fixes`.') + if name == 'scripts.replace' and line == '&fixes-help;': + newlines.append(' The available fixes are ' + 'listed in :mod:`pywikibot.fixes`.') + continue # replace cosmetic changes warning - elif name == 'scripts.cosmetic_changes' and line == '&warning;': - lines[index] = warning + if name == 'scripts.cosmetic_changes' and line == '&warning;': + newlines.append(warning) + continue # adjust options: if the option contains a colon, convert it to a # definition list and mark the option with a :kbd: role. Also convert # option types enclosed in square brackets to italic style. 
if line.startswith('-'): # extract term and wrap it with :kbd: role - match = re.fullmatch(r'(-\w.+?[^ ])( {2,})(.+)', line) + match = re.fullmatch(r'(-\w\S+)(?:( {2,})(.+))?', line) if match: opt, sp, desc = match.groups() - desc = re.sub(r'\[(float|int|str)\]', r'*(\1)*', desc) - if ':' in opt or ' ' in opt and ', ' not in opt: + sp = sp or '' + desc = desc or '' + # make [type] italic + types = '(?:float|int|str)' + desc = re.sub(rf'\[({types}(?:\|{types})*)\]', r'*(\1)*', desc) + show_as_kbd = ':' in opt or (' ' in opt and ', ' not in opt) + if show_as_kbd: + # extract term and wrap it with :kbd: role + if previous: + # add an empty line if previous is not empty + newlines.append('') length = len(opt + sp) - lines[index] = f':kbd:`{opt}`' + newlines.append(f':kbd:`{opt}`') + # add the description to a new line later + if desc: + nextline = length, desc else: - lines[index] = f'{opt}{sp}{desc}' - - elif length and (not line or line.startswith(' ' * length)): - # Add descriptions to the next line - lines[index] = ' ' * length + f'{desc} {line.strip()}' - length = 0 - elif line: - # Reset length - length = 0 + newlines.append(f'{opt}{sp}{desc}') + continue + + if nextline: + spaces = len(line) - len(line.lstrip()) or nextline[0] + newlines.append(' ' * spaces + nextline[1]) + nextline = None + + newlines.append(line) + + # Overwrite original lines in-place for autodoc + lines[:] = newlines def setup(app) -> None: diff --git a/docs/index.rst b/docs/index.rst index 46b4fe6b1b..1673cd24a4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -21,7 +21,8 @@ system that has a compatible version of Python installed. To check whether you have Python installed and to find its version, just type ``python`` at the CMD or shell prompt. -Python 3.8 or higher is currently required to run. +Python 3.8 or higher is currently required to run the bot but Python 3.9 or +higher is recommended. Python 3.8 support will be dropped with Pywikibot 11 soon. 
Pywikibot and this documentation are licensed under the :ref:`MIT license`; diff --git a/docs/mwapi.rst b/docs/mwapi.rst index f8a9748ae4..10a90e0e93 100644 --- a/docs/mwapi.rst +++ b/docs/mwapi.rst @@ -103,7 +103,7 @@ See the table below for a cross reference between MediaWiki's API and Pywikibot' - * - :api:`rollback` - :meth:`rollbackpage()` - - + - meth:`BasePage.rollback()` - * - :api:`shortenurl` - :meth:`create_short_link()` diff --git a/docs/requirements.txt b/docs/requirements.txt index 1a29bd5730..40ec37e3fa 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,10 +1,9 @@ # This is a PIP requirements file for building Sphinx documentation of Pywikibot # ../requirements.txt is also needed. # Note: Python 3.11 is required for sphinx 8.2 -sphinx >= 8.2.1 -rstcheck >=6.2.4 -sphinxext-opengraph >= 0.9.1 +sphinx >= 8.2.3 +rstcheck >=6.2.5 +sphinxext-opengraph >= 0.13.0 sphinx-copybutton >= 0.5.2 sphinx-tabs >= 3.4.7 -tomli >= 2.2.1; python_version < '3.11' -furo >= 2024.8.6 +furo >= 2025.9.25 diff --git a/docs/scripts/general.rst b/docs/scripts/general.rst index fda9d1500d..9498730bce 100644 --- a/docs/scripts/general.rst +++ b/docs/scripts/general.rst @@ -22,3 +22,10 @@ pagefromfile script .. automodule:: scripts.pagefromfile :no-members: :noindex: + +tracking param remover script +============================= + +.. automodule:: scripts.tracking_param_remover + :no-members: + :noindex: diff --git a/docs/scripts_ref/scripts.rst b/docs/scripts_ref/scripts.rst index 814f3338e3..34c1db6677 100644 --- a/docs/scripts_ref/scripts.rst +++ b/docs/scripts_ref/scripts.rst @@ -244,6 +244,11 @@ touch script .. automodule:: scripts.touch +tracking param remover script +============================= + +.. 
automodule:: scripts.tracking_param_remover + transferbot script ================== diff --git a/docs/tests_ref/index.rst b/docs/tests_ref/index.rst index 4c782e372a..2d4eefce44 100644 --- a/docs/tests_ref/index.rst +++ b/docs/tests_ref/index.rst @@ -15,3 +15,5 @@ Test utilities aspects basepage utils + precommit + pytest diff --git a/docs/tests_ref/precommit.rst b/docs/tests_ref/precommit.rst new file mode 100644 index 0000000000..1fb5769b58 --- /dev/null +++ b/docs/tests_ref/precommit.rst @@ -0,0 +1,8 @@ +********************** +precommit hooks module +********************** + +.. automodule:: tests.hooks.copyright_fixer + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/tests_ref/pytest.rst b/docs/tests_ref/pytest.rst new file mode 100644 index 0000000000..5a3d69ec9c --- /dev/null +++ b/docs/tests_ref/pytest.rst @@ -0,0 +1,8 @@ +************************* +pytest mypy plugin module +************************* + +.. automodule:: conftest + :members: + :undoc-members: + :show-inheritance: diff --git a/make_dist.py b/make_dist.py index 72dfea631a..238928ac65 100755 --- a/make_dist.py +++ b/make_dist.py @@ -52,7 +52,7 @@ The pywikibot-scripts distribution can be created. """ # -# (C) Pywikibot team, 2022-2024 +# (C) Pywikibot team, 2022-2025 # # Distributed under the terms of the MIT license. 
# @@ -228,7 +228,7 @@ class SetupScripts(SetupBase): package = 'pywikibot_scripts' replace = 'MANIFEST.in', 'pyproject.toml', 'setup.py' - def copy_files(self) -> None: + def copy_files(self) -> None: # pragma: no cover """Ignore copy files yet.""" info('<>Copy files ...', newline=False) for filename in self.replace: @@ -238,7 +238,7 @@ def copy_files(self) -> None: shutil.copy(self.folder / 'scripts' / filename, self.folder) info('<>done') - def cleanup(self) -> None: + def cleanup(self) -> None: # pragma: no cover """Ignore cleanup yet.""" info('<>Copy files ...', newline=False) for filename in self.replace: @@ -247,13 +247,13 @@ def cleanup(self) -> None: info('<>done') -def handle_args() -> tuple[bool, bool, bool, bool]: +def handle_args() -> tuple[bool, bool, bool, bool, bool]: """Handle arguments and print documentation if requested. :return: Return whether dist is to be installed locally or to be uploaded """ - if '-help' in sys.argv: + if '-help' in sys.argv: # pragma: no cover import re import setup diff --git a/pwb.py b/pwb.py index 9c4af3b36e..07debd327f 100755 --- a/pwb.py +++ b/pwb.py @@ -18,6 +18,12 @@ This version of Pywikibot only supports Python 3.8+. """ +DEPRECATED_PYTHON_MESSAGE = """ + +Python {version} will be dropped soon with Pywikibot 11. +It is recommended to use Python 3.9 or above. +See phab: T401802 for further information. 
+""" def python_is_supported(): @@ -25,9 +31,21 @@ def python_is_supported(): return sys.version_info[:3] >= (3, 8) +def python_is_deprecated(): + """Check that Python is deprecated.""" + return sys.version_info[:3] < (3, 9) + + if not python_is_supported(): # pragma: no cover sys.exit(VERSIONS_REQUIRED_MESSAGE.format(version=sys.version)) +if python_is_deprecated(): + import warnings + msg = DEPRECATED_PYTHON_MESSAGE.format( + version=sys.version.split(maxsplit=1)[0]) + warnings.warn(msg, FutureWarning) # adjust this line no in utils.execute() + del warnings + def main() -> None: """Entry point for :func:`tests.utils.execute_pwb`.""" diff --git a/pyproject.toml b/pyproject.toml index cea7b2762e..9c305cd163 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,6 +92,14 @@ classifiers=[ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: 3.15", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Internet :: WWW/HTTP :: Dynamic Content :: Wiki", @@ -123,32 +131,33 @@ Tracker = "https://phabricator.wikimedia.org/tag/pywikibot/" ignore_errors = true skip_empty = true -exclude_lines = [ - # Have to re-enable the standard pragma - "pragma: no cover", - "except ImportError", +exclude_also = [ + "@(abc\\.)?abstractmethod", + "@deprecated\\([^\\)]+\\)", + "@unittest\\.skip", + "class .+\\bProtocol\\):", + "except (ImportError|ModuleNotFoundError)", "except KeyboardInterrupt", "except OSError", - "except \\w*ServerError", "except SyntaxError", - "raise ImportError", - "raise 
NotImplementedError", - "raise unittest\\.SkipTest", - "self\\.skipTest", - "if __name__ == '__main__':", + "except \\w*ServerError", + "if (0|False):", "if .+PYWIKIBOT_TEST_\\w+.+:", + "if (typing\\.)?TYPE_CHECKING:", + "if __debug__:", + "if __name__ == .__main__.:", "if self\\.mw_version < .+:", - "if TYPE_CHECKING:", - "@(abc\\.)?abstractmethod", - "@deprecated\\([^\\)]+\\)", - "@unittest\\.skip", -] - -exclude_also = [ # Comments to turn coverage on and off: "no cover: start(?s:.)*?no cover: stop", + "raise (ImportError|ModuleNotFoundError)", + "raise NotImplementedError", + "raise unittest\\.SkipTest", + "self\\.skipTest", ] +[tool.coverage.run] +concurrency = ["multiprocessing", "thread"] +parallel = true [tool.docsig] disable = [ @@ -174,10 +183,11 @@ include_trailing_comma = true lines_after_imports = 2 multi_line_output = 3 use_parentheses = true +extra_standard_library = ["tomllib"] [tool.mypy] -python_version = 3.8 +python_version = "3.9" enable_error_code = [ "ignore-without-code", ] @@ -185,7 +195,7 @@ ignore_missing_imports = true [tool.rstcheck] -ignore_directives = ["automodule", "autoclass", "autofunction", "tabs"] +ignore_directives = ["automodule", "autoclass", "autodata", "autofunction", "tabs"] ignore_messages = '(Undefined substitution referenced: "(release|today|version)")' ignore_roles = ["api", "phab", "pylib", "source", "wiki"] diff --git a/pywikibot/CONTENT.rst b/pywikibot/CONTENT.rst index 2ff8924d94..8fc54f8cfb 100644 --- a/pywikibot/CONTENT.rst +++ b/pywikibot/CONTENT.rst @@ -87,7 +87,9 @@ The contents of the package +----------------------------+------------------------------------------------------+ | data | Module providing layers of data access to wiki | +============================+======================================================+ - | api.py | Interface Module to MediaWiki's api | + | __init__.py | WaitingMixin: A mixin to implement wait cycles | + 
+----------------------------+------------------------------------------------------+ + | api (folder) | Interface Module to MediaWiki's api | | +----------------+-------------------------------------+ | | __init__.py | Interface to MediaWiki's api.php | | +----------------+-------------------------------------+ @@ -99,12 +101,16 @@ The contents of the package | +----------------+-------------------------------------+ | | _requests.py | API Requests interface | +----------------------------+----------------+-------------------------------------+ + | citoid.py | Citoid Query interface | + +----------------------------+------------------------------------------------------+ | memento.py | memento_client 0.6.1 package fix | +----------------------------+------------------------------------------------------+ | mysql.py | Miscellaneous helper functions for mysql queries | +----------------------------+------------------------------------------------------+ | sparql.py | Objects representing SPARQL query API | +----------------------------+------------------------------------------------------+ + | superset.py | Superset Query interface | + +----------------------------+------------------------------------------------------+ | wikistats.py | Objects representing WikiStats API | +----------------------------+------------------------------------------------------+ @@ -140,6 +146,8 @@ The contents of the package +----------------------------+------------------------------------------------------+ | pagegenerators | Page generators module | +============================+======================================================+ + | __init__.py | Page generators options and special page generators | + +----------------------------+------------------------------------------------------+ | _factory.py | Generator factory class to handle options | +----------------------------+------------------------------------------------------+ | _filter.py | Filter functions | @@ -196,6 
+204,8 @@ The contents of the package +----------------------------+------------------------------------------------------+ | _tokenwallet.py | Objects representing api tokens | +----------------------------+------------------------------------------------------+ + | _upload.py | Objects representing API upload to MediaWiki sites | + +----------------------------+------------------------------------------------------+ +----------------------------+------------------------------------------------------+ @@ -236,10 +246,12 @@ The contents of the package +----------------------------+------------------------------------------------------+ - | User Interface | + | userinterfaces | User Interfaces | +============================+======================================================+ | _interface_base.py | Abstract base user interface module | +----------------------------+------------------------------------------------------+ + | buffer_interface.py | Non-interactive interface that stores output | + +----------------------------+------------------------------------------------------+ | gui.py | GUI with a Unicode textfield where the user can edit | +----------------------------+------------------------------------------------------+ | terminal_interface.py | Platform independent terminal interface module | diff --git a/pywikibot/__metadata__.py b/pywikibot/__metadata__.py index 4e9889b3da..53458c1208 100644 --- a/pywikibot/__metadata__.py +++ b/pywikibot/__metadata__.py @@ -12,6 +12,6 @@ from time import strftime -__version__ = '10.3.0.dev0' +__version__ = '10.7.0' __url__ = 'https://www.mediawiki.org/wiki/Manual:Pywikibot' __copyright__ = f'2003-{strftime("%Y")}, Pywikibot team' diff --git a/pywikibot/_wbtypes.py b/pywikibot/_wbtypes.py index 8ca75578c4..89dea48a51 100644 --- a/pywikibot/_wbtypes.py +++ b/pywikibot/_wbtypes.py @@ -12,6 +12,7 @@ import math import re from collections.abc import Mapping +from contextlib import suppress from decimal import Decimal from 
typing import TYPE_CHECKING, Any @@ -19,14 +20,20 @@ from pywikibot import exceptions from pywikibot.backports import Iterator from pywikibot.time import Timestamp -from pywikibot.tools import issue_deprecation_warning, remove_last_args +from pywikibot.tools import ( + deprecated_signature, + issue_deprecation_warning, + remove_last_args, +) if TYPE_CHECKING: + from typing import Union, cast + from pywikibot.site import APISite, BaseSite, DataSite - ItemPageStrNoneType = str | pywikibot.ItemPage | None - ToDecimalType = int | float | str | Decimal | None + ItemPageStrNoneType = Union[str, pywikibot.ItemPage, None] + ToDecimalType = Union[int, float, str, Decimal, None] __all__ = ( @@ -44,9 +51,11 @@ class WbRepresentation(abc.ABC): """Abstract class for Wikibase representations.""" + _items: tuple[str, ...] + @abc.abstractmethod def __init__(self) -> None: - """Constructor.""" + """Initializer.""" raise NotImplementedError @abc.abstractmethod @@ -65,28 +74,37 @@ def fromWikibase( raise NotImplementedError def __str__(self) -> str: - return json.dumps(self.toWikibase(), indent=4, sort_keys=True, - separators=(',', ': ')) + return json.dumps( + self.toWikibase(), + indent=4, + sort_keys=True, + separators=(',', ': ') + ) def __repr__(self) -> str: + """String representation of this object. + + .. versionchanged:: 10.4 + Parameters are shown as representations instead of plain + strings. 
+ + :meta public: + """ assert isinstance(self._items, tuple) assert all(isinstance(item, str) for item in self._items) - values = ((attr, getattr(self, attr)) for attr in self._items) - attrs = ', '.join(f'{attr}={value}' - for attr, value in values) - return f'{self.__class__.__name__}({attrs})' + attrs = ', '.join(f'{attr}={getattr(self, attr)!r}' + for attr in self._items) + return f'{type(self).__name__}({attrs})' def __eq__(self, other: object) -> bool: if isinstance(other, self.__class__): return self.toWikibase() == other.toWikibase() + return NotImplemented def __hash__(self) -> int: - return hash(frozenset(self.toWikibase().items())) - - def __ne__(self, other: object) -> bool: - return not self.__eq__(other) + return hash(json.dumps(self.toWikibase(), sort_keys=True)) class Coordinate(WbRepresentation): @@ -95,28 +113,40 @@ class Coordinate(WbRepresentation): _items = ('lat', 'lon', 'entity') - def __init__(self, lat: float, lon: float, alt: float | None = None, - precision: float | None = None, - globe: str | None = None, typ: str = '', - name: str = '', dim: int | None = None, - site: DataSite | None = None, - globe_item: ItemPageStrNoneType = None, - primary: bool = False) -> None: + @deprecated_signature(since='10.4.0') + def __init__( + self, + lat: float, + lon: float, + *, + alt: float | None = None, + precision: float | None = None, + globe: str | None = None, + typ: str = '', + name: str = '', + dim: int | None = None, + site: DataSite | None = None, + globe_item: ItemPageStrNoneType = None, + primary: bool = False + ) -> None: """Represent a geo coordinate. - :param lat: Latitude - :param lon: Longitude - :param alt: Altitude - :param precision: precision - :param globe: Which globe the point is on - :param typ: The type of coordinate point - :param name: The name - :param dim: Dimension (in meters) - :param site: The Wikibase site - :param globe_item: The Wikibase item for the globe, or the - entity URI of this Wikibase item. 
Takes precedence over - 'globe' if present. - :param primary: True for a primary set of coordinates + .. versionchanged:: 10.4 + The parameters after `lat` and `lon` are now keyword-only. + + :param lat: Latitude coordinate + :param lon: Longitude coordinate + :param alt: Altitude in meters + :param precision: Precision of the coordinate + :param globe: The globe the coordinate is on (e.g. 'earth') + :param typ: Type of coordinate point + :param name: Name associated with the coordinate + :param dim: Dimension in meters used for precision calculation + :param site: The Wikibase site instance + :param globe_item: Wikibase item or entity URI for the globe; + takes precedence over *globe* + :param primary: Indicates if this is a primary coordinate set + (default: False) """ self.lat = lat self.lon = lon @@ -137,11 +167,16 @@ def __init__(self, lat: float, lon: float, alt: float | None = None, @property def entity(self) -> str: - """Return the entity uri of the globe.""" + """Return the entity URI of the globe. + + :raises CoordinateGlobeUnknownError: the globe is not supported + by Wikibase + """ if not self._entity: if self.globe not in self.site.globes(): raise exceptions.CoordinateGlobeUnknownError( f'{self.globe} is not supported in Wikibase yet.') + return self.site.globes()[self.globe] if isinstance(self._entity, pywikibot.ItemPage): @@ -152,37 +187,41 @@ def entity(self) -> str: def toWikibase(self) -> dict[str, Any]: """Export the data to a JSON object for the Wikibase API. - FIXME: Should this be in the DataSite object? 
- - :return: Wikibase JSON + :return: Wikibase JSON representation of the coordinate """ - return {'latitude': self.lat, - 'longitude': self.lon, - 'altitude': self.alt, - 'globe': self.entity, - 'precision': self.precision, - } + return { + 'latitude': self.lat, + 'longitude': self.lon, + 'altitude': self.alt, + 'globe': self.entity, + 'precision': self.precision, + } @classmethod def fromWikibase(cls, data: dict[str, Any], site: DataSite | None = None) -> Coordinate: - """Constructor to create an object from Wikibase's JSON output. + """Create an object from Wikibase's JSON output. - :param data: Wikibase JSON - :param site: The Wikibase site + :param data: Wikibase JSON data + :param site: The Wikibase site instance + :return: Coordinate instance """ - if site is None: - site = pywikibot.Site().data_repository() - + site = site or pywikibot.Site().data_repository() globe = None - if data['globe']: + if data.get('globe'): globes = {entity: name for name, entity in site.globes().items()} globe = globes.get(data['globe']) - return cls(data['latitude'], data['longitude'], - data['altitude'], data['precision'], - globe, site=site, globe_item=data['globe']) + return cls( + data['latitude'], + data['longitude'], + alt=data.get('altitude'), + precision=data.get('precision'), + globe=globe, + site=site, + globe_item=data.get('globe') + ) @property def precision(self) -> float | None: @@ -214,17 +253,28 @@ def precision(self) -> float | None: precision = math.degrees( self._dim / (radius * math.cos(math.radians(self.lat)))) + + :return: precision in degrees or None """ - if self._dim is None and self._precision is None: + if self._precision is not None: + return self._precision + + if self._dim is None: return None - if self._precision is None and self._dim is not None: - radius = 6378137 # TODO: Support other globes + + radius = 6378137 # Earth radius in meters (TODO: support other globes) + with suppress(ZeroDivisionError): self._precision = math.degrees( self._dim / 
(radius * math.cos(math.radians(self.lat)))) + return self._precision @precision.setter def precision(self, value: float) -> None: + """Set the precision value. + + :param value: precision in degrees + """ self._precision = value def precisionToDim(self) -> int | None: @@ -251,38 +301,50 @@ def precisionToDim(self) -> int | None: But this is not valid, since it returns a float value for dim which is an integer. We must round it off to the nearest integer. - Therefore:: + Therefore: + + .. code-block:: python - dim = int(round(math.radians( - precision)*radius*math.cos(math.radians(self.lat)))) + dim = int(round(math.radians( + precision)*radius*math.cos(math.radians(self.lat)))) + + :return: dimension in meters + :raises ValueError: if neither dim nor precision is set """ - if self._dim is None and self._precision is None: + if self._dim is not None: + return self._dim + + if self._precision is None: raise ValueError('No values set for dim or precision') - if self._dim is None and self._precision is not None: - radius = 6378137 - self._dim = int( - round( - math.radians(self._precision) * radius * math.cos( - math.radians(self.lat)) - ) + + radius = 6378137 + self._dim = int( + round( + math.radians(self._precision) * radius * math.cos( + math.radians(self.lat)) ) + ) return self._dim - def get_globe_item(self, repo: DataSite | None = None, + @deprecated_signature(since='10.4.0') + def get_globe_item(self, repo: DataSite | None = None, *, lazy_load: bool = False) -> pywikibot.ItemPage: """Return the ItemPage corresponding to the globe. - Note that the globe need not be in the same data repository as - the Coordinate itself. + .. note:: The globe need not be in the same data repository as + the Coordinate itself. A successful lookup is stored as an internal value to avoid the need for repeated lookups. + .. versionchanged:: 10.4 + The *lazy_load* parameter is now keyword-only. 
+ :param repo: the Wikibase site for the globe, if different from - that provided with the Coordinate. - :param lazy_load: Do not raise NoPage if ItemPage does not - exist. - :return: pywikibot.ItemPage + that provided with the Coordinate + :param lazy_load: Do not raise :exc:`exceptions.NoPageError` if + ItemPage does not exist + :return: pywikibot.ItemPage of the globe """ if isinstance(self._entity, pywikibot.ItemPage): return self._entity @@ -313,7 +375,7 @@ class _Precision(Mapping): 'second': 14, } - def __getitem__(self, key) -> int: + def __getitem__(self, key: str) -> int: if key == 'millenia': issue_deprecation_warning( f'{key!r} key for precision', "'millennium'", since='10.0.0') @@ -321,7 +383,7 @@ def __getitem__(self, key) -> int: return self.PRECISION[key] - def __iter__(self) -> Iterator[int]: + def __iter__(self) -> Iterator[str]: return iter(self.PRECISION) def __len__(self) -> int: @@ -343,6 +405,12 @@ class WbTime(WbRepresentation): :class:`pywikibot.Timestamp` and :meth:`fromTimestamp`. 
""" + month: int + day: int + hour: int + minute: int + second: int + PRECISION = _Precision() FORMATSTR = '{0:+012d}-{1:02d}-{2:02d}T{3:02d}:{4:02d}:{5:02d}Z' @@ -365,19 +433,26 @@ class WbTime(WbRepresentation): 12: 334, # Nov -> Dec: 30 days, plus 304 days in Jan -> Nov } - def __init__(self, - year: int | None = None, - month: int | None = None, - day: int | None = None, - hour: int | None = None, - minute: int | None = None, - second: int | None = None, - precision: int | str | None = None, - before: int = 0, - after: int = 0, - timezone: int = 0, - calendarmodel: str | None = None, - site: DataSite | None = None) -> None: + _timestr_re = re.compile( + r'([-+]?\d{1,16})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z') + + @deprecated_signature(since='10.4.0') + def __init__( + self, + year: int, + month: int | None = None, + day: int | None = None, + hour: int | None = None, + minute: int | None = None, + second: int | None = None, + *, + precision: int | str | None = None, + before: int = 0, + after: int = 0, + timezone: int = 0, + calendarmodel: str | None = None, + site: DataSite | None = None + ) -> None: """Create a new WbTime object. The precision can be set by the Wikibase int value (0-14) or by @@ -406,6 +481,11 @@ def __init__(self, *precision* value 'millenia' is deprecated; 'millennium' must be used instead. + .. versionchanged:: 10.4 + The parameters except timestamp values are now keyword-only. + A TypeError is raised if *year* is not an int. Previously, a + ValueError was raised if *year* was None. + :param year: The year as a signed integer of between 1 and 16 digits. :param month: Month of the timestamp, if it exists. @@ -427,56 +507,55 @@ def __init__(self, :param site: The Wikibase site. If not provided, retrieves the data repository from the default site from user-config.py. Only used if calendarmodel is not given. + :raises TypeError: Invalid *year* type. 
+ :raises ValueError: Invalid *precision* or *site* or default + site has no data repository. """ - if year is None: - raise ValueError('no year given') - self.precision = self.PRECISION['year'] - if month is not None: - self.precision = self.PRECISION['month'] - else: - month = 1 - if day is not None: - self.precision = self.PRECISION['day'] - else: - day = 1 - if hour is not None: - self.precision = self.PRECISION['hour'] - else: - hour = 0 - if minute is not None: - self.precision = self.PRECISION['minute'] - else: - minute = 0 - if second is not None: - self.precision = self.PRECISION['second'] - else: - second = 0 + if not isinstance(year, int): + raise TypeError(f'year must be an int, not {type(year).__name__}') + + units = [ + ('month', month, 1), + ('day', day, 1), + ('hour', hour, 0), + ('minute', minute, 0), + ('second', second, 0), + ] + + # set unit attribute values self.year = year - self.month = month - self.day = day - self.hour = hour - self.minute = minute - self.second = second + for unit, value, default in units: + setattr(self, unit, value if value is not None else default) + + if precision is None: + # Autodetection of precision based on the passed time values + prec = self.PRECISION['year'] + + for unit, value, _ in units: + if value is not None: + prec = self.PRECISION[unit] + else: + # explicit precision is given + if (isinstance(precision, int) + and precision in self.PRECISION.values()): + prec = precision + elif isinstance(precision, str) and precision in self.PRECISION: + prec = self.PRECISION[cast(str, precision)] + else: + raise ValueError(f'Invalid precision: "{precision}"') + + self.precision = prec self.after = after self.before = before self.timezone = timezone if calendarmodel is None: + site = site or pywikibot.Site().data_repository() if site is None: - site = pywikibot.Site().data_repository() - if site is None: - raise ValueError( - f'Site {pywikibot.Site()} has no data repository') + raise ValueError( + f'Site 
{pywikibot.Site()} has no data repository') calendarmodel = site.calendarmodel() + self.calendarmodel = calendarmodel - # if precision is given it overwrites the autodetection above - if precision is not None: - if (isinstance(precision, int) - and precision in self.PRECISION.values()): - self.precision = precision - elif precision in self.PRECISION: - self.precision = self.PRECISION[precision] - else: - raise ValueError(f'Invalid precision: "{precision}"') def _getSecondsAdjusted(self) -> int: """Return an internal representation of the time object as seconds. @@ -572,60 +651,78 @@ def equal_instant(self, other: WbTime) -> bool: return self._getSecondsAdjusted() == other._getSecondsAdjusted() @classmethod - def fromTimestr(cls, - datetimestr: str, - precision: int | str = 14, - before: int = 0, - after: int = 0, - timezone: int = 0, - calendarmodel: str | None = None, - site: DataSite | None = None) -> WbTime: + @deprecated_signature(since='10.4.0') + def fromTimestr( + cls, + datetimestr: str, + *, + precision: int | str = 14, + before: int = 0, + after: int = 0, + timezone: int = 0, + calendarmodel: str | None = None, + site: DataSite | None = None + ) -> WbTime: """Create a new WbTime object from a UTC date/time string. - The timestamp differs from ISO 8601 in that: - - * The year is always signed and having between 1 and 16 digits; - * The month, day and time are zero if they are unknown; - * The Z is discarded since time zone is determined from the timezone - param. - - :param datetimestr: Timestamp in a format resembling ISO 8601, - e.g. +2013-01-01T00:00:00Z - :param precision: The unit of the precision of the time. Defaults to - 14 (second). - :param before: Number of units after the given time it could be, if - uncertain. The unit is given by the precision. - :param after: Number of units before the given time it could be, if - uncertain. The unit is given by the precision. - :param timezone: Timezone information in minutes. 
+ The timestamp format must match a string resembling ISO 8601 + with the following constraints: + + - Year is signed and can have between 1 and 16 digits. + - Month, day, hour, minute and second are always two digits. + They may be zero. + - Time is always in UTC and ends with ``Z``. + - Example: ``+0000000000123456-01-01T00:00:00Z``. + + .. versionchanged:: 10.4 + The parameters except *datetimestr* are now keyword-only. + + :param datetimestr: Timestamp string to parse + :param precision: The unit of the precision of the time. Defaults + to 14 (second). + :param before: Number of units after the given time it could be, + if uncertain. The unit is given by the precision. + :param after: Number of units before the given time it could be, + if uncertain. The unit is given by the precision. + :param timezone: Timezone offset in minutes. :param calendarmodel: URI identifying the calendar model. - :param site: The Wikibase site. If not provided, retrieves the data - repository from the default site from user-config.py. + :param site: The Wikibase site. If not provided, retrieves the + data repository from the default site from user-config.py. Only used if calendarmodel is not given. + :raises ValueError: If the string does not match the expected + format. 
""" - match = re.match(r'([-+]?\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z', - datetimestr) + match = cls._timestr_re.match(datetimestr) if not match: raise ValueError(f"Invalid format: '{datetimestr}'") + t = match.groups() return cls(int(t[0]), int(t[1]), int(t[2]), int(t[3]), int(t[4]), int(t[5]), - precision, before, after, timezone, calendarmodel, site) + precision=precision, before=before, after=after, + timezone=timezone, calendarmodel=calendarmodel, site=site) @classmethod - def fromTimestamp(cls, - timestamp: Timestamp, - precision: int | str = 14, - before: int = 0, - after: int = 0, - timezone: int = 0, - calendarmodel: str | None = None, - site: DataSite | None = None, - copy_timezone: bool = False) -> WbTime: + @deprecated_signature(since='10.4.0') + def fromTimestamp( + cls, + timestamp: Timestamp, + *, + precision: int | str = 14, + before: int = 0, + after: int = 0, + timezone: int = 0, + calendarmodel: str | None = None, + site: DataSite | None = None, + copy_timezone: bool = False + ) -> WbTime: """Create a new WbTime object from a pywikibot.Timestamp. .. versionchanged:: 8.0 Added *copy_timezone* parameter. + .. versionchanged:: 10.4 + The parameters except *timestamp* are now keyword-only. + :param timestamp: Timestamp :param precision: The unit of the precision of the time. @@ -650,6 +747,89 @@ def fromTimestamp(cls, before=before, after=after, timezone=timezone, calendarmodel=calendarmodel, site=site) + @staticmethod + def _normalize_millennium(year: int) -> int: + """Round the given year to the start of its millennium. + + The rounding is performed towards positive infinity for positive + years and towards negative infinity for negative years. + + .. versionadded:: 10.4 + + :param year: The year as an integer. + :return: The first year of the millennium containing the given + year. + """ + # For negative years, floor rounds away from zero to correctly handle + # BCE dates. For positive years, ceil rounds up to the next + # millennium/century. 
+ year_float = year / 1000 + if year_float < 0: + year = math.floor(year_float) + else: + year = math.ceil(year_float) + return year * 1000 + + @staticmethod + def _normalize_century(year: int) -> int: + """Round the given year to the start of its century. + + The rounding is performed towards positive infinity for positive + years and towards negative infinity for negative years. + + .. versionadded:: 10.4 + + :param year: The year as an integer. + :return: The first year of the century containing the given year. + """ + # For century, -1301 is the same century as -1400 but not -1401. + # Similar for 1901 and 2000 vs 2001. + year_float = year / 100 + if year_float < 0: + year = math.floor(year_float) + else: + year = math.ceil(year_float) + return year * 100 + + @staticmethod + def _normalize_decade(year: int) -> int: + """Round the given year down to the start of its decade. + + Unlike millennium or century normalization, this always + truncates towards zero. + + .. versionadded:: 10.4 + + :param year: The year as an integer. + :return: The first year of the decade containing the given year. + """ + # For decade, -1340 is the same decade as -1349 but not -1350. + # Similar for 2010 and 2019 vs 2020 + year_float = year / 10 + year = math.trunc(year_float) + return year * 10 + + @staticmethod + def _normalize_power_of_ten(year: int, precision: int) -> int: + """Round the year to the given power-of-ten precision. + + This is used for very coarse historical precision levels, where + the time unit represents a power-of-ten number of years. + + .. versionadded:: 10.4 + + :param year: The year as an integer. + :param precision: The precision level (Wikibase int value). + :return: The normalized year rounded to the nearest matching + power-of-ten boundary. + """ + # Wikidata rounds the number based on the first non-decimal digit. 
+ # Python's round function will round -15.5 to -16, and +15.5 to +16 + # so we don't need to do anything complicated like the other + # examples. + power_of_10 = 10 ** (9 - precision) + return round(year / power_of_10) * power_of_10 + def normalize(self) -> WbTime: """Normalizes the WbTime object to account for precision. @@ -663,45 +843,24 @@ def normalize(self) -> WbTime: Normalization will delete timezone information if the precision is less than or equal to DAY. - Note: Normalized WbTime objects can only be compared to other - normalized WbTime objects of the same precision. Normalization - might make a WbTime object that was less than another WbTime object - before normalization, greater than it after normalization, or vice - versa. + .. note:: Normalized WbTime objects can only be compared to + other normalized WbTime objects of the same precision. + Normalization might make a WbTime object that was less than + another WbTime object before normalization, greater than it + after normalization, or vice versa. """ year = self.year - # This is going to get messy. - if self.PRECISION['1000000000'] <= self.precision <= self.PRECISION['10000']: # noqa: E501 - # 1000000000 == 10^9 - power_of_10 = 10 ** (9 - self.precision) - # Wikidata rounds the number based on the first non-decimal digit. - # Python's round function will round -15.5 to -16, and +15.5 to +16 - # so we don't need to do anything complicated like the other - # examples. - year = round(year / power_of_10) * power_of_10 - elif self.precision == self.PRECISION['millennium']: - # Similar situation with centuries - year_float = year / 1000 - if year_float < 0: - year = math.floor(year_float) - else: - year = math.ceil(year_float) - year *= 1000 - elif self.precision == self.PRECISION['century']: - # For century, -1301 is the same century as -1400 but not -1401. - # Similar for 1901 and 2000 vs 2001. 
- year_float = year / 100 - if year_float < 0: - year = math.floor(year_float) - else: - year = math.ceil(year_float) - year *= 100 - elif self.precision == self.PRECISION['decade']: - # For decade, -1340 is the same decade as -1349 but not -1350. - # Similar for 2010 and 2019 vs 2020 - year_float = year / 10 - year = math.trunc(year_float) - year *= 10 + for prec in 'millennium', 'century', 'decade': + if self.precision == self.PRECISION[prec]: + handler = getattr(self, '_normalize_' + prec) + year = handler(year) + break + else: + lower = self.PRECISION['1000000000'] + upper = self.PRECISION['10000'] + if lower <= self.precision <= upper: + year = self._normalize_power_of_ten(year, self.precision) + kwargs = { 'precision': self.precision, 'before': self.before, @@ -709,18 +868,14 @@ def normalize(self) -> WbTime: 'calendarmodel': self.calendarmodel, 'year': year } - if self.precision >= self.PRECISION['month']: - kwargs['month'] = self.month - if self.precision >= self.PRECISION['day']: - kwargs['day'] = self.day - if self.precision >= self.PRECISION['hour']: - # See T326693 - kwargs['timezone'] = self.timezone - kwargs['hour'] = self.hour - if self.precision >= self.PRECISION['minute']: - kwargs['minute'] = self.minute - if self.precision >= self.PRECISION['second']: - kwargs['second'] = self.second + + for prec in 'month', 'day', 'hour', 'minute', 'second': + if self.precision >= self.PRECISION[prec]: + kwargs[prec] = getattr(self, prec) + if prec == 'hour': + # Add timezone, see T326693 + kwargs['timezone'] = self.timezone + return type(self)(**kwargs) @remove_last_args(['normalize']) # since 8.2.0 @@ -796,9 +951,15 @@ def fromWikibase(cls, data: dict[str, Any], :param site: The Wikibase site. If not provided, retrieves the data repository from the default site from user-config.py. 
""" - return cls.fromTimestr(data['time'], data['precision'], - data['before'], data['after'], - data['timezone'], data['calendarmodel'], site) + return cls.fromTimestr( + data['time'], + precision=data['precision'], + before=data['before'], + after=data['after'], + timezone=data['timezone'], + calendarmodel=data['calendarmodel'], + site=site + ) class WbQuantity(WbRepresentation): diff --git a/pywikibot/backports.py b/pywikibot/backports.py index a3b64f773d..ca4d3c6991 100644 --- a/pywikibot/backports.py +++ b/pywikibot/backports.py @@ -16,12 +16,12 @@ import re import sys -from typing import Any +from typing import TYPE_CHECKING, Any # Placed here to omit circular import in tools -PYTHON_VERSION = sys.version_info[:3] -SPHINX_RUNNING = 'sphinx' in sys.modules +PYTHON_VERSION: tuple[int, int, int] = sys.version_info[:3] +SPHINX_RUNNING: bool = 'sphinx' in sys.modules # functools.cache if PYTHON_VERSION >= (3, 9): @@ -53,10 +53,12 @@ Generator, Iterable, Iterator, + List, Mapping, Match, Pattern, Sequence, + Set, ) else: from collections import Counter @@ -70,6 +72,8 @@ ) from re import Match, Pattern Dict = dict # type: ignore[misc] + List = list # type: ignore[misc] + Set = set # type: ignore[misc] if PYTHON_VERSION < (3, 9, 2): @@ -135,8 +139,9 @@ def pairwise(iterable): a, b = tee(iterable) next(b, None) return zip(a, b) -else: - from itertools import pairwise # type: ignore[no-redef] + +elif not TYPE_CHECKING: + from itertools import pairwise from types import NoneType @@ -200,13 +205,18 @@ def batched(iterable, n: int, *, raise ValueError(msg) yield tuple(group) else: # PYTHON_VERSION == (3, 12) - from itertools import batched as _batched + if TYPE_CHECKING: + _batched: Callable[[Iterable, int], Iterable] + else: + from itertools import batched as _batched + for group in _batched(iterable, n): if strict and len(group) < n: raise ValueError(msg) yield group -else: - from itertools import batched # type: ignore[no-redef] + +elif not TYPE_CHECKING: + from 
itertools import batched # gh-115942, gh-134323 @@ -289,4 +299,4 @@ def locked(self): return status == 'locked' else: - from threading import RLock + from threading import RLock # type: ignore[assignment] diff --git a/pywikibot/bot.py b/pywikibot/bot.py index 3ed061165f..b19730760d 100644 --- a/pywikibot/bot.py +++ b/pywikibot/bot.py @@ -190,9 +190,11 @@ class is mainly used for bots which work with Wikibase or together if TYPE_CHECKING: + from typing import Union + from pywikibot.site import BaseSite - AnswerType = Iterable[tuple[str, str] | Option] | Option + AnswerType = Union[Iterable[Union[tuple[str, str], Option]], Option] _GLOBAL_HELP = """ GLOBAL OPTIONS @@ -278,6 +280,9 @@ class is mainly used for bots which work with Wikibase or together """Holds a user interface object defined in :mod:`pywikibot.userinterfaces` subpackage.""" +#: global args used by tests via pwb wrapper +global_args: list[str] | None = None + def set_interface(module_name: str) -> None: """Configures any bots to use the given interface module. @@ -749,6 +754,9 @@ def handle_args(args: Iterable[str] | None = None, # not the one in pywikibot.bot. args = pywikibot.argvu[1:] + global global_args + global_args = args + # get the name of the module calling this function. This is # required because the -help option loads the module's docstring and # because the module name will be used for the filename of the log. @@ -1834,21 +1842,26 @@ def treat(self, page: pywikibot.page.BasePage) -> None: self.current_page = page self.treat_page() - def put_current(self, new_text: str, - ignore_save_related_errors: bool | None = None, - ignore_server_errors: bool | None = None, - **kwargs: Any) -> bool: + def put_current( + self, + new_text: str, + ignore_save_related_errors: bool | None = None, + ignore_server_errors: bool | None = None, + **kwargs: Any + ) -> bool: """Call :py:obj:`Bot.userPut` but use the current page. It compares the new_text to the current page text. 
:param new_text: The new text - :param ignore_save_related_errors: Ignore save related errors and - automatically print a message. If None uses this instances default. - :param ignore_server_errors: Ignore server errors and automatically - print a message. If None uses this instances default. + :param ignore_save_related_errors: Ignore save related errors + and automatically print a message. If None uses this + instances default. + :param ignore_server_errors: Ignore server errors and + automatically print a message. If None uses this instances + default. :param kwargs: Additional parameters directly given to - :py:obj:`Bot.userPut`. + :meth:`BaseBot.userPut`. :return: whether the page was saved successfully """ if ignore_save_related_errors is None: @@ -1856,10 +1869,13 @@ def put_current(self, new_text: str, if ignore_server_errors is None: ignore_server_errors = self.ignore_server_errors return self.userPut( - self.current_page, self.current_page.text, new_text, + self.current_page, + self.current_page.text, + new_text, ignore_save_related_errors=ignore_save_related_errors, ignore_server_errors=ignore_server_errors, - **kwargs) + **kwargs + ) class AutomaticTWSummaryBot(CurrentPageBot): @@ -1896,8 +1912,14 @@ def summary_parameters(self) -> None: """Delete the i18n dictionary.""" del self._summary_parameters - def put_current(self, *args: Any, **kwargs: Any) -> None: - """Defining a summary if not already defined and then call original.""" + def put_current(self, *args: Any, **kwargs: Any) -> bool: + """Defining a summary if not already defined and then call original. + + For parameters see :meth:`CurrentPageBot.put_current` + + .. 
versionchanged:: 10.6 + return whether the page was saved successfully + """ if not kwargs.get('summary'): if self.summary_key is None: raise ValueError('The summary_key must be set.') @@ -1906,7 +1928,8 @@ def put_current(self, *args: Any, **kwargs: Any) -> None: self.summary_parameters) _log(f'Use automatic summary message "{summary}"') kwargs['summary'] = summary - super().put_current(*args, **kwargs) + + return super().put_current(*args, **kwargs) class ExistingPageBot(CurrentPageBot): diff --git a/pywikibot/bot_choice.py b/pywikibot/bot_choice.py index f4ced790e8..14bfb4fc22 100644 --- a/pywikibot/bot_choice.py +++ b/pywikibot/bot_choice.py @@ -1,6 +1,6 @@ """Options and Choices for :py:meth:`pywikibot.input_choice`.""" # -# (C) Pywikibot team, 2015-2024 +# (C) Pywikibot team, 2015-2025 # # Distributed under the terms of the MIT license. # @@ -9,7 +9,7 @@ import re from abc import ABC, abstractmethod from textwrap import fill -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import pywikibot from pywikibot.backports import Iterable, Mapping, Sequence @@ -39,7 +39,7 @@ if TYPE_CHECKING: - from typing_extensions import Literal + from typing import Any, Literal from pywikibot.page import BaseLink, Link, Page @@ -603,7 +603,7 @@ class UnhandledAnswer(Exception): # noqa: N818 """The given answer didn't suffice.""" - def __int__(self, stop: bool = False) -> None: + def __init__(self, stop: bool = False) -> None: """Initializer.""" self.stop = stop diff --git a/pywikibot/comms/__init__.py b/pywikibot/comms/__init__.py index e76cb24e81..49a90272bc 100644 --- a/pywikibot/comms/__init__.py +++ b/pywikibot/comms/__init__.py @@ -1,6 +1,6 @@ """Communication layer.""" # -# (C) Pywikibot team, 2008-2022 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. 
# diff --git a/pywikibot/comms/eventstreams.py b/pywikibot/comms/eventstreams.py index 3d800b0e99..6db46d938b 100644 --- a/pywikibot/comms/eventstreams.py +++ b/pywikibot/comms/eventstreams.py @@ -26,7 +26,8 @@ from requests.packages.urllib3.util.response import httplib from pywikibot import Site, Timestamp, config, debug, warning -from pywikibot.backports import NoneType +from pywikibot.backports import Dict, List, NoneType +from pywikibot.comms.http import user_agent from pywikibot.tools import cached, deprecated_args from pywikibot.tools.collections import GeneratorWrapper @@ -178,7 +179,7 @@ def __init__(self, **kwargs) -> None: if isinstance(EventSource, ModuleNotFoundError): raise ImportError(INSTALL_MSG) from EventSource - self.filter = {'all': [], 'any': [], 'none': []} + self.filter: Dict[str, List[Any]] = {'all': [], 'any': [], 'none': []} self._total: int | None = None self._canary = kwargs.pop('canary', False) @@ -207,6 +208,10 @@ def __init__(self, **kwargs) -> None: kwargs['reconnection_time'] = timedelta(milliseconds=retry) kwargs.setdefault('timeout', config.socket_timeout) + + kwargs.setdefault('headers', {}) + kwargs['headers'].setdefault('user-agent', user_agent(self._site)) + self.sse_kwargs = kwargs def __repr__(self) -> str: diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py index f6d4bc07f4..281cf1693d 100644 --- a/pywikibot/comms/http.py +++ b/pywikibot/comms/http.py @@ -196,7 +196,7 @@ def user_agent_username(username=None): def user_agent(site: pywikibot.site.BaseSite | None = None, - format_string: str = '') -> str: + format_string: str | None = '') -> str: """Generate the user agent string for a given site and format. :param site: The site for which this user agent is intended. 
May be @@ -211,7 +211,7 @@ def user_agent(site: pywikibot.site.BaseSite | None = None, pywikibot.bot.calledModuleName())) values.update(dict.fromkeys(['family', 'code', 'lang', 'site'], '')) - script_comments = [] + script_comments: list[str] = [] if config.user_agent_description: script_comments.append(config.user_agent_description) @@ -539,7 +539,7 @@ def _try_decode(content: bytes, encoding: str | None) -> str | None: pywikibot.warning( f'Unknown or invalid encoding {encoding!r} for {response.url}') except UnicodeDecodeError as e: - pywikibot.warning(f'{e} found in {content}') + pywikibot.warning(f'{e} found in {content!r}') else: return encoding diff --git a/pywikibot/config.py b/pywikibot/config.py index 113e766b56..ced631c697 100644 --- a/pywikibot/config.py +++ b/pywikibot/config.py @@ -33,7 +33,7 @@ default. Editor detection functions were moved to :mod:`editor`. """ # -# (C) Pywikibot team, 2003-2024 +# (C) Pywikibot team, 2003-2025 # # Distributed under the terms of the MIT license. # @@ -146,7 +146,7 @@ class _ConfigurationDeprecationWarning(UserWarning): # User agent description # This is a free-form string that can be user to describe specific bot/tool, # provide contact information, etc. -user_agent_description = None +user_agent_description: str | None = None # Fake user agent. # Some external websites reject bot-like user agents. It is possible to use # fake user agents in requests to these websites. @@ -225,7 +225,7 @@ class _ConfigurationDeprecationWarning(UserWarning): # use them. 
In this case, the password file should contain a BotPassword object # in the following format: # (username, BotPassword(botname, botpassword)) -password_file = None +password_file: str | os.PathLike | None = None # edit summary to use if not supplied by bot script # WARNING: this should NEVER be used in practice, ALWAYS supply a more @@ -498,7 +498,7 @@ def register_families_folder(folder_path: str, # transliteration_target = console_encoding # After emitting the warning, this last option will be set. -transliteration_target = None +transliteration_target: str | None = None # The encoding in which textfiles are stored, which contain lists of page # titles. The most used is 'utf-8'; 'utf-8-sig' recognizes BOM. @@ -937,7 +937,8 @@ def shortpath(path: str) -> str: _filestatus = os.stat(_filename) _filemode = _filestatus[0] _fileuid = _filestatus[4] - if not OSWIN32 and _fileuid not in [os.getuid(), 0]: + if not OSWIN32 \ + and _fileuid not in [os.getuid(), 0]: # type: ignore[attr-defined] warning(f'Skipped {_filename!r}: owned by someone else.') elif OSWIN32 or _filemode & 0o02 == 0: with open(_filename, 'rb') as f: diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py index b78b8feabc..725a9240a7 100644 --- a/pywikibot/cosmetic_changes.py +++ b/pywikibot/cosmetic_changes.py @@ -59,7 +59,7 @@ import re from contextlib import suppress from enum import IntEnum -from typing import Any +from typing import Any, cast from urllib.parse import urlparse, urlunparse import pywikibot @@ -68,7 +68,7 @@ from pywikibot.site import Namespace from pywikibot.tools import first_lower, first_upper from pywikibot.tools.chars import url2string -from pywikibot.userinterfaces.transliteration import NON_LATIN_DIGITS +from pywikibot.userinterfaces.transliteration import NON_ASCII_DIGITS try: @@ -369,10 +369,8 @@ def standardizePageFooter(self, text: str) -> str: if not self.talkpage: subpage = False if self.template: - loc = None - with suppress(TypeError): - _tmpl, loc = 
i18n.translate(self.site.code, moved_links) - if loc is not None and loc in self.title: + loc = i18n.translate(self.site.code, moved_links) + if loc is not None and loc[1] in self.title: subpage = True # get interwiki @@ -504,25 +502,34 @@ def init_cache() -> None: cache[False] = True # signal there is nothing to replace def replace_magicword(match: Match[str]) -> str: + """Replace magic words in file link params, leaving captions.""" + linktext = match.group() if cache.get(False): - return match.group() - split = match.group().split('|') - if len(split) == 1: - return match.group() + return linktext + + params = match.group(2) # includes pre-leading | + if not params: + return linktext if not cache: init_cache() - # push ']]' out and re-add below - split[-1] = split[-1][:-2] - return '{}|{}]]'.format( - split[0], '|'.join(cache.get(x.strip(), x) for x in split[1:])) + # do the magic job + marker = textlib.findmarker(params) + params = textlib.replaceExcept( + params, r'\|', marker, ['link', 'template']) + parts = params.split(marker) + replaced = '|'.join(cache.get(p.strip(), p) for p in parts) + + # extract namespace + m = cast(Match[str], + re.match(r'\[\[\s*(?P[^:]+)\s*:', linktext)) + + return f'[[{m["namespace"]}:{match["filename"]}{replaced}]]' cache: dict[bool | str, Any] = {} exceptions = ['comment', 'nowiki', 'pre', 'syntaxhighlight'] - regex = re.compile( - textlib.FILE_LINK_REGEX % '|'.join(self.site.namespaces[6]), - flags=re.VERBOSE) + regex = textlib.get_regexes('file', self.site)[0] return textlib.replaceExcept( text, regex, replace_magicword, exceptions) @@ -735,7 +742,7 @@ def removeEmptySections(self, text: str) -> str: return text # iterate stripped sections and create a new page body - new_body: list[textlib.Section] = [] + new_body: textlib.SectionList = textlib.SectionList() for i, strip_section in enumerate(strip_sections): current_dep = sections[i].level try: @@ -1031,10 +1038,10 @@ def fixArabicLetters(self, text: str) -> str: 
'syntaxhighlight', ] - digits = NON_LATIN_DIGITS['fa'] + digits = NON_ASCII_DIGITS['fa'] faChrs = 'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits - # not to let bot edits in latin content + # not to let bot edits in ascii numerals content exceptions.append(re.compile(f'[^{faChrs}] *?"*? *?, *?[^{faChrs}]')) text = textlib.replaceExcept(text, ',', '،', exceptions, site=self.site) diff --git a/pywikibot/daemonize.py b/pywikibot/daemonize.py index 9b8df1de9f..a00c131416 100644 --- a/pywikibot/daemonize.py +++ b/pywikibot/daemonize.py @@ -1,21 +1,87 @@ -"""Module to daemonize the current process on Unix.""" +"""Module to daemonize the current process on POSIX systems. + +This module provides a function :func:`daemonize` to turn the current +Python process into a background daemon process on POSIX-compatible +operating systems (Linux, macOS, FreeBSD) but not on WASI, Android or +iOS. It uses the standard double-fork technique to detach the process +from the controlling terminal and optionally closes or redirects +standard streams. + +Double-fork diagram:: + + Original process (parent) + ├── fork() → creates first child + │ └─ Parent exits via os._exit() → returns control to terminal + │ + └── First child + ├── os.setsid() → becomes session leader (detaches from terminal) + ├── fork() → creates second child (grandchild) + │ └─ First child exits → ensures grandchild is NOT a session leader + │ + └── Second child (Daemon) + ├── is_daemon = True + ├── Optionally close/redirect standard streams + ├── Optionally change working directory + └── # Daemon continues here + while True: + do_background_work() + +The "while True" loop represents the main work of the daemon: + +- It runs indefinitely in the background +- Performs tasks such as monitoring files, processing data, or logging +- Everything after :func:`daemonize` runs only in the daemon process + +Example usage: + + ..
code-block:: Python + + import time + from pywikibot.daemonize import daemonize + + def background_task(): + while True: + print("Daemon is working...") + time.sleep(5) + + daemonize() + + # This code only runs in the daemon process + background_task() +""" # -# (C) Pywikibot team, 2007-2022 +# (C) Pywikibot team, 2007-2025 # # Distributed under the terms of the MIT license. # from __future__ import annotations import os +import platform import stat import sys +from contextlib import suppress +from enum import IntEnum from pathlib import Path +from pywikibot.tools import deprecated_signature + + +class StandardFD(IntEnum): + + """File descriptors for standard input, output and error.""" + + STDIN = 0 + STDOUT = 1 + STDERR = 2 + is_daemon = False -def daemonize(close_fd: bool = True, +@deprecated_signature(since='10.6.0') +def daemonize(*, + close_fd: bool = True, chdir: bool = True, redirect_std: str | None = None) -> None: """Daemonize the current process. @@ -23,11 +89,27 @@ def daemonize(close_fd: bool = True, Only works on POSIX compatible operating systems. The process will fork to the background and return control to terminal. + .. versionchanged:: 10.6 + raises NotImplementedError instead of AttributeError if daemonize + is not available for the given platform. Parameters must be given + as keyword-only arguments. + + .. caution:: + Do not use it in multithreaded scripts or in a subinterpreter. 
+ :param close_fd: Close the standard streams and replace them by /dev/null :param chdir: Change the current working directory to / :param redirect_std: Filename to redirect stdout and stdin to + :raises RuntimeError: Must not be run in a subinterpreter + :raises NotImplementedError: Daemon mode not supported on given + platform """ + # platform check for MyPy + if not hasattr(os, 'fork') or sys.platform == 'win32': + msg = f'Daemon mode not supported on {platform.system()}' + raise NotImplementedError(msg) + # Fork away if not os.fork(): # Become session leader @@ -35,15 +117,19 @@ def daemonize(close_fd: bool = True, # Fork again to prevent the process from acquiring a # controlling terminal pid = os.fork() + if not pid: global is_daemon is_daemon = True + # Optionally close and redirect standard streams if close_fd: - os.close(0) - os.close(1) - os.close(2) + for fd in StandardFD: + with suppress(OSError): + os.close(fd) + os.open('/dev/null', os.O_RDWR) + if redirect_std: # R/W mode without execute flags mode = (stat.S_IRUSR | stat.S_IWUSR @@ -53,15 +139,18 @@ def daemonize(close_fd: bool = True, os.O_WRONLY | os.O_APPEND | os.O_CREAT, mode) else: - os.dup2(0, 1) - os.dup2(1, 2) + os.dup2(StandardFD.STDIN, StandardFD.STDOUT) + os.dup2(StandardFD.STDOUT, StandardFD.STDERR) + + # Optionally change working directory if chdir: os.chdir('/') - return + + return # Daemon continues here # Write out the pid path = Path(Path(sys.argv[0]).name).with_suffix('.pid') - path.write_text(str(pid), encoding='uft-8') + path.write_text(str(pid), encoding='utf-8') # Exit to return control to the terminal # os._exit to prevent the cleanup to run diff --git a/pywikibot/data/api/__init__.py b/pywikibot/data/api/__init__.py index 0af2e3ede1..c5af36cb00 100644 --- a/pywikibot/data/api/__init__.py +++ b/pywikibot/data/api/__init__.py @@ -1,6 +1,6 @@ """Interface to MediaWiki's api.php.""" # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms 
of the MIT license. # @@ -55,7 +55,7 @@ def _invalidate_superior_cookies(family) -> None: """ if isinstance(family, SubdomainFamily): for cookie in http.cookie_jar: - if family.domain == cookie.domain: + if family.domain == cookie.domain: # type: ignore[attr-defined] http.cookie_jar.clear(cookie.domain, cookie.path, cookie.name) @@ -71,9 +71,10 @@ class CTEBinaryBytesGenerator(BytesGenerator): def _handle_text(self, msg) -> None: if msg['content-transfer-encoding'] == 'binary': - self._fp.write(msg.get_payload(decode=True)) + self._fp.write( # type: ignore[attr-defined] + msg.get_payload(decode=True)) else: - super()._handle_text(msg) + super()._handle_text(msg) # type: ignore[misc] _writeBody = _handle_text # noqa: N815 diff --git a/pywikibot/data/api/_generators.py b/pywikibot/data/api/_generators.py index 09ab0e2ed3..26795cf561 100644 --- a/pywikibot/data/api/_generators.py +++ b/pywikibot/data/api/_generators.py @@ -43,12 +43,67 @@ class APIGeneratorBase(ABC): - """A wrapper class to handle the usage of the ``parameters`` parameter. + """Base class for all API and query request generators. + + Handles request cleaning and filtering. Each instance can have an + optional filter function applied to items before yielding. Set this + via the :attr:`filter_func` property, which should be a callable + accepting a single item and returning True to yield it, False to skip + it. If :attr:`filter_func` is None, no filtering is applied. + + Subclasses can override :meth:`filter_item` for more complex + filtering logic. .. versionchanged:: 7.6 - renamed from _RequestWrapper + Renamed from _RequestWrapper. + .. versionchanged:: 10.4 + Introduced :attr:`filter_func` and :meth:`filter_item` for + instance-level item filtering. """ + _filter_func: Callable[[Any], bool] | None = None + + @property + def filter_func(self) -> Callable[[Any], bool] | None: + """Get the filter function for this generator instance.
+ + Returns the instance-specific filter if set, otherwise the + class-level default (None by default). + + .. versionadded:: 10.4 + + :return: Callable that accepts an item and returns True to + yield, False to skip; or None to disable filtering + """ + return getattr(self, '_filter_func', type(self)._filter_func) + + @filter_func.setter + def filter_func(self, func: Callable[[Any], bool] | None): + """Set a filter function to apply to items before yielding. + + .. versionadded:: 10.4 + + :param func: Callable that accepts an item and returns True to + yield, False to skip; or None to disable filtering + """ + self._filter_func = func + + def filter_item(self, item: Any) -> bool: + """Determine if a given item should be yielded. + + By default, applies :attr:`filter_func` if set. Returns True if + no filter is set. + + .. versionadded:: 10.4 + + :param item: The item to check + :return: True if the item should be yielded, False otherwise + """ + if self.filter_func is not None: + return self.filter_func(item) + + return True + def _clean_kwargs(self, kwargs, **mw_api_args): """Clean kwargs, define site and request class.""" if 'site' not in kwargs: @@ -162,13 +217,20 @@ def generator(self): """Submit request and iterate the response. Continues response as needed until limit (if defined) is reached. + Applies :meth:`filter_item()` to + each item before yielding. .. versionchanged:: 7.6 - changed from iterator method to generator property + Changed from iterator method to generator property + .. versionchanged:: 10.4 + Applies `filter_item` for instance-level filtering. 
+ + :yield: Items from the MediaWiki API, filtered by `filter_item()` """ offset = self.starting_offset n = 0 while True: + # Set the continue parameter for the request self.request[self.continue_name] = offset pywikibot.debug(f'{type(self).__name__}: Request: {self.request}') data = self.request.submit() @@ -178,14 +240,17 @@ def generator(self): f'{type(self).__name__}: Retrieved {n_items} items') if n_items > 0: for item in data[self.data_name]: - yield item - n += 1 - if self.limit is not None and n >= self.limit: - pywikibot.debug( - f'{type(self).__name__}: Stopped iterating due to' - ' exceeding item limit.' - ) - return + # Apply the instance filter function before yielding + if self.filter_item(item): + yield item + n += 1 + # Stop iterating if the limit is reached + if self.limit is not None and n >= self.limit: + pywikibot.debug( + f'{type(self).__name__}: Stopped iterating due' + ' to exceeding item limit.' + ) + return offset += n_items else: pywikibot.debug(f'{type(self).__name__}: Stopped iterating' @@ -314,7 +379,7 @@ def __init__(self, **kwargs) -> None: self._add_slots() @property - @deprecated(since='8.4.0') + @deprecated('modules', since='8.4.0') def continuekey(self) -> list[str]: """Return deprecated continuekey which is self.modules.""" return self.modules @@ -570,17 +635,36 @@ def _get_resultdata(self): return resultdata def _extract_results(self, resultdata): - """Extract results from resultdata.""" + """Extract results from resultdata, applying `filter_item()`. + + :attr:`generator` helper method which yields each result that + passes :meth:`filter_item() ` and + respects namespaces and the generator's limit. + + .. versionchanged:: 10.4 + Applies `filter_item()` for instance-level filtering. 
+ + :param resultdata: List or iterable of raw API items + :yield: Processed items that pass the filter + :raises RuntimeError: if self.limit is reached + + :meta public: + """ for item in resultdata: result = self.result(item) if self._namespaces and not self._check_result_namespace(result): continue + # Apply the instance filter before yielding + if not self.filter_item(result): + continue + yield result modules_item_intersection = set(self.modules) & set(item) if isinstance(item, dict) and modules_item_intersection: - # if we need to count elements contained in items in + # Count elements contained in sub-items. + # If we need to count elements contained in items in # self.data["query"]["pages"], we want to count # item[self.modules] (e.g. 'revisions') and not # self.resultkey (i.e. 'pages') @@ -589,7 +673,8 @@ def _extract_results(self, resultdata): # otherwise we proceed as usual else: self._count += 1 - # note: self.limit could be -1 + + # Stop if limit is reached; note: self.limit could be -1 if self.limit and 0 < self.limit <= self._count: raise RuntimeError( 'QueryGenerator._extract_results reached the limit') @@ -599,9 +684,15 @@ def generator(self): """Submit request and iterate the response based on self.resultkey. Continues response as needed until limit (if any) is reached. + Each item is already filtered by `_extract_results()`. .. versionchanged:: 7.6 - changed from iterator method to generator property + Changed from iterator method to generator property + .. versionchanged:: 10.4 + Items are filtered via :meth:`filter_item() + ` inside :meth:`_extract_results`. 
+ + :yield: Items from the API, already filtered """ previous_result_had_data = True prev_limit = new_limit = None @@ -616,7 +707,7 @@ def generator(self): if not self.data or not isinstance(self.data, dict): pywikibot.debug(f'{type(self).__name__}: stopped iteration' - ' because no dict retrieved from api.') + ' because no dict retrieved from API.') break if 'query' in self.data and self.resultkey in self.data['query']: @@ -638,13 +729,13 @@ def generator(self): else: if 'query' not in self.data: pywikibot.log(f"{type(self).__name__}: 'query' not found" - ' in api response.') + ' in API response.') pywikibot.log(str(self.data)) # if (query-)continue is present, self.resultkey might not have # been fetched yet if self.continue_name not in self.data: - break # No results. + break # No results # self.resultkey not in data in last request.submit() # only "(query-)continue" was retrieved. @@ -767,13 +858,18 @@ class PropertyGenerator(QueryGenerator): decide what to do with the contents of the dict. There will be one dict for each page queried via a titles= or ids= parameter (which must be supplied when instantiating this class). + + .. versionchanged:: 10.4 + Supports instance-level filtering via :attr:`filter_func + ` / :meth:`filter_item() + None: """Initializer. - Required and optional parameters are as for ``Request``, except that - action=query is assumed and prop is required. + Required and optional parameters are as for ``Request``, except + that action=query is assumed and prop is required. :param prop: the "prop=" type from api.php """ @@ -781,6 +877,7 @@ def __init__(self, prop: str, **kwargs) -> None: super().__init__(**kwargs) self._props = frozenset(prop.split('|')) self.resultkey = 'pages' + self._previous_dicts: dict[str, dict] = {} @property def props(self): @@ -789,17 +886,28 @@ def props(self): @property def generator(self): - """Yield results. + """Yield results from the API, including previously retrieved dicts. .. 
versionchanged:: 7.6 - changed from iterator method to generator property + Changed from iterator method to generator property. + + .. versionchanged:: 10.4 + Items are filtered via :meth:`filter_item() + None: """Set the site and valid names. @@ -64,7 +70,6 @@ def _set_site(self, site, module: str, param: str, *, thrown. :param site: The associated site - :type site: pywikibot.site.APISite :param module: The module name which is used by paraminfo. :param param: The parameter name inside the module. That parameter must have a 'type' entry. @@ -80,6 +85,7 @@ def _set_site(self, site, module: str, param: str, *, self._valid_disable = set() if site is None: return + for type_value in site._paraminfo.parameter(module, param)['type']: if type_value[0] == '!': self._valid_disable.add(type_value[1:]) @@ -96,7 +102,7 @@ def _set_site(self, site, module: str, param: str, *, '"{}"'.format('", "'.join(invalid_names))) self._site_set = True - def from_dict(self, dictionary) -> None: + def from_dict(self, dictionary: dict[str, bool | None]) -> None: """Load options from the dict. The options are not cleared before. If changes have been made @@ -107,7 +113,6 @@ def from_dict(self, dictionary) -> None: the value False, True or None. The names must be valid depending on whether they enable or disable the option. All names with the value None can be in either of the list. - :type dictionary: dict (keys are strings, values are bool/None) """ enabled = set() disabled = set() diff --git a/pywikibot/data/api/_paraminfo.py b/pywikibot/data/api/_paraminfo.py index 49a8c2248e..beaf551842 100644 --- a/pywikibot/data/api/_paraminfo.py +++ b/pywikibot/data/api/_paraminfo.py @@ -1,6 +1,6 @@ """Object representing API parameter information.""" # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. 
# @@ -11,8 +11,13 @@ import pywikibot from pywikibot import config -from pywikibot.backports import Iterable, batched -from pywikibot.tools import classproperty, deprecated, remove_last_args +from pywikibot.backports import Dict, Iterable, Set, batched +from pywikibot.tools import ( + classproperty, + deprecated, + deprecated_args, + remove_last_args, +) __all__ = ['ParamInfo'] @@ -31,6 +36,9 @@ class ParamInfo(Sized, Container): init_modules = frozenset(['main', 'paraminfo']) param_modules = ('list', 'meta', 'prop') + _action_modules: frozenset[str] + _modules: Dict[str, Set[str] | Dict[str, str]] + @remove_last_args(['modules_only_mode']) def __init__(self, site, @@ -45,16 +53,14 @@ def __init__(self, self.site = site # Keys are module names, values are the raw responses from the server. - self._paraminfo = {} + self._paraminfo: dict[str, Any] = {} # Cached data. - self._prefixes = {} - self._prefix_map = {} - self._with_limits = None + self._prefix_map: dict[str, str] = {} self._action_modules = frozenset() # top level modules self._modules = {} # filled in _init() (and enlarged in fetch) - self._limit = None + self._limit: int | None = None self._preloaded_modules = self.init_modules if preloaded_modules: @@ -69,7 +75,7 @@ def _add_submodules(self, name: str, if self._action_modules: assert modules == self._action_modules else: - self._action_modules = modules + self._action_modules = frozenset(modules) elif name in self._modules: # update required to updates from dict and set self._modules[name].update(modules) @@ -333,9 +339,10 @@ def __len__(self) -> int: """Return number of cached modules.""" return len(self._paraminfo) + @deprecated_args(module='module_name') # since 10.5.0 def parameter( self, - module: str, + module_name: str, param_name: str ) -> dict[str, Any] | None: """Get details about one modules parameter. 
@@ -347,9 +354,9 @@ def parameter( :return: metadata that describes how the parameter may be used """ try: - module = self[module] + module = self[module_name] except KeyError: - raise ValueError(f"paraminfo for '{module}' not loaded") + raise ValueError(f"paraminfo for '{module_name}' not loaded") try: params = module['parameters'] diff --git a/pywikibot/data/api/_requests.py b/pywikibot/data/api/_requests.py index 345b390a4f..5fd6e11bf4 100644 --- a/pywikibot/data/api/_requests.py +++ b/pywikibot/data/api/_requests.py @@ -44,6 +44,13 @@ __all__ = ('CachedRequest', 'Request', 'encode_url') +TEST_RUNNING = os.environ.get('PYWIKIBOT_TEST_RUNNING', '0') == '1' + +if TEST_RUNNING: + import unittest + + # lazy load unittest_print to prevent circular imports + # Actions that imply database updates on the server, used for various # things like throttling or skipping actions when we're in simulation # mode @@ -525,8 +532,16 @@ def _simulate(self, action): # for more realistic simulation if config.simulate is not True: pywikibot.sleep(float(config.simulate)) + if action == 'rollback': + result = { + 'title': self._params['title'][0].title(), + 'summary': self._params.get('summary', + ['Rollback simulation'])[0], + } + else: + result = {'result': 'Success', 'nochange': ''} return { - action: {'result': 'Success', 'nochange': ''}, + action: result, # wikibase results 'entity': {'lastrevid': -1, 'id': '-1'}, @@ -711,8 +726,15 @@ def _http_request(self, use_get: bool, uri: str, data, headers, # TODO: what other exceptions can occur here? 
except Exception: # for any other error on the http request, wait and retry - pywikibot.error(traceback.format_exc()) - pywikibot.log(f'{uri}, {paramstring}') + tb = traceback.format_exc() + msg = f'{uri}, {paramstring}' + if TEST_RUNNING: + from tests import unittest_print + unittest_print(tb) + unittest_print(msg) + else: + pywikibot.error(tb) + pywikibot.log(msg) else: return response, use_get @@ -753,6 +775,13 @@ def _json_loads(self, response) -> dict | None: The text message is: {text} """ + if TEST_RUNNING: + if response.status_code == 403 \ + and 'Requests from your IP have been blocked' in text: + raise unittest.SkipTest(msg) # T399367 + + from tests import unittest_print + unittest_print(msg) # Do not retry for AutoFamily but raise a SiteDefinitionError # Note: family.AutoFamily is a function to create that class @@ -814,6 +843,10 @@ def _handle_warnings(self, result: dict[str, Any]) -> bool: .. versionchanged:: 7.2 Return True to retry the current request and False to resume. + .. versionchanged:: 10.5 + Handle warnings of formatversion 2. + + .. 
seealso:: :api:`Errors and warnings` :meta public: """ @@ -824,7 +857,9 @@ def _handle_warnings(self, result: dict[str, Any]) -> bool: for mod, warning in result['warnings'].items(): if mod == 'info': continue - if '*' in warning: + if 'warnings' in warning: # formatversion 2 + text = warning['warnings'] + elif '*' in warning: # formatversion 1 text = warning['*'] elif 'html' in warning: # bug T51978 @@ -993,8 +1028,6 @@ def submit(self) -> dict: :return: a dict containing data retrieved from api.php """ - test_running = os.environ.get('PYWIKIBOT_TEST_RUNNING', '0') == '1' - self._add_defaults() use_get = self._use_get() retries = 0 @@ -1039,11 +1072,15 @@ def submit(self) -> dict: assert key not in error error[key] = result[key] - if '*' in error: - # help text returned - error['help'] = error.pop('*') + # help text returned + # see also: https://www.mediawiki.org/wiki/API:Errors_and_warnings + if 'docref' in error: + error['help'] = error.pop('docref') # formatversion 2 + elif '*' in error: + error['help'] = error.pop('*') # formatversion 1 + code = error.setdefault('code', 'Unknown') - info = error.setdefault('info', None) + info = error.setdefault('info', '') if (code == self.last_error['code'] and info == self.last_error['info']): @@ -1139,7 +1176,7 @@ def submit(self) -> dict: param_repr = str(self._params) msg = (f'API Error: query=\n{pprint.pformat(param_repr)}\n' f' response=\n{result}') - if test_running: + if TEST_RUNNING: from tests import unittest_print unittest_print(msg) else: @@ -1150,8 +1187,7 @@ def submit(self) -> dict: raise RuntimeError(result) msg = 'Maximum retries attempted due to maxlag without success.' - if test_running: - import unittest + if TEST_RUNNING: raise unittest.SkipTest(msg) raise MaxlagTimeoutError(msg) diff --git a/pywikibot/data/citoid.py b/pywikibot/data/citoid.py new file mode 100644 index 0000000000..96a432691b --- /dev/null +++ b/pywikibot/data/citoid.py @@ -0,0 +1,64 @@ +"""Citoid Query interface. + +.. 
versionadded:: 10.6 +""" +# +# (C) Pywikibot team, 2025 +# +# Distributed under the terms of the MIT license. +# +from __future__ import annotations + +import urllib.parse +from dataclasses import dataclass +from typing import Any + +import pywikibot +from pywikibot.comms import http +from pywikibot.exceptions import ApiNotAvailableError, Error +from pywikibot.site import BaseSite + + +VALID_FORMAT = [ + 'mediawiki', 'wikibase', 'zotero', 'bibtex', 'mediawiki-basefields' +] + + +@dataclass(eq=False) +class CitoidClient: + + """Citoid client class. + + This class allows to call the Citoid API used in production. + """ + + site: BaseSite + + def get_citation( + self, + response_format: str, + ref_url: str + ) -> dict[str, Any]: + """Get a citation from the citoid service. + + :param response_format: Return format, e.g. 'bibtex', 'wikibase', etc. + :param ref_url: The URL to get the citation for. + :return: A dictionary with the citation data. + """ + if response_format not in VALID_FORMAT: + raise ValueError(f'Invalid format {response_format}, ' + f'must be one of {VALID_FORMAT}') + if (not hasattr(self.site.family, 'citoid_endpoint') + or not self.site.family.citoid_endpoint): + raise ApiNotAvailableError( + f'Citoid endpoint not configured for {self.site.family.name}') + base_url = self.site.family.citoid_endpoint + ref_url = urllib.parse.quote(ref_url, safe='') + api_url = urllib.parse.urljoin(base_url, + f'{response_format}/{ref_url}') + try: + json = http.request(self.site, api_url).json() + return json + except Error as e: + pywikibot.log(f'Caught pywikibot error {e}') + raise diff --git a/pywikibot/data/memento.py b/pywikibot/data/memento.py index 4cb6cb042b..83784cc483 100644 --- a/pywikibot/data/memento.py +++ b/pywikibot/data/memento.py @@ -1,6 +1,8 @@ """Fix ups for memento-client package version 0.6.1. .. versionadded:: 7.4 +.. versionchanged:: 10.7 + Set default timegate to :attr:`DEFAULT_TIMEGATE_BASE_URI` .. 
seealso:: https://github.com/mementoweb/py-memento-client#readme """ # @@ -32,6 +34,10 @@ ) +#: Default timegate; overrides the origin library setting. +DEFAULT_TIMEGATE_BASE_URI: str = 'https://web.archive.org/web/' + + class MementoClient(OldMementoClient): """A Memento Client. @@ -41,6 +47,8 @@ class MementoClient(OldMementoClient): .. versionchanged:: 7.4 `timeout` is used in several methods. + .. versionchanged:: 10.7 + Set default timegate to :attr:`DEFAULT_TIMEGATE_BASE_URI` Basic usage: @@ -50,7 +58,7 @@ class MementoClient(OldMementoClient): >>> mi['original_uri'] 'http://www.bbc.com/' >>> mi['timegate_uri'] - 'http://timetravel.mementoweb.org/timegate/http://www.bbc.com/' + 'https://web.archive.org/web/http://www.bbc.com/' >>> sorted(mi['mementos']) ['closest', 'first', 'last', 'next', 'prev'] >>> from pprint import pprint @@ -67,32 +75,38 @@ class MementoClient(OldMementoClient): 'prev': {'datetime': datetime.datetime(2009, 10, 15, 19, 7, 5), 'uri': ['http://wayback.nli.org.il:8080/20091015190705/http://www.bbc.com/']}} - The output conforms to the Memento API format explained here: - http://timetravel.mementoweb.org/guide/api/#memento-json + The output conforms to the Memento API format but its description at + http://timetravel.mementoweb.org/guide/api/#memento-json is no + longer available .. note:: The mementos result is not deterministic. It may be different for the same parameters. - By default, MementoClient uses the Memento Aggregator: - http://mementoweb.org/depot/ - It is also possible to use different TimeGate, simply initialize - with a preferred timegate base uri. Toggle check_native_timegate to - see if the original uri has its own timegate. The native timegate, - if found will be used instead of the timegate_uri preferred. If no - native timegate is found, the preferred timegate_uri will be used. + with a preferred timegate base uri. Toggle *check_native_timegate* + to see if the original uri has its own timegate.
The native + timegate, if found will be used instead of the *timegate_uri* + preferred. If no native timegate is found, the preferred + *timegate_uri* will be used. :param str timegate_uri: A valid HTTP base uri for a timegate. - Must start with http(s):// and end with a /. + Must start with http(s):// and end with a /. Default is + :attr:`DEFAULT_TIMEGATE_BASE_URI` + :param bool check_native_timegate: If True, the client will first + check whether the original URI has a native TimeGate. If found, + the native TimeGate is used instead of the preferred + *timegate_uri*. If False, the preferred *timegate_uri* is always + used. Default is True. :param int max_redirects: the maximum number of redirects allowed - for all HTTP requests to be made. + for all HTTP requests to be made. Default is 30. + :param requests.Session|None session: a Session object :return: A :class:`MementoClient` obj. """ # noqa: E501, W505 def __init__(self, *args, **kwargs) -> None: """Initializer.""" - # To prevent documentation inclusion from inherited class - # because it is malformed. + if 'timegate_uri' not in kwargs and not args: + kwargs['timegate_uri'] = DEFAULT_TIMEGATE_BASE_URI super().__init__(*args, **kwargs) def get_memento_info(self, request_uri: str, @@ -326,7 +340,7 @@ def get_closest_memento_url(url: str, datetime is used if none is provided. :param timegate_uri: A valid HTTP base uri for a timegate. Must start with http(s):// and end with a /. Default value is - http://timetravel.mementoweb.org/timegate/. + :attr:`DEFAULT_TIMEGATE_BASE_URI`. :param timeout: The timeout value for the HTTP connection. If None, a default value is used in :meth:`MementoClient.request_head`. 
""" diff --git a/pywikibot/data/mysql.py b/pywikibot/data/mysql.py index 6193a4dc39..a171b3052a 100644 --- a/pywikibot/data/mysql.py +++ b/pywikibot/data/mysql.py @@ -1,6 +1,6 @@ """Miscellaneous helper functions for mysql queries.""" # -# (C) Pywikibot team, 2016-2022 +# (C) Pywikibot team, 2016-2025 # # Distributed under the terms of the MIT license. # @@ -44,7 +44,7 @@ def mysql_query(query: str, params=None, """ # These are specified in config.py or your user config file if verbose is None: - verbose = config.verbose_output + verbose = config.verbose_output > 0 if config.db_connect_file is None: credentials = {'user': config.db_username, diff --git a/pywikibot/data/sparql.py b/pywikibot/data/sparql.py index 4713888ca3..ca5042d819 100644 --- a/pywikibot/data/sparql.py +++ b/pywikibot/data/sparql.py @@ -1,19 +1,20 @@ """SPARQL Query interface.""" # -# (C) Pywikibot team, 2016-2024 +# (C) Pywikibot team, 2016-2025 # # Distributed under the terms of the MIT license. # from __future__ import annotations from textwrap import fill +from typing import Any from urllib.parse import quote from requests import JSONDecodeError from requests.exceptions import Timeout from pywikibot import Site -from pywikibot.backports import removeprefix +from pywikibot.backports import Dict, removeprefix from pywikibot.comms import http from pywikibot.data import WaitingMixin from pywikibot.exceptions import Error, NoUsernameError, ServerError @@ -111,7 +112,7 @@ def select(self, result = [] qvars = data['head']['vars'] for row in data['results']['bindings']: - values = {} + values: Dict[str, Any] = {} for var in qvars: if var not in row: # var is not available (OPTIONAL is probably used) diff --git a/pywikibot/data/superset.py b/pywikibot/data/superset.py index 7b4c969fc6..9f49509790 100644 --- a/pywikibot/data/superset.py +++ b/pywikibot/data/superset.py @@ -3,7 +3,7 @@ .. 
versionadded:: 9.2 """ # -# (C) Pywikibot team, 2024 +# (C) Pywikibot team, 2024-2025 # # Distributed under the terms of the MIT license. # @@ -15,7 +15,6 @@ import pywikibot from pywikibot.comms import http -from pywikibot.data import WaitingMixin from pywikibot.exceptions import NoUsernameError, ServerError @@ -23,7 +22,7 @@ from pywikibot.site import BaseSite -class SupersetQuery(WaitingMixin): +class SupersetQuery: """Superset Query class. diff --git a/pywikibot/data/wikistats.py b/pywikibot/data/wikistats.py index d85a3710ad..e3570cf513 100644 --- a/pywikibot/data/wikistats.py +++ b/pywikibot/data/wikistats.py @@ -1,6 +1,6 @@ """Objects representing WikiStats API.""" # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. # @@ -122,10 +122,10 @@ def sorted(self, table: str, key: str, alphanumeric keys are sorted in normal way. :return: The sorted table """ - table = self.get(table) + data = self.get(table) # take the first entry to determine the sorting key - first_entry = table[0] + first_entry = data[0] if first_entry[key].isdigit(): def sort_key(d): return int(d[key]) reverse = reverse if reverse is not None else True @@ -133,7 +133,7 @@ def sort_key(d): return int(d[key]) def sort_key(d): return d[key] reverse = reverse if reverse is not None else False - return sorted(table, key=sort_key, reverse=reverse) + return sorted(data, key=sort_key, reverse=reverse) def languages_by_size(self, table: str): """Return ordered list of languages by size from WikiStats.""" diff --git a/pywikibot/date.py b/pywikibot/date.py index e6b4eae818..886efebd9c 100644 --- a/pywikibot/date.py +++ b/pywikibot/date.py @@ -26,20 +26,21 @@ ) from pywikibot.site import BaseSite from pywikibot.tools import deprecate_arg, first_lower, first_upper -from pywikibot.userinterfaces.transliteration import NON_LATIN_DIGITS +from pywikibot.userinterfaces.transliteration import NON_ASCII_DIGITS if TYPE_CHECKING: - tuplst_type = 
list[tuple[Callable[[int | str], Any], - Callable[[int | str], bool]]] - encf_type = Callable[[int], int | Sequence[int]] + from typing import Union + tuplst_type = list[tuple[Callable[[Union[int, str]], Any], + Callable[[Union[int, str]], bool]]] + encf_type = Callable[[int], Union[int, Sequence[int]]] decf_type = Callable[[Sequence[int]], int] # decoders are three value tuples, with an optional fourth to represent a # required number of digits - decoder_type = ( - tuple[str, Callable[[int], str], Callable[[str], int]] - | tuple[str, Callable[[int], str], Callable[[str], int], int] - ) + decoder_type = Union[ + tuple[str, Callable[[int], str], Callable[[str], int]], + tuple[str, Callable[[int], str], Callable[[str], int], int] + ] # # Different collections of well known formats @@ -288,27 +289,27 @@ def monthName(lang: str, ind: int) -> str: # Helper for KN: digits representation -_knDigits = NON_LATIN_DIGITS['kn'] +_knDigits = NON_ASCII_DIGITS['kn'] _knDigitsToLocal = {ord(str(i)): _knDigits[i] for i in range(10)} _knLocalToDigits = {ord(_knDigits[i]): str(i) for i in range(10)} # Helper for Urdu/Persian languages -_faDigits = NON_LATIN_DIGITS['fa'] +_faDigits = NON_ASCII_DIGITS['fa'] _faDigitsToLocal = {ord(str(i)): _faDigits[i] for i in range(10)} _faLocalToDigits = {ord(_faDigits[i]): str(i) for i in range(10)} # Helper for HI:, MR: -_hiDigits = NON_LATIN_DIGITS['hi'] +_hiDigits = NON_ASCII_DIGITS['hi'] _hiDigitsToLocal = {ord(str(i)): _hiDigits[i] for i in range(10)} _hiLocalToDigits = {ord(_hiDigits[i]): str(i) for i in range(10)} # Helper for BN: -_bnDigits = NON_LATIN_DIGITS['bn'] +_bnDigits = NON_ASCII_DIGITS['bn'] _bnDigitsToLocal = {ord(str(i)): _bnDigits[i] for i in range(10)} _bnLocalToDigits = {ord(_bnDigits[i]): str(i) for i in range(10)} # Helper for GU: -_guDigits = NON_LATIN_DIGITS['gu'] +_guDigits = NON_ASCII_DIGITS['gu'] _guDigitsToLocal = {ord(str(i)): _guDigits[i] for i in range(10)} _guLocalToDigits = {ord(_guDigits[i]): str(i) for i in 
range(10)} diff --git a/pywikibot/diff.py b/pywikibot/diff.py index 052e93474c..c1ba3a87c3 100644 --- a/pywikibot/diff.py +++ b/pywikibot/diff.py @@ -1,6 +1,6 @@ """Diff module.""" # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. # @@ -9,7 +9,8 @@ import difflib import math from collections import abc -from difflib import SequenceMatcher, _format_range_unified +from difflib import _format_range_unified # type: ignore[attr-defined] +from difflib import SequenceMatcher from heapq import nlargest from itertools import zip_longest @@ -612,12 +613,14 @@ def html_comparator(compare_string: str) -> dict[str, list[str]]: return comparands -def get_close_matches_ratio(word: Sequence, - possibilities: list[Sequence], - *, - n: int = 3, - cutoff: float = 0.6, - ignorecase: bool = False) -> list[float, Sequence]: +def get_close_matches_ratio( + word: str, + possibilities: list[str], + *, + n: int = 3, + cutoff: float = 0.6, + ignorecase: bool = False +) -> list[tuple[float, str]]: """Return a list of the best “good enough” matches and its ratio. This method is similar to Python's :pylib:`difflib.get_close_matches() diff --git a/pywikibot/echo.py b/pywikibot/echo.py index bd135ed9ef..2dcd683a5e 100644 --- a/pywikibot/echo.py +++ b/pywikibot/echo.py @@ -1,31 +1,52 @@ """Classes and functions for working with the Echo extension.""" # -# (C) Pywikibot team, 2014-2022 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. # from __future__ import annotations +from dataclasses import dataclass from typing import Any import pywikibot +@dataclass(eq=False) class Notification: - """A notification issued by the Echo extension.""" + """A notification issued by the Echo extension. - def __init__(self, site: pywikibot.site.BaseSite) -> None: - """Initialize an empty Notification object.""" - self.site = site + .. 
versionchanged:: 3.0.20190204 + The ``id`` attribute was renamed to ``event_id``, and its type + changed from ``str`` to ``int``. + .. deprecated:: 3.0.20190204 + The ``id`` attribute was retained temporarily for backward + compatibility, but is deprecated and scheduled for removal. + + .. versionremoved:: 7.0 + The ``id`` attribute was removed. + + .. versionchanged:: 10.3 + The class is now defined using the ``@dataclass`` decorator to + simplify internal initialization and improve maintainability. + """ + + site: pywikibot.site.BaseSite + + def __post_init__(self) -> None: + """Initialize attributes for an empty Notification object. + + .. versionadded: 10.3 + """ self.event_id: int | None = None self.type = None self.category = None - self.timestamp = None - self.page = None - self.agent = None - self.read: bool | None = None + self.timestamp: pywikibot.Timestamp | None = None + self.page: pywikibot.Page | None = None + self.agent: pywikibot.User | None = None + self.read: pywikibot.Timestamp | bool | None = None self.content = None self.revid = None @@ -33,12 +54,19 @@ def __init__(self, site: pywikibot.site.BaseSite) -> None: def fromJSON(cls, # noqa: N802 site: pywikibot.site.BaseSite, data: dict[str, Any]) -> Notification: - """Construct a Notification object from our API's JSON data.""" + """Construct a Notification object from API JSON data. + + :param site: The pywikibot site object. + :param data: The JSON data dictionary representing a + notification. + :return: An instance of Notification. 
+ """ notif = cls(site) notif.event_id = int(data['id']) notif.type = data['type'] notif.category = data['category'] + notif.timestamp = pywikibot.Timestamp.fromtimestampformat( data['timestamp']['mw']) @@ -59,8 +87,16 @@ def fromJSON(cls, # noqa: N802 notif.content = data.get('*') notif.revid = data.get('revid') + return notif def mark_as_read(self) -> bool: - """Mark the notification as read.""" - return self.site.notifications_mark_read(list=self.id) + """Mark the notification as read. + + :return: True if the notification was successfully marked as + read, else False. + """ + if self.event_id is None: + return False + + return self.site.notifications_mark_read(**{'list': self.event_id}) diff --git a/pywikibot/exceptions.py b/pywikibot/exceptions.py index 66ae9b06ea..d1a095ad83 100644 --- a/pywikibot/exceptions.py +++ b/pywikibot/exceptions.py @@ -172,7 +172,7 @@ instead. """ # -# (C) Pywikibot team, 2008-2023 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. # @@ -286,17 +286,20 @@ class PageRelatedError(Error): This class should be used when the Exception concerns a particular Page, and when a generic message can be written once for all. + + .. versionchanged:: 10.5 + A pageid is accepted with the first parameter """ # Preformatted message where the page title will be inserted. # Override this in subclasses. message = '' - def __init__(self, page: pywikibot.page.BasePage, + def __init__(self, page: pywikibot.page.BasePage | int, message: str | None = None) -> None: """Initializer. - :param page: Page that caused the exception + :param page: Page object or pageid that caused the exception """ if message: self.message = message @@ -305,13 +308,17 @@ def __init__(self, page: pywikibot.page.BasePage, raise Error("PageRelatedError is abstract. 
Can't instantiate it!") self.page = page - self.title = page.title(as_link=True) - self.site = page.site + if isinstance(page, pywikibot.page.BasePage): + self.title = str(page) + self.site = page.site + else: + self.title = f'{page} (pageid)' + self.site = '' if re.search(r'\{\w+\}', self.message): msg = self.message.format_map(self.__dict__) else: - msg = self.message.format(page) + msg = self.message.format(self.title) super().__init__(msg) @@ -721,6 +728,11 @@ class MaxlagTimeoutError(TimeoutError): """Request failed with a maxlag timeout error.""" +class ApiNotAvailableError(Error): + + """API is not available, e.g. due to a network error or configuration.""" + + wrapper = ModuleDeprecationWrapper(__name__) wrapper.add_deprecated_attr( 'Server414Error', Client414Error, since='8.1.0') diff --git a/pywikibot/families/commons_family.py b/pywikibot/families/commons_family.py index 30773bda87..527ce55b58 100644 --- a/pywikibot/families/commons_family.py +++ b/pywikibot/families/commons_family.py @@ -1,6 +1,6 @@ """Family module for Wikimedia Commons.""" # -# (C) Pywikibot team, 2005-2023 +# (C) Pywikibot team, 2005-2025 # # Distributed under the terms of the MIT license. 
# @@ -24,7 +24,7 @@ class Family(family.WikimediaFamily, family.DefaultWikibaseFamily): langs = { 'commons': 'commons.wikimedia.org', 'test': 'test-commons.wikimedia.org', - 'beta': 'commons.wikimedia.beta.wmflabs.org' + 'beta': 'commons.wikimedia.beta.wmcloud.org' } # Sites we want to edit but not count as real languages diff --git a/pywikibot/families/wikibooks_family.py b/pywikibot/families/wikibooks_family.py index d25044388b..e846f59ac1 100644 --- a/pywikibot/families/wikibooks_family.py +++ b/pywikibot/families/wikibooks_family.py @@ -34,10 +34,10 @@ class Family(family.SubdomainFamily, family.WikimediaFamily): 'af', 'ar', 'az', 'ba', 'be', 'bg', 'bn', 'bs', 'ca', 'cs', 'cv', 'cy', 'da', 'de', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'fy', 'gl', 'he', 'hi', 'hr', 'hu', 'hy', 'ia', 'id', 'is', 'it', 'ja', 'ka', - 'kk', 'km', 'ko', 'ku', 'ky', 'la', 'li', 'lt', 'mg', 'mk', 'ml', 'mr', - 'ms', 'ne', 'nl', 'no', 'oc', 'pa', 'pl', 'pt', 'ro', 'ru', 'sa', - 'shn', 'si', 'sk', 'sl', 'sq', 'sr', 'sv', 'ta', 'te', 'tg', 'th', - 'tl', 'tr', 'tt', 'uk', 'ur', 'vi', 'zh', + 'kk', 'km', 'ko', 'ku', 'ky', 'la', 'li', 'lt', 'mg', 'min', 'mk', + 'ml', 'mr', 'ms', 'ne', 'nl', 'no', 'oc', 'pa', 'pl', 'pt', 'ro', 'ru', + 'sa', 'shn', 'si', 'sk', 'sl', 'sq', 'sr', 'sv', 'ta', 'te', 'tg', + 'th', 'tl', 'tr', 'tt', 'uk', 'ur', 'vi', 'zh', } category_redirect_templates = { diff --git a/pywikibot/families/wikidata_family.py b/pywikibot/families/wikidata_family.py index 89b79faa33..329b567da3 100644 --- a/pywikibot/families/wikidata_family.py +++ b/pywikibot/families/wikidata_family.py @@ -1,6 +1,6 @@ """Family module for Wikidata.""" # -# (C) Pywikibot team, 2012-2023 +# (C) Pywikibot team, 2012-2025 # # Distributed under the terms of the MIT license. 
# @@ -18,7 +18,7 @@ class Family(family.WikimediaFamily, family.DefaultWikibaseFamily): langs = { 'wikidata': 'www.wikidata.org', 'test': 'test.wikidata.org', - 'beta': 'wikidata.beta.wmflabs.org', + 'beta': 'wikidata.beta.wmcloud.org', } # Sites we want to edit but not count as real languages diff --git a/pywikibot/families/wikipedia_family.py b/pywikibot/families/wikipedia_family.py index a546d23144..8f960fea4f 100644 --- a/pywikibot/families/wikipedia_family.py +++ b/pywikibot/families/wikipedia_family.py @@ -51,9 +51,9 @@ class Family(family.SubdomainFamily, family.WikimediaFamily): 'nds', 'nds-nl', 'ne', 'new', 'nia', 'nl', 'nn', 'no', 'nov', 'nqo', 'nr', 'nrm', 'nso', 'nup', 'nv', 'ny', 'oc', 'olo', 'om', 'or', 'os', 'pa', 'pag', 'pam', 'pap', 'pcd', 'pcm', 'pdc', 'pfl', 'pi', 'pl', - 'pms', 'pnb', 'pnt', 'ps', 'pt', 'pwn', 'qu', 'rm', 'rmy', 'rn', 'ro', - 'roa-rup', 'roa-tara', 'rsk', 'ru', 'rue', 'rw', 'sa', 'sah', 'sat', - 'sc', 'scn', 'sco', 'sd', 'se', 'sg', 'sh', 'shi', 'shn', 'si', + 'pms', 'pnb', 'pnt', 'ps', 'pt', 'pwn', 'qu', 'rki', 'rm', 'rmy', 'rn', + 'ro', 'roa-rup', 'roa-tara', 'rsk', 'ru', 'rue', 'rw', 'sa', 'sah', + 'sat', 'sc', 'scn', 'sco', 'sd', 'se', 'sg', 'sh', 'shi', 'shn', 'si', 'simple', 'sk', 'skr', 'sl', 'sm', 'smn', 'sn', 'so', 'sq', 'sr', 'srn', 'ss', 'st', 'stq', 'su', 'sv', 'sw', 'syl', 'szl', 'szy', 'ta', 'tay', 'tcy', 'tdd', 'te', 'tet', 'tg', 'th', 'ti', 'tig', 'tk', 'tl', @@ -203,6 +203,7 @@ class Family(family.SubdomainFamily, family.WikimediaFamily): 'he': ('בעבודה',), 'hr': ('Radovi',), 'hy': ('Խմբագրում եմ',), + 'ro': ('Dezvoltare', 'S-dezvoltare', 'Modific acum'), 'ru': ('Редактирую',), 'sr': ('Радови у току', 'Рут'), 'test': ('In use',), @@ -219,8 +220,11 @@ class Family(family.SubdomainFamily, family.WikimediaFamily): 'Archivace start', 'Posloupnost archivů', 'Rfa-archiv-start', 'Rfc-archiv-start'), 'de': ('Archiv',), + 'ro': ('Arhivă',), } + citoid_endpoint = '/api/rest_v1/data/citation/' + @classmethod def 
__post_init__(cls) -> None: """Add 'yue' code alias due to :phab:`T341960`. diff --git a/pywikibot/families/wikiquote_family.py b/pywikibot/families/wikiquote_family.py index 5e59f89f39..4d6cce39da 100644 --- a/pywikibot/families/wikiquote_family.py +++ b/pywikibot/families/wikiquote_family.py @@ -33,9 +33,9 @@ class Family(family.SubdomainFamily, family.WikimediaFamily): 'ca', 'cs', 'cy', 'da', 'de', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'gor', 'gu', 'guw', 'he', 'hi', 'hr', 'hu', 'hy', 'id', 'ig', 'is', 'it', 'ja', 'ka', 'kn', 'ko', 'ku', 'ky', 'la', 'li', - 'lt', 'ml', 'mr', 'nl', 'nn', 'no', 'pl', 'pt', 'ro', 'ru', 'sa', - 'sah', 'sk', 'sl', 'sq', 'sr', 'su', 'sv', 'ta', 'te', 'th', 'tl', - 'tr', 'uk', 'ur', 'uz', 'vi', 'zh', + 'lt', 'ml', 'mr', 'ms', 'nl', 'nn', 'no', 'pcm', 'pl', 'pt', 'ro', + 'ru', 'sa', 'sah', 'sk', 'sl', 'sq', 'sr', 'su', 'sv', 'ta', 'te', + 'th', 'tl', 'tr', 'uk', 'ur', 'uz', 'vi', 'zh', } category_redirect_templates = { diff --git a/pywikibot/families/wikisource_family.py b/pywikibot/families/wikisource_family.py index 09802e2c7d..c8fcff2147 100644 --- a/pywikibot/families/wikisource_family.py +++ b/pywikibot/families/wikisource_family.py @@ -30,11 +30,11 @@ class Family(family.SubdomainFamily, family.WikimediaFamily): 'ar', 'as', 'az', 'ban', 'bcl', 'be', 'bg', 'bn', 'br', 'bs', 'ca', 'cs', 'cy', 'da', 'de', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fo', 'fr', 'gl', 'gu', 'he', 'hi', 'hr', 'hu', 'hy', 'id', 'is', 'it', - 'ja', 'jv', 'ka', 'kn', 'ko', 'la', 'li', 'lij', 'lt', 'mk', 'ml', - 'mr', 'ms', 'mul', 'my', 'nap', 'nl', 'no', 'or', 'pa', 'pl', 'pms', - 'pt', 'ro', 'ru', 'sa', 'sah', 'sk', 'sl', 'sr', 'su', 'sv', 'ta', - 'tcy', 'te', 'th', 'tr', 'uk', 'vec', 'vi', 'wa', 'yi', 'zh', - 'zh-min-nan', + 'ja', 'jv', 'ka', 'kn', 'ko', 'la', 'li', 'lij', 'lt', 'mad', 'min', + 'mk', 'ml', 'mr', 'ms', 'mul', 'my', 'nap', 'nl', 'no', 'or', 'pa', + 'pl', 'pms', 'pt', 'ro', 'ru', 'sa', 'sah', 'sk', 'sl', 'sr', 
'su', + 'sv', 'ta', 'tcy', 'te', 'th', 'tl', 'tr', 'uk', 'vec', 'vi', 'wa', + 'yi', 'zh', 'zh-min-nan', } # Sites we want to edit but not count as real languages @@ -56,7 +56,7 @@ class Family(family.SubdomainFamily, family.WikimediaFamily): def langs(cls): cls.langs = super().langs cls.langs['mul'] = cls.domain - cls.langs['beta'] = 'en.wikisource.beta.wmflabs.org' + cls.langs['beta'] = 'en.wikisource.beta.wmcloud.org' return cls.langs # Need to explicitly inject the beta domain diff --git a/pywikibot/families/wiktionary_family.py b/pywikibot/families/wiktionary_family.py index b9647c0d6c..1f238a0cf5 100644 --- a/pywikibot/families/wiktionary_family.py +++ b/pywikibot/families/wiktionary_family.py @@ -34,22 +34,22 @@ class Family(family.SubdomainFamily, family.WikimediaFamily): ] codes = { - 'af', 'am', 'an', 'ang', 'ar', 'ast', 'ay', 'az', 'bcl', 'be', 'bg', - 'bjn', 'blk', 'bn', 'br', 'bs', 'btm', 'ca', 'chr', 'ckb', 'co', 'cs', - 'csb', 'cy', 'da', 'de', 'diq', 'dv', 'el', 'en', 'eo', 'es', 'et', - 'eu', 'fa', 'fi', 'fj', 'fo', 'fr', 'fy', 'ga', 'gd', 'gl', 'gn', - 'gom', 'gor', 'gu', 'guw', 'gv', 'ha', 'he', 'hi', 'hif', 'hr', 'hsb', - 'hu', 'hy', 'ia', 'id', 'ie', 'ig', 'io', 'is', 'it', 'iu', 'ja', - 'jbo', 'jv', 'ka', 'kaa', 'kbd', 'kcg', 'kk', 'kl', 'km', 'kn', 'ko', - 'ks', 'ku', 'kw', 'ky', 'la', 'lb', 'li', 'lmo', 'ln', 'lo', 'lt', - 'lv', 'mad', 'mg', 'mi', 'min', 'mk', 'ml', 'mn', 'mni', 'mnw', 'mr', - 'ms', 'mt', 'my', 'na', 'nah', 'nds', 'ne', 'nia', 'nl', 'nn', 'no', - 'oc', 'om', 'or', 'pa', 'pl', 'pnb', 'ps', 'pt', 'qu', 'ro', 'roa-rup', - 'ru', 'rw', 'sa', 'sat', 'scn', 'sd', 'sg', 'sh', 'shn', 'shy', 'si', - 'simple', 'sk', 'skr', 'sl', 'sm', 'so', 'sq', 'sr', 'ss', 'st', 'su', - 'sv', 'sw', 'ta', 'tcy', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', - 'tpi', 'tr', 'ts', 'tt', 'ug', 'uk', 'ur', 'uz', 'vec', 'vi', 'vo', - 'wa', 'wo', 'yi', 'yue', 'zh', 'zh-min-nan', 'zu', + 'af', 'am', 'an', 'ang', 'ar', 'ast', 'ay', 'az', 'bcl', 'be', 'bew', + 'bg', 
'bjn', 'blk', 'bn', 'br', 'bs', 'btm', 'ca', 'chr', 'ckb', 'co', + 'cs', 'csb', 'cy', 'da', 'de', 'diq', 'dv', 'el', 'en', 'eo', 'es', + 'et', 'eu', 'fa', 'fi', 'fj', 'fo', 'fr', 'fy', 'ga', 'gd', 'gl', + 'gn', 'gom', 'gor', 'gu', 'guw', 'gv', 'ha', 'he', 'hi', 'hif', 'hr', + 'hsb', 'hu', 'hy', 'ia', 'id', 'ie', 'ig', 'io', 'is', 'it', 'iu', + 'ja', 'jbo', 'jv', 'ka', 'kaa', 'kbd', 'kcg', 'kk', 'kl', 'km', 'kn', + 'ko', 'ks', 'ku', 'kw', 'ky', 'la', 'lb', 'li', 'lmo', 'ln', 'lo', + 'lt', 'lv', 'mad', 'mg', 'mi', 'min', 'mk', 'ml', 'mn', 'mni', 'mnw', + 'mr', 'ms', 'mt', 'my', 'na', 'nah', 'nds', 'ne', 'nia', 'nl', 'nn', + 'no', 'oc', 'om', 'or', 'pa', 'pl', 'pnb', 'ps', 'pt', 'qu', 'ro', + 'roa-rup', 'ru', 'rw', 'sa', 'sat', 'scn', 'sd', 'sg', 'sh', 'shn', + 'shy', 'si', 'simple', 'sk', 'skr', 'sl', 'sm', 'so', 'sq', 'sr', 'ss', + 'st', 'su', 'sv', 'sw', 'ta', 'tcy', 'te', 'tg', 'th', 'ti', 'tk', + 'tl', 'tn', 'tpi', 'tr', 'ts', 'tt', 'ug', 'uk', 'ur', 'uz', 'vec', + 'vi', 'vo', 'wa', 'wo', 'yi', 'yue', 'zgh', 'zh', 'zh-min-nan', 'zu', } category_redirect_templates = { diff --git a/pywikibot/family.py b/pywikibot/family.py index d7e9b8e10a..390e8b005c 100644 --- a/pywikibot/family.py +++ b/pywikibot/family.py @@ -703,8 +703,12 @@ def shared_image_repository(self, code): """Return the shared image repository, if any.""" return (None, None) + @deprecated(since='10.6.0') def isPublic(self, code) -> bool: - """Check the wiki require logging in before viewing it.""" + """Check the wiki require logging in before viewing it. + + .. 
deprecated:: 10.6 + """ return True def post_get_convert(self, site, getText): @@ -732,7 +736,7 @@ def obsolete(self) -> types.MappingProxyType[str, str | None]: :return: mapping of old codes to new codes (or None) """ data = dict.fromkeys(self.interwiki_removals) - data.update(self.interwiki_replacements) + data.update(self.code_aliases) return types.MappingProxyType(data) @classproperty @@ -749,6 +753,7 @@ def codes(cls) -> set[str]: return set(cls.langs.keys()) @classproperty + @deprecated('code_aliases', since='10.6.0') def interwiki_replacements(cls) -> Mapping[str, str]: """Return an interwiki code replacement mapping. @@ -757,6 +762,8 @@ def interwiki_replacements(cls) -> Mapping[str, str]: xx: now should get code yy:, add {'xx':'yy'} to :attr:`code_aliases`. + .. deprecated:: 10.6 + Use :attr:`code_aliases` directly instead. .. versionchanged:: 8.2 changed from dict to invariant mapping. """ @@ -961,8 +968,13 @@ class WikimediaFamily(Family): 'dk': 'da', 'jp': 'ja', - # Language aliases, see T86924 - 'nb': 'no', + # Language aliases + 'gsw': 'als', # T399411 + 'lzh': 'zh-classical', # T399697 + 'nb': 'no', # T86924 + 'rup': 'roa-rup', # T399693 + 'sgs': 'bat-smg', # T399438 + 'vro': 'fiu-vro', # T399444 # Closed wiki redirection aliases 'mo': 'ro', @@ -1106,6 +1118,16 @@ class DefaultWikibaseFamily(WikibaseFamily): .. 
versionadded:: 8.2 """ + @property + def interval_start_property(self) -> str: + """Return the property for the start of an interval.""" + return 'P580' + + @property + def interval_end_property(self) -> str: + """Return the property for the end of an interval.""" + return 'P582' + def calendarmodel(self, code) -> str: """Default calendar model for WbTime datatype.""" return 'http://www.wikidata.org/entity/Q1985727' diff --git a/pywikibot/fixes.py b/pywikibot/fixes.py index db65059151..684850c03e 100644 --- a/pywikibot/fixes.py +++ b/pywikibot/fixes.py @@ -1,18 +1,18 @@ """File containing all standard fixes.""" # -# (C) Pywikibot team, 2008-2022 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. # from __future__ import annotations -import os.path +from pathlib import Path from pywikibot import config parameter_help = """ - Currently available predefined fixes are: + Currently available predefined fixes: * HTML - Convert HTML tags to wiki syntax, and fix XHTML. @@ -20,21 +20,20 @@ * syntax - Try to fix bad wiki markup. Do not run this in automatic mode, as the bot may make mistakes. - * syntax-safe - Like syntax, but less risky, so you can - run this in automatic mode. - * case-de - fix upper/lower case errors in German - * grammar-de - fix grammar and typography in German - * vonbis - Ersetze Binde-/Gedankenstrich durch "bis" - in German - * music - Links auf Begriffsklärungen in German - * datum - specific date formats in German - * correct-ar - Typo corrections for Arabic Wikipedia and any - Arabic wiki. - * yu-tld - Fix links to .yu domains because it is - disabled, see: + * syntax-safe - Like syntax, but less risky; can be run + in automatic mode. + * case-de - Fix upper/lower case errors in German. + * grammar-de - Fix grammar and typography in German. + * vonbis - Replace hyphens or dashes with "bis" + in German. + * music - Links to disambiguation pages in German. + * datum - Specific date formats in German. 
+ * correct-ar - Typo corrections for Arabic Wikipedia + and other Arabic wikis. + * yu-tld - Fix links to .yu domains, which are disabled. + See: https://lists.wikimedia.org/pipermail/wikibots-l/2009-February/000290.html - * fckeditor - Try to convert FCKeditor HTML tags to wiki - syntax. + * fckeditor - Convert FCKeditor HTML tags to wiki syntax. """ __doc__ += parameter_help @@ -673,20 +672,27 @@ 'msg': 'pywikibot-fixes-fckeditor', 'replacements': [ # replace
with a new line - (r'(?i)
', r'\n'), + (r'(?i)
', r'\n'), # replace   with a space - (r'(?i) ', r' '), + (r'(?i) ', r' '), ], }, } def _load_file(filename: str) -> bool: - """Load the fixes from the given filename.""" - if os.path.exists(filename): - # load binary, to let compile decode it according to the file header - with open(filename, 'rb') as f: - exec(compile(f.read(), filename, 'exec'), globals()) + """Load the fixes from the given filename. + + Returns True if the file existed and was loaded, False otherwise. + + :meta public: + """ + path = Path(filename) + if path.exists(): + # Read file as binary, so that compile can detect encoding from header + with path.open('rb') as f: + code = compile(f.read(), filename, 'exec') + exec(code, globals()) # intentionally in globals return True return False diff --git a/pywikibot/i18n.py b/pywikibot/i18n.py index e6c7a872a2..62d5b27574 100644 --- a/pywikibot/i18n.py +++ b/pywikibot/i18n.py @@ -29,6 +29,7 @@ from contextlib import suppress from pathlib import Path from textwrap import fill +from typing import Any import pywikibot from pywikibot import __url__, config @@ -554,9 +555,9 @@ def __len__(self) -> int: def translate(code: str | pywikibot.site.BaseSite, - xdict: str | Mapping[str, str], + xdict: str | Mapping[str, Any], parameters: Mapping[str, int] | None = None, - fallback: bool | Iterable[str] = False) -> str | None: + fallback: bool | Iterable[str] = False) -> Any | None: """Return the most appropriate localization from a localization dict. Given a site code and a dictionary, returns the dictionary's value @@ -591,7 +592,7 @@ def translate(code: str | pywikibot.site.BaseSite, :param parameters: For passing (plural) parameters :param fallback: Try an alternate language code. If it's iterable it'll also try those entries and choose the first match. - :return: the localized string + :return: the localized value, usually a string :raise IndexError: If the language supports and requires more plurals than defined for the given PLURAL pattern. 
:raise KeyError: No fallback key found if fallback is not False @@ -736,13 +737,8 @@ def twtranslate( "Robot: Changer %(descr)s {{PLURAL:num|une page|quelques pages}}.", } - and so on. - - >>> # this code snippet is running in test environment - >>> # ignore test message "tests: max_retries reduced from 15 to 1" >>> import os >>> os.environ['PYWIKIBOT_TEST_QUIET'] = '1' - >>> from pywikibot import i18n >>> i18n.set_messages_package('tests.i18n') >>> # use a dictionary @@ -752,7 +748,7 @@ def twtranslate( >>> str(i18n.twtranslate( ... 'fr', 'test-plural', {'num': 1, 'descr': 'seulement'})) 'Robot: Changer seulement une page.' - >>> # use format strings also outside + >>> # use parameter for plural and format strings outside >>> str(i18n.twtranslate( ... 'fr', 'test-plural', {'num': 10}, only_plural=True ... ) % {'descr': 'seulement'}) @@ -761,73 +757,92 @@ def twtranslate( .. versionchanged:: 8.1 the *bot_prefix* parameter was added. - :param source: When it's a site it's using the lang attribute and otherwise - it is using the value directly. The site object is recommended. - :param twtitle: The TranslateWiki string title, in - format - :param parameters: For passing parameters. It should be a mapping but for - backwards compatibility can also be a list, tuple or a single value. - They are also used for plural entries in which case they must be a - Mapping and will cause a TypeError otherwise. + .. versionchanged:: 10.5 + *fallback_prompt* is now returned whenever no translation is found, + including unknown keys in existing packages. + + :param source: When it's a site it's using the lang attribute and + otherwise it is using the value directly. The site object is + recommended. + :param twtitle: The TranslateWiki string title, in - + format + :param parameters: For passing parameters. It should be a mapping + but for backwards compatibility can also be a list, tuple or a + single value. 
They are also used for plural entries in which + case they must be a Mapping and will cause a TypeError otherwise. :param fallback: Try an alternate language code :param fallback_prompt: The English message if i18n is not available - :param only_plural: Define whether the parameters should be only applied to - plural instances. If this is False it will apply the parameters also - to the resulting string. If this is True the placeholders must be - manually applied afterwards. + :param only_plural: Define whether the parameters should be only + applied to plural instances. If this is False it will apply the + parameters also to the resulting string. If this is True the + placeholders must be manually applied afterwards. :param bot_prefix: If True, prepend the message with a bot prefix which depends on the ``config.bot_prefix`` setting - :raise IndexError: If the language supports and requires more plurals than - defined for the given translation template. + :raise IndexError: If the language supports and requires more + plurals than defined for the given translation template. + :raise TypeError: If parameters are not a mapping for plural + messages. + :raise ValueError: If parameters are not a mapping but required. + :raise TranslationError: If no translation found and + *fallback_prompt* is None. """ - prefix = get_bot_prefix(source, use_prefix=bot_prefix) - - if not messages_available(): - if fallback_prompt: + def _return_fallback_or_raise() -> str: + """Return formatted fallback_prompt, or raise TranslationError.""" + if fallback_prompt is not None: if parameters and not only_plural: - return fallback_prompt % parameters - return fallback_prompt - + return prefix + fallback_prompt % parameters + return prefix + fallback_prompt raise pywikibot.exceptions.TranslationError( - f'Unable to load messages package {_messages_package_name} for ' - f' bundle {twtitle}\nIt can happen due to lack of i18n submodule ' - f'or files. 
See {__url__}/i18n' + fill( + f'No translation available for key {twtitle} of ' + f'{_messages_package_name} package in language ' + f'{getattr(source, "lang", source)}. It can happen due to an ' + f'outdated or missing i18n submodule or files. ' + f'See {__url__}/i18n.' + ) ) - # if source is a site then use its lang attribute, otherwise it's a str + # Get the bot prefix, if requested + prefix = get_bot_prefix(source, use_prefix=bot_prefix) + + # If the messages package isn't available at all, use fallback_prompt + if not messages_available(): + return _return_fallback_or_raise() + + # Determine language code from source lang = getattr(source, 'lang', source) - # There are two possible failure modes: the translation dict might not have - # the language altogether, or a specific key could be untranslated. Both - # modes are caught with the KeyError. + # Prepare list of languages to try; fallback adds alternatives and English langs = [lang] if fallback: langs += [*_altlang(lang), 'en'] + + # Try each language until a translation is found for alt in langs: trans = _get_translation(alt, twtitle) if trans: break else: - raise pywikibot.exceptions.TranslationError(fill( - 'No {} translation has been defined for TranslateWiki key "{}". ' - 'It can happen due to lack of i18n submodule or files or an ' - 'outdated submodule. 
See {}/i18n' - .format('English' if 'en' in langs else f"'{lang}'", - twtitle, __url__))) + # No translation found: return fallback_prompt if available + return _return_fallback_or_raise() + # Handle plural forms if present if '{{PLURAL:' in trans: - # _extract_plural supports in theory non-mappings, but they are - # deprecated if not isinstance(parameters, Mapping): raise TypeError('parameters must be a mapping.') trans = _extract_plural(alt, trans, parameters) + # Validate parameters type for string formatting if parameters is not None and not isinstance(parameters, Mapping): raise ValueError( f'parameters should be a mapping, not {type(parameters).__name__}' ) + # Apply string formatting if requested if not only_plural and parameters: trans = trans % parameters + + # Return the final translation with bot prefix return prefix + trans @@ -884,7 +899,7 @@ def bundles(stem: bool = False) -> Generator[Path | str, None, None]: >>> from pywikibot import i18n >>> bundles = sorted(i18n.bundles(stem=True)) >>> len(bundles) - 39 + 40 >>> bundles[:4] ['add_text', 'archivebot', 'basic', 'blockpageschecker'] >>> bundles[-5:] @@ -913,15 +928,19 @@ def bundles(stem: bool = False) -> Generator[Path | str, None, None]: def known_languages() -> list[str]: - """All languages we have localizations for. + """Return all languages we have localizations for. 
>>> from pywikibot import i18n - >>> i18n.known_languages()[:10] - ['ab', 'aeb', 'af', 'am', 'an', 'ang', 'anp', 'ar', 'arc', 'arz'] - >>> i18n.known_languages()[-10:] + >>> i18n.known_languages()[:2] + ['ab', 'aeb'] + >>> i18n.known_languages()[-10:] # doctest: +SKIP ['vo', 'vro', 'wa', 'war', 'xal', 'xmf', 'yi', 'yo', 'yue', 'zh'] - >>> len(i18n.known_languages()) - 251 + >>> len(i18n.known_languages()) > 250 + True + >>> 'ab' in i18n.known_languages() + True + >>> 'zh' in i18n.known_languages() + True The implementation is roughly equivalent to: diff --git a/pywikibot/logging.py b/pywikibot/logging.py index 4df2669ba1..c86ac7e7da 100644 --- a/pywikibot/logging.py +++ b/pywikibot/logging.py @@ -24,7 +24,7 @@ - :python:`Logging Cookbook` """ # -# (C) Pywikibot team, 2010-2024 +# (C) Pywikibot team, 2010-2025 # # Distributed under the terms of the MIT license. # @@ -61,7 +61,7 @@ """ _init_routines: list[Callable[[], Any]] = [] -_inited_routines = set() +_inited_routines: set[Callable[[], Any]] = set() def add_init_routine(routine: Callable[[], Any]) -> None: @@ -349,7 +349,7 @@ def exception(msg: Any = None, *args: Any, if msg is None: exc_type, value, _tb = sys.exc_info() msg = str(value) - if not exc_info: + if exc_type is not None and not exc_info: msg += f' ({exc_type.__name__})' assert msg is not None error(msg, *args, exc_info=exc_info, **kwargs) diff --git a/pywikibot/login.py b/pywikibot/login.py index 42e41ab7d1..94650e93eb 100644 --- a/pywikibot/login.py +++ b/pywikibot/login.py @@ -153,9 +153,8 @@ def check_user_exists(self) -> None: if user['name'] != main_username: # Report the same error as server error code NotExists - raise NoUsernameError( - f"Username '{main_username}' does not exist on {self.site}" - ) + msg = f"Username '{main_username}' does not exist on {self.site}" + raise NoUsernameError(msg) def botAllowed(self) -> bool: """Check whether the bot is listed on a specific page. 
@@ -322,7 +321,7 @@ def login(self, retry: bool = False, autocreate: bool = False) -> bool: if error_code in ('NotExists', 'Illegal', 'readapidenied', 'Failed', 'Aborted', 'FAIL'): - error_msg = f'{e.code}: {e.info}' + error_msg = f'{e.code} on {self.site}: {e.info}' raise NoUsernameError(error_msg) pywikibot.error(f'Login failed ({error_code}).') @@ -454,14 +453,16 @@ def login_to_site(self) -> None: if status in ('NeedToken', 'WrongToken', 'badtoken'): # if incorrect login token was used, # force relogin and generate fresh one - pywikibot.error('Received incorrect login token. ' - 'Forcing re-login.') + pywikibot.error(f'{status}: Received incorrect login token.' + ' Forcing re-login.') # invalidate superior wiki cookies (T224712) pywikibot.data.api._invalidate_superior_cookies( self.site.family) - self.site.tokens.clear() - login_request[ - self.keyword('token')] = self.site.tokens['login'] + token = response.get('token') + if not token: + self.site.tokens.clear() + token = self.site.tokens['login'] + login_request[self.keyword('token')] = token continue if status == 'UI': # pragma: no cover diff --git a/pywikibot/page/_basepage.py b/pywikibot/page/_basepage.py index c3f9678b54..93af7cf31b 100644 --- a/pywikibot/page/_basepage.py +++ b/pywikibot/page/_basepage.py @@ -12,7 +12,7 @@ from contextlib import suppress from itertools import islice from textwrap import shorten, wrap -from typing import TYPE_CHECKING, Any, NoReturn +from typing import TYPE_CHECKING from urllib.parse import quote_from_bytes from warnings import warn @@ -38,15 +38,15 @@ from pywikibot.tools import ( ComparableMixin, cached, - deprecate_positionals, deprecated, deprecated_args, + deprecated_signature, first_upper, ) if TYPE_CHECKING: - from typing_extensions import Literal + from typing import Any, Literal, NoReturn from pywikibot.page import Revision @@ -391,8 +391,7 @@ def get(self, force: bool = False, get_redirect: bool = False) -> str: page_section = self.section() if page_section: 
content = textlib.extract_sections(text, self.site) - headings = {section.heading for section in content.sections} - if page_section not in headings: + if page_section not in content.sections: raise SectionError(f'{page_section!r} is not a valid section ' f'of {self.title(with_section=False)}') @@ -1458,13 +1457,40 @@ def put(self, newtext: str, force=force, asynchronous=asynchronous, callback=callback, **kwargs) - def watch(self, unwatch: bool = False) -> bool: - """Add or remove this page to/from bot account's watchlist. + @deprecated_signature(since='10.4.0') + def watch( + self, *, + unwatch: bool = False, + expiry: Timestamp | str | Literal[ + 'infinite', 'indefinite', 'infinity', 'never'] | None = None + ) -> bool: + """Add or remove this page from the bot account's watchlist. - :param unwatch: True to unwatch, False (default) to watch. + .. versionchanged:: 10.4.0 + Added the *expiry* parameter to specify watch expiry time. + Positional parameters are deprecated; all parameters must be + passed as keyword arguments. + + .. seealso:: + - :meth:`Site.watch()` + - :meth:`Site.watched_pages() + ` + - :api:`Watch` + + :param unwatch: If True, the page will be removed from the watchlist. + :param expiry: Expiry timestamp to apply to the watch. Passing + None or omitting this parameter leaves any existing expiry + unchanged. Expiry values may be relative (e.g. ``5 months`` + or ``2 weeks``) or absolute (e.g. ``2014-09-18T12:34:56Z``). + For no expiry, use ``infinite``, ``indefinite``, ``infinity`` + or ``never``. For absolute timestamps the :class:`Timestamp` + class can be used. :return: True if successful, False otherwise.
+ :raises APIError: badexpiry: Invalid value for expiry parameter + :raises KeyError: 'watch' isn't in API response + :raises TypeError: unexpected keyword argument """ - return self.site.watch(self, unwatch) + return self.site.watch(self, unwatch=unwatch, expiry=expiry) def clear_cache(self) -> None: """Clear the cached attributes of the page.""" @@ -1634,7 +1660,7 @@ def data_item(self) -> pywikibot.page.ItemPage: """Convenience function to get the Wikibase item of a page.""" return pywikibot.ItemPage.fromPage(self) - @deprecate_positionals(since='9.2') + @deprecated_signature(since='9.2') def templates(self, *, content: bool = False, @@ -1679,7 +1705,7 @@ def templates(self, return list(self._templates) - @deprecate_positionals(since='9.2') + @deprecated_signature(since='9.2') def itertemplates( self, total: int | None = None, @@ -1978,6 +2004,50 @@ def move(self, noredirect=noredirect, movesubpages=movesubpages) + def rollback(self, **kwargs: Any) -> dict[str, int | str]: + """Roll back this page to the version before the last edit by a user. + + .. versionadded:: 10.5 + + .. seealso:: + :meth:`Site.rollbackpage() + ` + + :keyword tags: Tags to apply to the rollback. + :kwtype tags: str | Sequence[str] | None + :keyword str user: The last user to be rolled back; Default is + :attr:`BasePage.latest_revision.user + `. + :keyword str | None summary: Custom edit summary for the rollback + :keyword bool | None markbot: Mark the reverted edits and the + revert as bot edits. If not given, it is set to True if the + rollback user belongs to the 'bot' group, otherwise False. + :keyword watchlist: Unconditionally add or remove the page from + the current user's watchlist; 'preferences' is ignored for + bot users. + :kwtype watchlist: Literal['watch', 'unwatch', 'preferences', + 'nochange'] | None + :keyword watchlistexpiry: Watchlist expiry timestamp. Omit this + parameter entirely to leave the current expiry unchanged. 
+ :kwtype watchlistexpiry: pywikibot.Timestamp | str | Literal[ + 'infinite', 'indefinite', 'infinity', 'never'] | None + :returns: Dictionary containing rollback result like + + .. code:: python + + { + 'title': , + 'pageid': , + 'summary': , + 'revid': , + 'old_revid': , + 'last_revid': , + } + + :raises exceptions.Error: The rollback fails. + """ + return self.site.rollbackpage(self, **kwargs) + def delete( self, reason: str | None = None, diff --git a/pywikibot/page/_category.py b/pywikibot/page/_category.py index f352394e01..ccc457eaaa 100644 --- a/pywikibot/page/_category.py +++ b/pywikibot/page/_category.py @@ -61,7 +61,7 @@ def aslink(self, sort_key: str | None = None) -> str: def subcategories(self, *, recurse: int | bool = False, - **kwargs: Any) -> Generator[Page, None, None]: + **kwargs: Any) -> Generator[Category, None, None]: """Yield all subcategories of the current category. **Usage:** @@ -200,7 +200,7 @@ def articles(self, *, return def members(self, *, - recurse: bool = False, + recurse: int | bool = False, total: int | None = None, **kwargs: Any) -> Generator[Page, None, None]: """Yield all category contents (subcats, pages, and files). diff --git a/pywikibot/page/_collections.py b/pywikibot/page/_collections.py index 0bcc05c3fd..dea73af86c 100644 --- a/pywikibot/page/_collections.py +++ b/pywikibot/page/_collections.py @@ -1,11 +1,12 @@ """Structures holding data for Wikibase entities.""" # -# (C) Pywikibot team, 2019-2024 +# (C) Pywikibot team, 2019-2025 # # Distributed under the terms of the MIT license. # from __future__ import annotations +import reprlib from collections import defaultdict from collections.abc import MutableMapping, MutableSequence from typing import Any @@ -31,7 +32,7 @@ class BaseDataDict(MutableMapping): in subclasses.
""" - def __init__(self, data=None) -> None: + def __init__(self, data: dict[str, Any] = None) -> None: super().__init__() self._data = {} if data: @@ -42,15 +43,15 @@ def new_empty(cls, repo): """Construct a new empty BaseDataDict.""" return cls() - def __getitem__(self, key): + def __getitem__(self, key: BaseSite | str) -> Any: key = self.normalizeKey(key) return self._data[key] - def __setitem__(self, key, value) -> None: + def __setitem__(self, key: BaseSite | str, value: Any) -> None: key = self.normalizeKey(key) self._data[key] = value - def __delitem__(self, key) -> None: + def __delitem__(self, key: BaseSite | str) -> None: key = self.normalizeKey(key) del self._data[key] @@ -60,12 +61,12 @@ def __iter__(self): def __len__(self) -> int: return len(self._data) - def __contains__(self, key) -> bool: + def __contains__(self, key: BaseSite | str) -> bool: key = self.normalizeKey(key) return key in self._data def __repr__(self) -> str: - return f'{type(self)}({self._data})' + return f'{type(self).__name__}({reprlib.repr(self._data)})' @staticmethod def normalizeKey(key) -> str: @@ -241,7 +242,7 @@ def __contains__(self, key) -> bool: return key in self._data def __repr__(self) -> str: - return f'{type(self)}({self._data})' + return f'{type(self).__name__}({reprlib.repr(self._data)})' @classmethod def normalizeData(cls, data) -> dict: diff --git a/pywikibot/page/_page.py b/pywikibot/page/_page.py index 1b30fbaa90..8a5a5a7d90 100644 --- a/pywikibot/page/_page.py +++ b/pywikibot/page/_page.py @@ -9,7 +9,7 @@ itself, including its contents. """ # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. # @@ -182,7 +182,7 @@ def set_redirect_target( if save: self.save(**kwargs) - def get_best_claim(self, prop: str): + def get_best_claim(self, prop: str) -> pywikibot.Claim | None: """Return the first best Claim for this page. 
Return the first 'preferred' ranked Claim specified by Wikibase @@ -190,40 +190,29 @@ def get_best_claim(self, prop: str): .. versionadded:: 3.0 - :param prop: property id, "P###" + .. seealso:: :meth:`pywikibot.ItemPage.get_best_claim` + + :param prop: Wikibase property ID, must be of the form ``P`` + followed by one or more digits (e.g. ``P31``). :return: Claim object given by Wikibase property number for this page object. - :rtype: pywikibot.Claim or None :raises UnknownExtensionError: site has no Wikibase extension """ - def find_best_claim(claims): - """Find the first best ranked claim.""" - index = None - for i, claim in enumerate(claims): - if claim.rank == 'preferred': - return claim - if index is None and claim.rank == 'normal': - index = i - if index is None: - index = 0 - return claims[index] - - if not self.site.has_data_repository: - raise UnknownExtensionError( - f'Wikibase is not implemented for {self.site}.') - - def get_item_page(func, *args): + def get_item_page(page): + if not page.site.has_data_repository: + raise UnknownExtensionError( + f'Wikibase is not implemented for {page.site}.') try: - item_p = func(*args) + item_p = page.data_item() item_p.get() return item_p except NoPageError: return None except IsRedirectPageError: - return get_item_page(item_p.getRedirectTarget) + return get_item_page(item_p.getRedirectTarget()) - item_page = get_item_page(pywikibot.ItemPage.fromPage, self) - if item_page and prop in item_page.claims: - return find_best_claim(item_page.claims[prop]) + item_page = get_item_page(page=self) + if item_page: + return item_page.get_best_claim(prop) return None diff --git a/pywikibot/page/_user.py b/pywikibot/page/_user.py index 206c1be7d3..8f515e6124 100644 --- a/pywikibot/page/_user.py +++ b/pywikibot/page/_user.py @@ -112,12 +112,13 @@ def getprops(self, force: bool = False) -> dict: if force and hasattr(self, '_userprops'): del self._userprops if not hasattr(self, '_userprops'): - self._userprops = 
list(self.site.users([self.username]))[0] + self._userprops = next(self.site.users([self.username])) if self.isAnonymous() or self.is_CIDR(): - r = list(self.site.blocks(iprange=self.username, total=1)) + r = next(self.site.blocks(iprange=self.username, total=1), + None) if r: - self._userprops['blockedby'] = r[0]['by'] - self._userprops['blockreason'] = r[0]['reason'] + self._userprops['blockedby'] = r['by'] + self._userprops['blockreason'] = r['reason'] return self._userprops def registration(self, diff --git a/pywikibot/page/_wikibase.py b/pywikibot/page/_wikibase.py index 5bf8dbd44b..5da0ea6ad8 100644 --- a/pywikibot/page/_wikibase.py +++ b/pywikibot/page/_wikibase.py @@ -64,15 +64,17 @@ ) if TYPE_CHECKING: - LANGUAGE_IDENTIFIER = str | pywikibot.site.APISite + from typing import Union + LANGUAGE_IDENTIFIER = Union[str, pywikibot.site.APISite] ALIASES_TYPE = dict[LANGUAGE_IDENTIFIER, list[str]] LANGUAGE_TYPE = dict[LANGUAGE_IDENTIFIER, str] - SITELINK_TYPE = ( - pywikibot.page.BasePage - | pywikibot.page.BaseLink - | dict[str, str] - ) - ENTITY_DATA_TYPE = dict[str, LANGUAGE_TYPE | ALIASES_TYPE | SITELINK_TYPE] + SITELINK_TYPE = Union[ + pywikibot.page.BasePage, + pywikibot.page.BaseLink, + dict[str, str] + ] + ENTITY_DATA_TYPE = dict[str, + Union[LANGUAGE_TYPE, ALIASES_TYPE, SITELINK_TYPE]] class WikibaseEntity: @@ -1341,6 +1343,109 @@ def isRedirectPage(self): return self._isredir return super().isRedirectPage() + def get_best_claim(self, prop: str) -> pywikibot.Claim | None: + """Return the first best Claim for this page. + + Return the first 'preferred' ranked Claim specified by Wikibase + property or the first 'normal' one otherwise. + + .. versionadded:: 10.4 + + .. seealso:: :meth:`pywikibot.Page.get_best_claim` + + :param prop: Wikibase property ID, must be of the form ``P`` + followed by one or more digits (e.g. ``P31``). + :return: Claim object given by Wikibase property number + for this page object. 
+ + :raises UnknownExtensionError: site has no Wikibase extension + """ + + def find_best_claim(claims): + """Find the first best ranked claim.""" + index = None + for i, claim in enumerate(claims): + if claim.rank == 'preferred': + return claim + if index is None and claim.rank == 'normal': + index = i + if index is None: + index = 0 + return claims[index] + + if prop in self.claims: + return find_best_claim(self.claims[prop]) + return None + + def get_value_at_timestamp( + self, + prop: str, + timestamp: pywikibot.WbTime, + lang: str = 'en' + ) -> pywikibot.WbRepresentation | None: + """Return the best value for this page at a given timestamp. + + .. versionadded:: 10.4 + + :param prop: property id, "P###" + :param timestamp: the timestamp to check the value at + :param lang: the language to return the value in + :return: :class:`pywikibot.WbRepresentation` object given by + Wikibase property number for this page object and valid for + the given timestamp and language. + + :raises NoWikibaseEntityError: site has no time interval properties + """ + fam = self.site.family + if not hasattr(fam, 'interval_start_property') or \ + not hasattr(fam, 'interval_end_property'): + raise NoWikibaseEntityError( + f'{fam} does not have time interval properties') + + startp, endp = fam.interval_start_property, fam.interval_end_property + + def timestamp_in_interval(p, ts): + """Check if timestamp is within the qualifiers.""" + q1 = p.qualifiers.get(startp, []) + q2 = p.qualifiers.get(endp, []) + d1 = d2 = None + if q1: + d1 = q1[0].getTarget() + if q2: + d2 = q2[0].getTarget() + if d1 and d2: + return d1 <= ts <= d2 + if d1: + return d1 <= ts + if d2: + return d2 >= ts + return False + + def find_value_at_timestamp(claims, ts, language): + """Find the first best ranked claim at a given timestamp.""" + sorted_claims = sorted( + claims, + key=(lambda c: c.qualifiers.get(startp)[0].getTarget() + if c.qualifiers.get(startp) + else pywikibot.WbTime(0, site=self.site)), + reverse=True 
+ ) + best_claim = None + for claim in sorted_claims: + if claim.rank == 'deprecated': + continue + if timestamp_in_interval(claim, ts): + if (claim.type != 'monolingualtext' + or claim.getTarget().language == language)\ + and claim.has_better_rank(best_claim): + best_claim = claim + return best_claim and best_claim.getTarget() + + if prop in self.claims: + return find_value_at_timestamp(self.claims[prop], timestamp, lang) + + return None + class Property: @@ -2134,6 +2239,19 @@ def _formatDataValue(self) -> dict: 'type': self.value_types.get(self.type, self.type) } + def has_better_rank(self, other: Claim | None) -> bool: + """Check if this claim has a better rank than the other claim. + + .. versionadded:: 10.6 + + :param other: The other claim to compare with. + :return: True if this claim has a better rank, False otherwise. + """ + if other is None: + return True + rank_order = {'preferred': 3, 'normal': 2, 'deprecated': 1} + return rank_order.get(self.rank, 0) > rank_order.get(other.rank, 0) + class LexemePage(WikibasePage): diff --git a/pywikibot/pagegenerators/__init__.py b/pywikibot/pagegenerators/__init__.py index dea213cd1d..58cfb4b86d 100644 --- a/pywikibot/pagegenerators/__init__.py +++ b/pywikibot/pagegenerators/__init__.py @@ -12,7 +12,7 @@ &params; """ # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license.
# @@ -594,7 +594,9 @@ def PageWithTalkPageGenerator( if not return_talk_only or page.isTalkPage(): yield page if not page.isTalkPage(): - yield page.toggleTalkPage() + talk_page = page.toggleTalkPage() + if talk_page is not None: + yield talk_page def RepeatingGenerator( diff --git a/pywikibot/pagegenerators/_factory.py b/pywikibot/pagegenerators/_factory.py index 3c076def4e..ba4c648c6c 100644 --- a/pywikibot/pagegenerators/_factory.py +++ b/pywikibot/pagegenerators/_factory.py @@ -1,6 +1,6 @@ """GeneratorFactory module which handles pagegenerators options.""" # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. # @@ -12,7 +12,7 @@ from datetime import timedelta from functools import partial from itertools import zip_longest -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import pywikibot from pywikibot import i18n @@ -62,13 +62,13 @@ if TYPE_CHECKING: - from typing_extensions import Literal + from typing import Any, Literal, Optional from pywikibot.site import BaseSite, Namespace HANDLER_GEN_TYPE = Iterable[pywikibot.page.BasePage] GEN_FACTORY_CLAIM_TYPE = list[tuple[str, str, dict[str, str], bool]] - OPT_GENERATOR_TYPE = HANDLER_GEN_TYPE | None + OPT_GENERATOR_TYPE = Optional[HANDLER_GEN_TYPE] # This is the function that will be used to de-duplicate page iterators. diff --git a/pywikibot/pagegenerators/_filters.py b/pywikibot/pagegenerators/_filters.py index cd006e2a14..641724d1ba 100644 --- a/pywikibot/pagegenerators/_filters.py +++ b/pywikibot/pagegenerators/_filters.py @@ -1,6 +1,6 @@ """Page filter generators provided by the pagegenerators module.""" # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. 
# @@ -20,16 +20,18 @@ if TYPE_CHECKING: + from typing import Union + from pywikibot.site import BaseSite, Namespace PRELOAD_SITE_TYPE = dict[pywikibot.site.BaseSite, list[pywikibot.page.BasePage]] - PATTERN_STR_OR_SEQ_TYPE = ( - str - | Pattern[str] - | Sequence[str] - | Sequence[Pattern[str]] - ) + PATTERN_STR_OR_SEQ_TYPE = Union[ + str, + Pattern[str], + Sequence[str], + Sequence[Pattern[str]], + ] # This is the function that will be used to de-duplicate page iterators. diff --git a/pywikibot/pagegenerators/_generators.py b/pywikibot/pagegenerators/_generators.py index 4e04b8c569..c74a972e42 100644 --- a/pywikibot/pagegenerators/_generators.py +++ b/pywikibot/pagegenerators/_generators.py @@ -282,18 +282,26 @@ def upcast(gen): def UnconnectedPageGenerator( site: BaseSite | None = None, - total: int | None = None + total: int | None = None, + *, + strict: bool = False ) -> Iterable[pywikibot.page.Page]: """Iterate Page objects for all unconnected pages to a Wikibase repository. - :param total: Maximum number of pages to retrieve in total + .. versionchanged:: + The *strict* parameter was added. + :param site: Site for generator results. + :param total: Maximum number of pages to retrieve in total + :param strict: If ``True``, verify that each page still has no data + item before yielding it. + :raises ValueError: The given site does not have Wikibase repository """ if site is None: site = pywikibot.Site() if not site.data_repository(): raise ValueError('The given site does not have Wikibase repository.') - return site.unconnected_pages(total=total) + return site.unconnected_pages(total=total, strict=strict) def FileLinksGenerator( diff --git a/pywikibot/plural.py b/pywikibot/plural.py index df526d52fd..d353bcf7c1 100644 --- a/pywikibot/plural.py +++ b/pywikibot/plural.py @@ -1,16 +1,17 @@ """Module containing plural rules of various languages.""" # -# (C) Pywikibot team, 2011-2022 +# (C) Pywikibot team, 2011-2025 # # Distributed under the terms of the MIT license. 
# from __future__ import annotations -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING if TYPE_CHECKING: - PluralRule = dict[str, int | Callable[[int], bool | int]] + from typing import Callable, Union + PluralRule = dict[str, Union[int, Callable[[int], Union[bool, int]]]] plural_rules: dict[str, PluralRule] = { '_default': {'nplurals': 2, 'plural': lambda n: (n != 1)}, @@ -62,11 +63,12 @@ 0 if (n == 0) else 1 if n == 1 else 2}, - 'mt': {'nplurals': 4, 'plural': lambda n: - 0 if (n == 1) else - 1 if (n == 0 or (1 < (n % 100) < 11)) else - 2 if (10 < (n % 100) < 20) else - 3}, + 'mt': {'nplurals': 5, 'plural': lambda n: + 0 if n == 1 else + 1 if n == 2 else + 2 if n == 0 or 3 <= (n % 100) <= 10 else + 3 if 11 <= (n % 100) <= 19 else + 4}, 'pl': {'nplurals': 3, 'plural': lambda n: 0 if (n == 1) else 1 if (2 <= (n % 10) <= 4) and (n % 100 < 10 or n % 100 >= 20) diff --git a/pywikibot/scripts/__init__.py b/pywikibot/scripts/__init__.py index 73a1e556de..415cf12752 100644 --- a/pywikibot/scripts/__init__.py +++ b/pywikibot/scripts/__init__.py @@ -2,10 +2,11 @@ .. versionadded:: 7.0 .. versionremoved:: 9.4 - ``preload_sites`` script was removed (:phab:`T348925`). + ``preload_sites`` script, previously added in release 6.0 + (:phab:`T226157`), was removed (:phab:`T348925`). """ # -# (C) Pywikibot team, 2021-2022 +# (C) Pywikibot team, 2021-2025 # # Distributed under the terms of the MIT license. 
# @@ -27,6 +28,6 @@ def _import_with_no_user_config(*import_args): # Reset this flag if not orig_no_user_config: del environ['PYWIKIBOT_NO_USER_CONFIG'] - else: + else: # pragma: no cover environ['PYWIKIBOT_NO_USER_CONFIG'] = orig_no_user_config return result diff --git a/pywikibot/scripts/generate_family_file.py b/pywikibot/scripts/generate_family_file.py index 3c97c3d646..1c382130fe 100755 --- a/pywikibot/scripts/generate_family_file.py +++ b/pywikibot/scripts/generate_family_file.py @@ -43,6 +43,7 @@ import sys from contextlib import suppress from pathlib import Path +from textwrap import fill from urllib.parse import urlparse, urlunparse @@ -96,7 +97,12 @@ def __init__(self, self.wikis = {} # {'https://wiki/$1': Wiki('https://wiki/$1'), ...} self.langs = [] # [Wiki('https://wiki/$1'), ...] - def get_params(self) -> bool: # pragma: no cover + @staticmethod + def show(*args, **kwargs): + """Wrapper around print to be mocked in tests.""" + print(*args, **kwargs) + + def get_params(self) -> bool: """Ask for parameters if necessary.""" if self.base_url is None: with suppress(KeyboardInterrupt): @@ -117,8 +123,8 @@ def get_params(self) -> bool: # pragma: no cover return False if any(x not in NAME_CHARACTERS for x in self.name): - print(f'ERROR: Name of family "{self.name}" must be ASCII letters' - ' and digits [a-zA-Z0-9]') + self.show(f'ERROR: Name of family "{self.name}" must be ASCII' + ' letters and digits [a-zA-Z0-9]') return False return True @@ -141,7 +147,7 @@ def get_wiki(self): break else: return w, verify - return None, None # pragma: no cover + return None, None def run(self) -> None: """Main method, generate family file.""" @@ -153,10 +159,10 @@ def run(self) -> None: return self.wikis[w.lang] = w - print('\n==================================' - f'\nAPI url: {w.api}' - f'\nMediaWiki version: {w.version}' - '\n==================================\n') + self.show('\n==================================' + f'\nAPI url: {w.api}' + f'\nMediaWiki version: 
{w.version}' + '\n==================================\n') self.getlangs(w) self.getapis() @@ -171,13 +177,14 @@ def getlangs(self, w) -> None: same domain are collected. A [h]elp answer was added to show more information about possible answers. """ - print('Determining other sites...', end='') + self.show('Determining other sites...', end='') try: self.langs = w.langs - print(' '.join(sorted(wiki['prefix'] for wiki in self.langs))) - except Exception as e: # pragma: no cover + self.show(fill(' '.join(sorted(wiki['prefix'] + for wiki in self.langs)))) + except Exception as e: self.langs = [] - print(e, '; continuing...') + self.show(e, '; continuing...') if len([lang for lang in self.langs if lang['url'] == w.iwpath]) == 0: if w.private_wiki: @@ -190,7 +197,7 @@ def getlangs(self, w) -> None: code_len = len(self.langs) if code_len > 1: if self.dointerwiki is None: - while True: # pragma: no cover + while True: makeiw = input( '\n' f'There are {code_len} sites available.' @@ -199,7 +206,7 @@ def getlangs(self, w) -> None: '([y]es, [s]trict, [N]o, [e]dit), [h]elp) ').lower() if makeiw in ('y', 's', 'n', 'e', ''): break - print( + self.show( '\n' '[y]es: create interwiki links for all sites\n' '[s]trict: yes, but for sites with same domain only\n' @@ -218,9 +225,9 @@ def getlangs(self, w) -> None: self.langs = [wiki for wiki in self.langs if domain in wiki['url']] - elif makeiw == 'e': # pragma: no cover + elif makeiw == 'e': for wiki in self.langs: - print(wiki['prefix'], wiki['url']) + self.show(wiki['prefix'], wiki['url']) do_langs = re.split(' *,| +', input('Which sites do you want: ')) self.langs = [wiki for wiki in self.langs @@ -235,20 +242,20 @@ def getlangs(self, w) -> None: def getapis(self) -> None: """Load other site pages.""" - print(f'Loading {len(self.langs)} wikis... ') + self.show(f'Loading {len(self.langs)} wikis... ') remove = [] for lang in self.langs: key = lang['prefix'] - print(f' * {key}... ', end='') + self.show(f' * {key}... 
', end='') if key not in self.wikis: try: self.wikis[key] = self.Wiki(lang['url']) - print('downloaded') - except Exception as e: # pragma: no cover - print(e) + self.show('downloaded') + except Exception as e: + self.show(e) remove.append(lang) else: - print('in cache') + self.show('in cache') for lang in remove: self.langs.remove(lang) @@ -256,11 +263,11 @@ def getapis(self) -> None: def writefile(self, verify) -> None: """Write the family file.""" fp = Path(self.base_dir, 'families', f'{self.name}_family.py') - print(f'Writing {fp}... ') + self.show(f'Writing {fp}... ') if fp.exists() and input( f'{fp} already exists. Overwrite? (y/n) ').lower() == 'n': - print('Terminating.') + self.show('Terminating.') sys.exit(1) code_hostname_pairs = '\n '.join( diff --git a/pywikibot/scripts/generate_user_files.py b/pywikibot/scripts/generate_user_files.py index f4d1612637..13957680f8 100755 --- a/pywikibot/scripts/generate_user_files.py +++ b/pywikibot/scripts/generate_user_files.py @@ -311,7 +311,7 @@ def input_sections(variant: str, select = pywikibot.input_choice( f'Do you want to select {variant} setting sections?', answers, default=default, force=force, automatic_quit=False) - if select == 'h': # pragma: no cover + if select == 'h': answers.pop(-1) pywikibot.info( f'The following {variant} setting sections are provided:') @@ -324,7 +324,7 @@ def input_sections(variant: str, choice = {'a': 'all', 'n': 'none', 'y': 'h'}[select] # mapping for item in filter(skip, sections): answers = [('Yes', 'y'), ('No', 'n'), ('Help', 'h')] - while choice == 'h': # pragma: no cover + while choice == 'h': choice = pywikibot.input_choice( f'Do you want to add {item.head} section?', answers, default='n', force=force, automatic_quit=False) @@ -439,7 +439,8 @@ def create_user_config( save_botpasswords(botpasswords, f_pass) -def save_botpasswords(botpasswords: str, path: Path) -> None: +def save_botpasswords(botpasswords: str, + path: Path) -> None: """Write botpasswords to file. 
:param botpasswords: botpasswords for password file diff --git a/pywikibot/scripts/shell.py b/pywikibot/scripts/shell.py index 2751e3af31..487bcab666 100755 --- a/pywikibot/scripts/shell.py +++ b/pywikibot/scripts/shell.py @@ -16,7 +16,7 @@ .. versionchanged:: 7.0 moved to pywikibot.scripts """ -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. # diff --git a/pywikibot/scripts/version.py b/pywikibot/scripts/version.py index 454f7f0261..661432bb6f 100755 --- a/pywikibot/scripts/version.py +++ b/pywikibot/scripts/version.py @@ -7,9 +7,24 @@ registered family .. versionchanged:: 7.0 - version script was moved to the framework scripts folder + version script was moved to the framework scripts folder. .. versionadded:: 9.1.2 - the *-nouser* option. + the *-nouser* option was added. +.. versionchanged:: 10.6 + The User-Agent string is now printed for the default site. To print + it for another site, call the ``pwb`` wrapper with the global option, + e.g.: + + pwb -site:wikipedia:test version + + .. note:: + The shown UA reflects the default config settings. It might differ + if a user-agent is passed via the *headers* parameter to + :func:`comms.http.request`, :func:`comms.http.fetch` or to + :class:`comms.eventstreams.EventStreams`. It can also differ if + :func:`comms.http.fetch` is used with *use_fake_user_agent* set to + ``True`` or to a custom UA string, or if + *fake_user_agent_exceptions* is defined in the :mod:`config` file. 
""" # # (C) Pywikibot team, 2007-2025 @@ -23,6 +38,7 @@ from pathlib import Path import pywikibot +from pywikibot.comms.http import user_agent from pywikibot.version import getversion @@ -93,6 +109,7 @@ def main(*args: str) -> None: pywikibot.info(' Please reinstall requests!') pywikibot.info('Python: ' + sys.version) + pywikibot.info('User-Agent: ' + user_agent(pywikibot.Site())) # check environment settings settings = {key for key in os.environ if key.startswith('PYWIKIBOT')} diff --git a/pywikibot/scripts/wrapper.py b/pywikibot/scripts/wrapper.py index d4b0081f7c..eff65f66a0 100755 --- a/pywikibot/scripts/wrapper.py +++ b/pywikibot/scripts/wrapper.py @@ -77,13 +77,13 @@ def check_pwb_versions(package: str) -> None: scripts_version = Version(getattr(package, '__version__', pwb.__version__)) wikibot_version = Version(pwb.__version__) - if scripts_version.release > wikibot_version.release: # pragma: no cover + if scripts_version.release > wikibot_version.release: print(f'WARNING: Pywikibot version {wikibot_version} is behind ' f'scripts package version {scripts_version}.\n' 'Your Pywikibot may need an update or be misconfigured.\n') # calculate previous minor release - if wikibot_version.minor > 0: # pragma: no cover + if wikibot_version.minor > 0: prev_wikibot = Version( f'{wikibot_version.major}.{wikibot_version.minor - 1}.' 
f'{wikibot_version.micro}' @@ -94,7 +94,7 @@ def check_pwb_versions(package: str) -> None: f'behind legacy Pywikibot version {prev_wikibot} and ' f'current version {wikibot_version}\n' 'Your scripts may need an update or be misconfigured.\n') - elif scripts_version.release < wikibot_version.release: # pragma: no cover + elif scripts_version.release < wikibot_version.release: print(f'WARNING: Scripts package version {scripts_version} is behind ' f'current version {wikibot_version}\n' 'Your scripts may need an update or be misconfigured.\n') @@ -141,7 +141,7 @@ def run_python_file(filename: str, args: list[str], package=None) -> None: # set environment values old_env = os.environ.copy() - for key, value in environ: # pragma: no cover + for key, value in environ: os.environ[key] = value sys.argv = [filename, *args] @@ -165,7 +165,7 @@ def run_python_file(filename: str, args: list[str], package=None) -> None: # end of snippet from coverage # Restore environment values - for key, value in environ: # pragma: no cover + for key, value in environ: if key in old_env: os.environ[key] = old_env[key] else: @@ -210,7 +210,7 @@ def handle_args( def _print_requirements(requirements, script, - variant) -> None: # pragma: no cover + variant) -> None: """Print pip command to install requirements.""" if not requirements: return @@ -341,7 +341,7 @@ def find_alternates(filename, script_paths): scripts = {} for folder in script_paths: - if not folder.exists(): # pragma: no cover + if not folder.exists(): warning( f'{folder} does not exists; remove it from user_script_paths') continue @@ -376,7 +376,7 @@ def find_alternates(filename, script_paths): alternatives, default='1') except QuitKeyboardInterrupt: return None - print() # pragma: no cover + print() return str(scripts[script]) @@ -406,7 +406,7 @@ def test_paths(paths, root: Path): # search through user scripts paths user_script_paths = [''] - if config.user_script_paths: # pragma: no cover + if config.user_script_paths: if 
isinstance(config.user_script_paths, list): user_script_paths += config.user_script_paths else: @@ -415,10 +415,11 @@ def test_paths(paths, root: Path): ' Ignoring this setting.', stacklevel=2) found = test_paths(user_script_paths, Path(config.base_dir)) - if found: # pragma: no cover + if found: return found - if site_package: # search for entry points + if site_package: # pragma: no cover + # search for entry points import importlib from importlib.metadata import entry_points @@ -477,7 +478,7 @@ def execute() -> bool: if global_args: # don't use sys.argv unknown_args = pwb.handle_args(global_args) - if unknown_args: # pragma: no cover + if unknown_args: print('ERROR: unknown pwb.py argument{}: {}\n' .format('' if len(unknown_args) == 1 else 's', ', '.join(unknown_args))) @@ -535,7 +536,7 @@ def main() -> None: .. versionchanged:: 7.0 previous implementation was renamed to :func:`execute` """ - if not check_modules(): # pragma: no cover + if not check_modules(): sys.exit() if not execute(): diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py index 5098e40f78..a79681454e 100644 --- a/pywikibot/site/_apisite.py +++ b/pywikibot/site/_apisite.py @@ -9,12 +9,11 @@ import datetime import re import time -import typing import webbrowser from collections import OrderedDict, defaultdict from contextlib import suppress from textwrap import fill -from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar +from typing import TYPE_CHECKING, Any, Literal, NamedTuple, TypeVar from warnings import warn import pywikibot @@ -75,6 +74,7 @@ cached, deprecate_arg, deprecated, + deprecated_signature, issue_deprecation_warning, merge_unique_dicts, normalize_username, @@ -828,13 +828,23 @@ def get_searched_namespaces(self, force: bool = False) -> set[Namespace]: @property def articlepath(self) -> str: - """Get the nice article path with ``{}``placeholder. + """Return article path with a ``{}`` placeholder. 
+ + Replaces the ``$1`` placeholder from MediaWiki with a + Python-compatible ``{}``. .. versionadded:: 7.0 + + .. versionchanged:: 10.3 + raises ValueError instead of AttributeError if "$1" + placeholder is missing. + + :raises ValueError: missing "$1" placeholder """ - path = self.siteinfo['general']['articlepath'] - # Assert $1 placeholder is present - assert '$1' in path, 'articlepath must contain "$1" placeholder' + path = self.siteinfo['articlepath'] + if '$1' not in path: + raise ValueError( + f'Invalid article path "{path}": missing "$1" placeholder') return path.replace('$1', '{}') @cached @@ -854,7 +864,7 @@ def linktrail(self) -> str: 'ca': "(?:[a-zàèéíòóúç·ïü]|'(?!'))*", 'kaa': "(?:[a-zıʼ’“»]|'(?!'))*", } - linktrail = self.siteinfo['general']['linktrail'] + linktrail = self.siteinfo['linktrail'] if linktrail == '/^()(.*)$/sD': # empty linktrail return '' @@ -1057,7 +1067,7 @@ def months_names(self) -> list[tuple[str, str]]: return self._months_names - def list_to_text(self, args: typing.Iterable[str]) -> str: + def list_to_text(self, args: Iterable[str]) -> str: """Convert a list of strings into human-readable text. 
The MediaWiki messages 'and' and 'word-separator' are used as @@ -1173,10 +1183,10 @@ def _build_namespaces(self) -> dict[int, Namespace]: for nsdata in self.siteinfo.get('namespaces', cache=False).values(): ns = nsdata.pop('id') if ns == 0: - canonical_name = nsdata.pop('*') + custom_name = canonical_name = nsdata.pop('name') custom_name = canonical_name else: - custom_name = nsdata.pop('*') + custom_name = nsdata.pop('name') canonical_name = nsdata.pop('canonical') default_case = Namespace.default_case(ns) @@ -1189,16 +1199,16 @@ def _build_namespaces(self) -> dict[int, Namespace]: namespace = Namespace(ns, canonical_name, custom_name, **nsdata) _namespaces[ns] = namespace - for item in self.siteinfo.get('namespacealiases'): + for item in self.siteinfo['namespacealiases']: ns = int(item['id']) try: namespace = _namespaces[ns] except KeyError: pywikibot.warning('Broken namespace alias "{}" (id: {}) on {}' - .format(item['*'], ns, self)) + .format(item['alias'], ns, self)) else: - if item['*'] not in namespace: - namespace.aliases.append(item['*']) + if item['alias'] not in namespace: + namespace.aliases.append(item['alias']) return _namespaces @@ -1556,7 +1566,7 @@ def page_can_be_edited( :raises ValueError: invalid action parameter """ - if action not in self.siteinfo.get('restrictions')['types']: + if action not in self.restrictions['types']: raise ValueError( f'{type(self).__name__}.page_can_be_edited(): ' f'Invalid value "{action}" for "action" parameter' @@ -2495,68 +2505,122 @@ def movepage( # catalog of rollback errors for use in error messages _rb_errors = { - 'noapiwrite': 'API editing not enabled on {site} wiki', - 'writeapidenied': 'User {user} not allowed to edit through the API', - 'alreadyrolled': - 'Page [[{title}]] already rolled back; action aborted.', - } # other errors shouldn't arise because we check for those errors + 'alreadyrolled': 'The last edit of page {title!r} by user {user!r} ' + 'was already rolled back.', + 'onlyauthor': 'The page 
{title!r} has only {user!r} as author', + } # standard error messages raises API error @need_right('rollback') def rollbackpage( self, - page: BasePage, + page: BasePage | None = None, + *, + pageid: int | None = None, **kwargs: Any - ) -> None: - """Roll back page to version before last user's edits. + ) -> dict[str, int | str]: + """Roll back a page to the version before the last edit by a user. - .. seealso:: :api:`Rollback` + This method wraps the MediaWiki :api:`Rollback`. The rollback + will revert the last edit(s) made by the specified user on the + given page. - The keyword arguments are those supported by the rollback API. + .. versionchanged:: 10.5 + Added *pageid* as alternative to *page* (one must be given). + *markbot* defaults to True if the rollbacker is a bot and not + explicitly given. The method now returns a dictionary with + rollback information. - As a precaution against errors, this method will fail unless - the page history contains at least two revisions, and at least - one that is not by the same user who made the last edit. + .. seealso:: + :meth:`page.BasePage.rollback` + + :param page: the Page to be rolled back. Cannot be used together + with *pageid*. + :param pageid: Page ID of the page to be rolled back. Cannot be + used together with *page*. + :keyword tags: Tags to apply to the rollback. + :kwtype tags: str | Sequence[str] | None + :keyword str user: The last user to be rolled back; Must be + given with *pageid*. Default is + :attr:`BasePage.latest_revision.user + ` if *page* is given. + :keyword str | None summary: Custom edit summary for the rollback + :keyword bool | None markbot: Mark the reverted edits and the + revert as bot edits. If not given, it is set to True if the + rollback user belongs to the 'bot' group, otherwise False. + :keyword watchlist: Unconditionally add or remove the page from + the current user's watchlist; 'preferences' is ignored for + bot users. 
+ :kwtype watchlist: Literal['watch', 'unwatch', 'preferences', + 'nochange'] | None + :keyword watchlistexpiry: Watchlist expiry timestamp. Omit this + parameter entirely to leave the current expiry unchanged. + :kwtype watchlistexpiry: pywikibot.Timestamp | str | Literal[ + 'infinite', 'indefinite', 'infinity', 'never'] | None + :returns: Dictionary containing rollback result like + + .. code:: python + + { + 'title': , + 'pageid': , + 'summary': , + 'revid': , + 'old_revid': , + 'last_revid': , + } + + :raises APIError: An error was returned by the rollback API, or + another standard API error occurred. + :raises Error: The page was already rolled back, or the given + *user* is the only author. + :raises NoPageError: The given *page* or *pageid* does not exist. + :raises TypeError: *pageid* is of invalid type. + :raises ValueError: Both *page* and *pageid* were given, or none + of them, or *pageid* has an invalid value. + """ + if page is not None and pageid is not None: + raise ValueError( + "The parameters 'page' and 'pageid' cannot be used together.") - :param page: the Page to be rolled back (must exist) - :keyword user: the last user to be rollbacked; - default is page.latest_revision.user - """ - if len(page._revisions) < 2: - raise Error( - f'Rollback of {page} aborted; load revision history first.') + if page is None and pageid is None: + raise ValueError( + "One of parameters 'page' or 'pageid' is required.") + + if page is None and pageid is not None: + page = next(self.load_pages_from_pageids(str(pageid)), None) + + if page is None: + raise NoPageError(pageid) user = kwargs.pop('user', page.latest_revision.user) - for rev in sorted(page._revisions.values(), reverse=True, - key=lambda r: r.timestamp): - # start with most recent revision first - if rev.user != user: - break - else: - raise Error(f'Rollback of {page} aborted; only one user in ' - f'revision history.') - - parameters = merge_unique_dicts(kwargs, - action='rollback', - title=page, - 
token=self.tokens['rollback'], - user=user) + params = merge_unique_dicts( + kwargs, + action='rollback', + title=page, + token=self.tokens['rollback'], + user=user, + ) + + rb_user = self.user() + if rb_user is not None and 'markbot' not in kwargs: + params['markbot'] = self.has_group('bot') + self.lock_page(page) - req = self.simple_request(**parameters) + req = self.simple_request(**params) try: - req.submit() + result = req.submit() except APIError as err: errdata = { - 'site': self, 'title': page.title(with_section=False), - 'user': self.user(), + 'user': user, } if err.code in self._rb_errors: raise Error( self._rb_errors[err.code].format_map(errdata) ) from None - pywikibot.debug( - f"rollback: Unexpected error code '{err.code}' received.") raise + else: + return result['rollback'] finally: self.unlock_page(page) @@ -2724,17 +2788,7 @@ def undelete( finally: self.unlock_page(page) - _protect_errors = { - 'noapiwrite': 'API editing not enabled on {site} wiki', - 'writeapidenied': 'User {user} not allowed to edit through the API', - 'permissiondenied': - 'User {user} not authorized to protect pages on {site} wiki.', - 'cantedit': - "User {user} can't protect this page because user {user} " - "can't edit it.", - 'protect-invalidlevel': 'Invalid protection level' - } - + @deprecated("the 'restrictions' property", since='10.5.0') def protection_types(self) -> set[str]: """Return the protection types available on this site. @@ -2744,12 +2798,14 @@ def protection_types(self) -> set[str]: >>> sorted(site.protection_types()) ['create', 'edit', 'move', 'upload'] - .. seealso:: :py:obj:`Siteinfo._get_default()` + .. deprecated:: 10.5 + Use :attr:`restrictions[types]` instead. :return: protection types available """ - return set(self.siteinfo.get('restrictions')['types']) + return self.restrictions['types'] + @deprecated("the 'restrictions' property", since='10.5.0') def protection_levels(self) -> set[str]: """Return the protection levels available on this site. 
@@ -2759,11 +2815,44 @@ def protection_levels(self) -> set[str]: >>> sorted(site.protection_levels()) ['', 'autoconfirmed', ... 'sysop', 'templateeditor'] - .. seealso:: :py:obj:`Siteinfo._get_default()` + .. deprecated:: 10.5 + Use :attr:`restrictions[levels]` instead. - :return: protection types available + :return: protection levels available """ - return set(self.siteinfo.get('restrictions')['levels']) + return self.restrictions['levels'] + + @property + def restrictions(self) -> dict[str, set[str]]: + """Return the page restrictions available on this site. + + **Example:** + + >>> site = pywikibot.Site('wikipedia:test') + >>> r = site.restrictions + >>> sorted(r['types']) + ['create', 'edit', 'move', 'upload'] + >>> sorted(r['levels']) + ['', 'autoconfirmed', ... 'sysop', 'templateeditor'] + + .. versionadded:: 10.5 + .. seealso:: :meth:`page_restrictions` + + :return: dict with keys 'types', 'levels', 'cascadinglevels' and + 'semiprotectedlevels', all as sets of strings + """ + return {k: set(v) for k, v in self.siteinfo['restrictions'].items()} + + _protect_errors = { + 'noapiwrite': 'API editing not enabled on {site} wiki', + 'writeapidenied': 'User {user} not allowed to edit through the API', + 'permissiondenied': + 'User {user} not authorized to protect pages on {site} wiki.', + 'cantedit': + "User {user} can't protect this page because user {user} " + "can't edit it.", + 'protect-invalidlevel': 'Invalid protection level' + } @need_right('protect') def protect( @@ -2778,15 +2867,14 @@ def protect( .. seealso:: - :meth:`page.BasePage.protect` - - :meth:`protection_types` - - :meth:`protection_levels` + - :attr:`restrictions` + - :meth:`page_restrictions` - :api:`Protect` :param protections: A dict mapping type of protection to - protection level of that type. Refer :meth:`protection_types` - for valid restriction types and :meth:`protection_levels` - for valid restriction levels. If None is given, however, - that protection will be skipped. 
+ protection level of that type. Refer :meth:`restrictions` + for valid restriction types restriction levels. If None is + given, however, that protection will be skipped. :param reason: Reason for the action :param expiry: When the block should expire. This expiry will be applied to all protections. If ``None``, ``'infinite'``, @@ -2907,31 +2995,76 @@ def unblockuser( return req.submit() @need_right('editmywatchlist') + @deprecated_signature(since='10.4.0') def watch( self, pages: BasePage | str | list[BasePage | str], - unwatch: bool = False + *, + unwatch: bool = False, + expiry: pywikibot.Timestamp | str | Literal[ + 'infinite', 'indefinite', 'infinity', 'never'] | None = None ) -> bool: """Add or remove pages from watchlist. - .. seealso:: :api:`Watch` + .. versionchanged:: 10.4.0 + Added the *expiry* parameter to specify watch expiry time. + Passing *unwatch* as a positional parameter is deprecated; + it must be passed as keyword argument. + + .. note:: When watching a page without *expiry*, the function + returns False if any page does not exist, because it was + not added to the watchlist. + + .. seealso:: + - :api:`Watch` + - :meth:`BasePage.watch` + - :meth:`Site.watched_pages() + ` :param pages: A single page or a sequence of pages. :param unwatch: If True, remove pages from watchlist; if False add them (default). - :return: True if API returned expected response; False otherwise + :param expiry: Expiry timestamp to apply to the watch. Passing + None or omitting this parameter leaves any existing expiry + unchanged. Expiry values may be relative (e.g. ``5 months`` + or ``2 weeks``) or absolute (e.g. ``2014-09-18T12:34:56Z``). + For no expiry, use ``infinite``, ``indefinite``, ``infinity`` + or `never`. For absolute timestamps the :class:`Timestamp` + class can be used. + :return: True if API returns expected response; False otherwise. + If *unwatch* is False, *expiry* is None or specifies no + defined end date, return False if the page does not exist. 
+ :raises APIError: badexpiry: Invalid value for expiry parameter :raises KeyError: 'watch' isn't in API response + :raises TypeError: unexpected keyword argument """ parameters = { 'action': 'watch', 'titles': pages, 'token': self.tokens['watch'], 'unwatch': unwatch, + 'expiry': expiry or None, } + + if not unwatch: + parameters['expiry'] = expiry or None + elif expiry: + msg = (f'\nexpiry parameter ({expiry!r}) is ignored when ' + f"unwatch=True.\nPlease omit 'expiry' when unwatching.") + warn(msg, category=UserWarning, stacklevel=2) + req = self.simple_request(**parameters) results = req.submit() - unwatch_s = 'unwatched' if unwatch else 'watched' - return all(unwatch_s in r for r in results['watch']) + watchtype = 'unwatched' if unwatch else 'watched' + + for r in results['watch']: + if watchtype not in r: + return False + + if 'missing' in r and 'watched' in r and 'expiry' not in r: + return False + + return True def purgepages( self, @@ -2989,7 +3122,7 @@ def is_uploaddisabled(self) -> bool: >>> site.is_uploaddisabled() True """ - return not self.siteinfo.get('general')['uploadsenabled'] + return not self.siteinfo['uploadsenabled'] def stash_info( self, diff --git a/pywikibot/site/_datasite.py b/pywikibot/site/_datasite.py index d67f2ba560..9b3492eb1e 100644 --- a/pywikibot/site/_datasite.py +++ b/pywikibot/site/_datasite.py @@ -138,35 +138,32 @@ def get_entity_for_entity_id(self, entity_id): raise NoWikibaseEntityError(entity) @property - def sparql_endpoint(self): + def sparql_endpoint(self) -> str | None: """Return the sparql endpoint url, if any has been set. :return: sparql endpoint url - :rtype: str|None """ - return self.siteinfo['general'].get('wikibase-sparql') + return self.siteinfo.get('wikibase-sparql') @property - def concept_base_uri(self): + def concept_base_uri(self) -> str: """Return the base uri for concepts/entities. 
:return: concept base uri - :rtype: str """ - return self.siteinfo['general']['wikibase-conceptbaseuri'] + return self.siteinfo['wikibase-conceptbaseuri'] - def geo_shape_repository(self): + def geo_shape_repository(self) -> DataSite | None: """Return Site object for the geo-shapes repository e.g. commons.""" - url = self.siteinfo['general'].get('wikibase-geoshapestoragebaseurl') + url = self.siteinfo.get('wikibase-geoshapestoragebaseurl') if url: return pywikibot.Site(url=url, user=self.username()) return None - def tabular_data_repository(self): + def tabular_data_repository(self) -> DataSite | None: """Return Site object for the tabular-data repository e.g. commons.""" - url = self.siteinfo['general'].get( - 'wikibase-tabulardatastoragebaseurl') + url = self.siteinfo.get('wikibase-tabulardatastoragebaseurl') if url: return pywikibot.Site(url=url, user=self.username()) @@ -212,7 +209,7 @@ def preload_entities( if not hasattr(self, '_entity_namespaces'): self._cache_entity_namespaces() for batch in batched(pagelist, groupsize): - req = {'ids': [], 'titles': [], 'sites': []} + req: dict[str, list[str]] = {'ids': [], 'titles': [], 'sites': []} for p in batch: if isinstance(p, pywikibot.page.WikibaseEntity): ident = p._defined_by() diff --git a/pywikibot/site/_extensions.py b/pywikibot/site/_extensions.py index 66ea1ff20a..8b15cf846d 100644 --- a/pywikibot/site/_extensions.py +++ b/pywikibot/site/_extensions.py @@ -6,7 +6,10 @@ # from __future__ import annotations +from typing import TYPE_CHECKING, Protocol + import pywikibot +from pywikibot.backports import Generator, Iterable from pywikibot.data import api from pywikibot.echo import Notification from pywikibot.exceptions import ( @@ -20,6 +23,39 @@ from pywikibot.tools import merge_unique_dicts +if TYPE_CHECKING: + from pywikibot.site import NamespacesDict + + +class BaseSiteProtocol(Protocol): + _proofread_levels: dict[int, str] + tokens: dict[str, str] + + def _generator(self, *args, **kwargs) -> api.Request: + 
... + + def _request(self, **kwargs) -> api.Request: + ... + + def _update_page(self, *args, **kwargs) -> None: + ... + + def encoding(self) -> str: + ... + + @property + def namespaces(self, **kwargs) -> NamespacesDict: + ... + + def simple_request(self, **kwargs) -> api.Request: + ... + + def querypage( + self, *args, **kwargs + ) -> Generator[tuple[pywikibot.Page, int], None, None]: + ... + + class EchoMixin: """APISite mixin for Echo extension.""" @@ -50,15 +86,13 @@ def notifications(self, **kwargs): for notification in notifications) @need_extension('Echo') - def notifications_mark_read(self, **kwargs) -> bool: + def notifications_mark_read(self: BaseSiteProtocol, **kwargs) -> bool: """Mark selected notifications as read. .. seealso:: :api:`echomarkread` :return: whether the action was successful """ - # TODO: ensure that the 'echomarkread' action - # is supported by the site kwargs = merge_unique_dicts(kwargs, action='echomarkread', token=self.tokens['csrf']) req = self.simple_request(**kwargs) @@ -74,7 +108,7 @@ class ProofreadPageMixin: """APISite mixin for ProofreadPage extension.""" @need_extension('ProofreadPage') - def _cache_proofreadinfo(self, expiry=False) -> None: + def _cache_proofreadinfo(self: BaseSiteProtocol, expiry=False) -> None: """Retrieve proofreadinfo from site and cache response. Applicable only to sites with ProofreadPage extension installed. @@ -142,7 +176,8 @@ def proofread_levels(self): return self._proofread_levels @need_extension('ProofreadPage') - def loadpageurls(self, page: pywikibot.page.BasePage) -> None: + def loadpageurls(self: BaseSiteProtocol, + page: pywikibot.page.BasePage) -> None: """Load URLs from api and store in page attributes. Load URLs to images for a given page in the "Page:" namespace. 
@@ -169,7 +204,7 @@ class GeoDataMixin: """APISite mixin for GeoData extension.""" @need_extension('GeoData') - def loadcoordinfo(self, page) -> None: + def loadcoordinfo(self: BaseSiteProtocol, page) -> None: """Load [[mw:Extension:GeoData]] info.""" title = page.title(with_section=False) query = self._generator(api.PropertyGenerator, @@ -187,7 +222,7 @@ class PageImagesMixin: """APISite mixin for PageImages extension.""" @need_extension('PageImages') - def loadpageimage(self, page) -> None: + def loadpageimage(self: BaseSiteProtocol, page) -> None: """Load [[mw:Extension:PageImages]] info. :param page: The page for which to obtain the image @@ -256,14 +291,41 @@ class WikibaseClientMixin: """APISite mixin for WikibaseClient extension.""" @need_extension('WikibaseClient') - def unconnected_pages(self, total=None): + def unconnected_pages( + self: BaseSiteProtocol, + total: int | None = None, + *, + strict: bool = False + ) -> Generator[pywikibot.Page, None, None]: """Yield Page objects from Special:UnconnectedPages. .. warning:: The retrieved pages may be connected in meantime. + To avoid this, use *strict* parameter to check. - :param total: number of pages to return + .. versionchanged:: + The *strict* parameter was added. + + :param total: Maximum number of pages to return, or ``None`` for + all. + :param strict: If ``True``, verify that each page still has no + data item before yielding it. 
""" - return self.querypage('UnconnectedPages', total) + if total is not None and total <= 0: + return + + if not strict: + return self.querypage('UnconnectedPages', total) + + count = 0 + for page in self.querypage('UnconnectedPages'): + if total is not None and count >= total: + break + + try: + page.data_item() + except NoPageError: + yield page + count += 1 class LinterMixin: @@ -271,28 +333,31 @@ class LinterMixin: """APISite mixin for Linter extension.""" @need_extension('Linter') - def linter_pages(self, lint_categories=None, total=None, - namespaces=None, pageids=None, lint_from=None): + def linter_pages( + self: BaseSiteProtocol, + lint_categories=None, + total: int | None = None, + namespaces=None, + pageids: str | int | None = None, + lint_from: str | int | None = None + ) -> Iterable[pywikibot.Page]: """Return a generator to pages containing linter errors. :param lint_categories: categories of lint errors :type lint_categories: an iterable that returns values (str), or a pipe-separated string of values. :param total: if not None, yielding this many items in total - :type total: int :param namespaces: only iterate pages in these namespaces :type namespaces: iterable of str or Namespace key, or a single instance of those types. May be a '|' separated list of namespace identifiers. :param pageids: only include lint errors from the specified pageids - :type pageids: an iterable that returns pageids (str or int), or - a comma- or pipe-separated string of pageids (e.g. - '945097,1483753, 956608' or '945097|483753|956608') + :type pageids: an iterable that returns pageids, or a comma- or + pipe-separated string of pageids (e.g. '945097,1483753, + 956608' or '945097|483753|956608') :param lint_from: Lint ID to start querying from - :type lint_from: str representing digit or integer :return: pages with Linter errors. 
- :rtype: typing.Iterable[pywikibot.Page] """ query = self._generator(api.ListGenerator, type_arg='linterrors', total=total, # Will set lntlimit @@ -374,7 +439,8 @@ class TextExtractsMixin: """ @need_extension('TextExtracts') - def extract(self, page: pywikibot.Page, *, + def extract(self: BaseSiteProtocol, + page: pywikibot.Page, *, chars: int | None = None, sentences: int | None = None, intro: bool = True, diff --git a/pywikibot/site/_generators.py b/pywikibot/site/_generators.py index bf76ec7950..dc4cf6e363 100644 --- a/pywikibot/site/_generators.py +++ b/pywikibot/site/_generators.py @@ -26,7 +26,12 @@ ) from pywikibot.site._decorators import need_right from pywikibot.site._namespace import NamespaceArgType -from pywikibot.tools import deprecate_arg, is_ip_address +from pywikibot.tools import ( + deprecate_arg, + deprecated, + deprecated_signature, + is_ip_address, +) from pywikibot.tools.itertools import filter_unique @@ -89,7 +94,7 @@ def load_pages_from_pageids( # Store the order of the input data. priority_dict = dict(zip(batch, range(len(batch)))) - prio_queue = [] + prio_queue: list[tuple[int, pywikibot.Page]] = [] next_prio = 0 params = {'pageids': batch} rvgen = api.PropertyGenerator('info', site=self, parameters=params) @@ -172,7 +177,7 @@ def preloadpages( # Do not use p.pageid property as it will force page loading. pageids = [str(p._pageid) for p in batch if hasattr(p, '_pageid') and p._pageid > 0] - cache = {} + cache: dict[str, tuple[int, pywikibot.Page]] = {} # In case of duplicates, return the first entry. 
for priority, page in enumerate(batch): try: @@ -181,7 +186,7 @@ def preloadpages( except InvalidTitleError: pywikibot.exception() - prio_queue = [] + prio_queue: list[tuple[int, pywikibot.Page]] = [] next_prio = 0 rvgen = api.PropertyGenerator(props, site=self) rvgen.set_maximum_items(-1) # suppress use of "rvlimit" parameter @@ -925,9 +930,10 @@ def page_extlinks( for linkdata in pageitem['extlinks']: yield linkdata['*'] + @deprecated_signature(since='10.4.0') def allpages( self, - start: str = '!', + start: str = '!', *, prefix: str = '', namespace: SingleNamespaceType = 0, filterredir: bool | None = None, @@ -969,6 +975,11 @@ def allpages( type such as bool, or an iterable with more than one namespace or *filterredir* parameter has an invalid type. """ + def _maxsize_filter(item): + """Return True if page text length is within maxsize limit.""" + return len(item.text.encode(self.encoding())) <= maxsize + + misermode = self.siteinfo.get('misermode') and maxsize is not None if filterredir not in (True, False, None): raise TypeError('filterredir parameter must be True, False or ' f'None, not {type(filterredir)}') @@ -976,7 +987,7 @@ def allpages( apgen = self._generator(api.PageGenerator, type_arg='allpages', namespaces=namespace, gapfrom=start, total=total, - g_content=content) + g_content=content or misermode) if prefix: apgen.request['gapprefix'] = prefix if filterredir is not None: @@ -988,7 +999,7 @@ def allpages( 'withoutlanglinks') if isinstance(minsize, int): apgen.request['gapminsize'] = str(minsize) - if isinstance(maxsize, int): + if not misermode and isinstance(maxsize, int): apgen.request['gapmaxsize'] = str(maxsize) if isinstance(protect_type, str): apgen.request['gapprtype'] = protect_type @@ -996,8 +1007,12 @@ def allpages( apgen.request['gapprlevel'] = protect_level if reverse: apgen.request['gapdir'] = 'descending' + if misermode: + apgen.filter_func = _maxsize_filter + return apgen + @deprecated(since='10.7.0') def alllinks( self, start: str = 
'', @@ -1035,6 +1050,10 @@ def alllinks( The minimum read timeout value should be 60 seconds in that case. + .. deprecated:: 10.7 + This method is dysfunctional and should no longer be used. It + will probably be removed in Pywikibot 11. + .. seealso:: - :api:`Alllinks` - :meth:`pagebacklinks` @@ -1053,6 +1072,7 @@ def alllinks( inappropriate type such as bool, or an iterable with more than one namespace """ + # no cover: start if unique and fromids: raise Error('alllinks: unique and fromids cannot both be True.') algen = self._generator(api.ListGenerator, type_arg='alllinks', @@ -1084,6 +1104,7 @@ def alllinks( if fromids: p._fromid = link['fromid'] # type: ignore[attr-defined] yield p + # no cover: stop def allcategories( self, @@ -2337,7 +2358,7 @@ def redirectpages( """ return self.querypage('Listredirects', total) - @deprecate_arg('type', 'protect_type') + @deprecate_arg('type', 'protect_type') # since 9.0 def protectedpages( self, namespace: NamespaceArgType = 0, @@ -2359,13 +2380,13 @@ def protectedpages( :param namespace: The searched namespace. :param protect_type: The protection type to search for (default 'edit'). - :param level: The protection level (like 'autoconfirmed'). If False it - shows all protection levels. + :param level: The protection level (like 'autoconfirmed'). If + False it shows all protection levels. :return: The pages which are protected. """ namespaces = self.namespaces.resolve(namespace) # always assert, so we are be sure that protect_type could be 'create' - assert 'create' in self.protection_types(), \ + assert 'create' in self.restrictions['types'], \ "'create' should be a valid protection type." 
if protect_type == 'create': return self._generator( diff --git a/pywikibot/site/_namespace.py b/pywikibot/site/_namespace.py index 00de8fde7d..e28fd5538b 100644 --- a/pywikibot/site/_namespace.py +++ b/pywikibot/site/_namespace.py @@ -1,6 +1,6 @@ """Objects representing Namespaces of MediaWiki site.""" # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. # @@ -91,6 +91,26 @@ class Namespace(Iterable, ComparableMixin, metaclass=MetaNamespace): metaclass from :class:`MetaNamespace` """ + # Hints of BuiltinNamespace types added with initializer + MEDIA: int + SPECIAL: int + MAIN: int + TALK: int + USER: int + USER_TALK: int + PROJECT: int + PROJECT_TALK: int + FILE: int + FILE_TALK: int + MEDIAWIKI: int + MEDIAWIKI_TALK: int + TEMPLATE: int + TEMPLATE_TALK: int + HELP: int + HELP_TALK: int + CATEGORY: int + CATEGORY_TALK: int + def __init__(self, id, canonical_name: str | None = None, custom_name: str | None = None, @@ -323,11 +343,7 @@ def normalize_name(name): class NamespacesDict(Mapping): - """An immutable dictionary containing the Namespace instances. - - It adds a deprecation message when called as the 'namespaces' - property of APISite was callable. - """ + """An immutable dictionary containing the Namespace instances.""" def __init__(self, namespaces) -> None: """Create new dict using the given namespaces.""" diff --git a/pywikibot/site/_siteinfo.py b/pywikibot/site/_siteinfo.py index 3f79f3b174..46c6b9b26c 100644 --- a/pywikibot/site/_siteinfo.py +++ b/pywikibot/site/_siteinfo.py @@ -1,6 +1,6 @@ """Objects representing site info data contents.""" # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. 
# @@ -11,94 +11,114 @@ import re from collections.abc import Container from contextlib import suppress -from typing import Any +from typing import TYPE_CHECKING, Any, Literal, cast import pywikibot +from pywikibot.backports import Dict, List from pywikibot.exceptions import APIError from pywikibot.tools.collections import EMPTY_DEFAULT +if TYPE_CHECKING: + from pywikibot.site import APISite + + class Siteinfo(Container): - """A 'dictionary' like container for siteinfo. + """A dictionary-like container for siteinfo. This class queries the server to get the requested siteinfo - property. Optionally it can cache this directly in the instance so - that later requests don't need to query the server. + property. Results can be cached in the instance to avoid repeated + queries. + + All values of the 'general' property are directly available. + + .. versionchanged:: 10.5 + formatversion 2 is used for API calls. + + .. admonition:: Compatibility note + :class: note + + For formatversion 2, some siteinfo data structures differ from + version 1. Fallback '*' keys are added in the data structure for + 'namespaces', 'languages', 'namespacealiases' and 'skins' + properties for backwards compatibility. These fallbacks may be + removed in future versions of Pywikibot. + + The 'thumblimits', 'imagelimits' and 'magiclinks' entries of the + 'general' property are normalized to lists for easier use and to + match the format used in formatversion 1. For example: - All values of the siteinfo property 'general' are directly - available. + :code:`'thumblimits': [120, 150, 180, 200, 220, 250, 300, 400]` + + .. deprecated:: 10.5 + Accessing the fallback '*' keys in 'languages', 'namespaces', + 'namespacealiases', and 'skins' properties are deprecated and + will be removed in a future release of Pywikibot. + + .. seealso:: :api:`siteinfo` """ WARNING_REGEX = re.compile(r'Unrecognized values? 
for parameter ' r'["\']siprop["\']: (.+?)\.?') - # Until we get formatversion=2, we have to convert empty-string properties - # into booleans so they are easier to use. - BOOLEAN_PROPS = { - 'general': [ - 'imagewhitelistenabled', - 'langconversion', - 'titleconversion', - 'rtl', - 'readonly', - 'writeapi', - 'variantarticlepath', - 'misermode', - 'uploadsenabled', - ], - 'namespaces': [ # for each namespace - 'subpages', - 'content', - 'nonincludable', - ], - 'magicwords': [ # for each magicword - 'case-sensitive', - ], - } - - def __init__(self, site) -> None: - """Initialise it with an empty cache.""" + def __init__(self, site: APISite) -> None: + """Initialize Siteinfo for a given site with an empty cache.""" self._site = site - self._cache: dict[str, Any] = {} + self._cache: dict[str, + tuple[Any, datetime.datetime | Literal[False]]] = {} def clear(self) -> None: - """Remove all items from Siteinfo. + """Clear all cached siteinfo properties. .. versionadded:: 7.1 """ self._cache.clear() @staticmethod - def _post_process(prop, data) -> None: - """Do some default handling of data. + def _post_process(prop: str, + data: dict[str, Any] | list[dict[str, Any]]) -> None: + """Convert empty-string boolean properties to actual booleans. + + Modifies *data* in place. + + .. versionchanged:: 10.5 + Modify *data* for formatversion 1 compatibility and easier + to use lists. - Directly modifies data. 
+ :param prop: The siteinfo property name (e.g., 'general', + 'namespaces', 'magicwords') + :param data: The raw data returned from the server + + :meta public: """ # Be careful with version tests inside this here as it might need to # query this method to actually get the version number - # Convert boolean props from empty strings to actual boolean values - if prop in Siteinfo.BOOLEAN_PROPS: - # siprop=namespaces and - # magicwords has properties per item in result - if prop in ('namespaces', 'magicwords'): - for index, value in enumerate(data): - # namespaces uses a dict, while magicwords uses a list - key = index if isinstance(data, list) else value - for p in Siteinfo.BOOLEAN_PROPS[prop]: - data[key][p] = p in data[key] - else: - for p in Siteinfo.BOOLEAN_PROPS[prop]: - data[p] = p in data + if prop == 'general': + data = cast(Dict[str, Any], data) + for key in 'thumblimits', 'imagelimits': + data[key] = list(data[key].values()) + data['magiclinks'] = [k for k, v in data['magiclinks'].items() + if v] + elif prop == 'namespaces': + data = cast(Dict[str, Any], data) + for ns_info in data.values(): + ns_info['*'] = ns_info['name'] + elif prop in ('languages', 'namespacealiases'): + data = cast(List[Dict[str, Any]], data) + for ns_info in data: + key = 'name' if 'name' in ns_info else 'alias' + ns_info['*'] = ns_info[key] + elif prop == 'skins': + data = cast(List[Dict[str, Any]], data) + for ns_info in data: + ns_info['*'] = ns_info['name'] + for key in 'default', 'unusable': + ns_info.setdefault(key, False) def _get_siteinfo(self, prop, expiry) -> dict: - """Retrieve a siteinfo property. - - All properties which the site doesn't - support contain the default value. Because pre-1.12 no data was - returned when a property doesn't exists, it queries each property - independently if a property is invalid. + """Retrieve one or more siteinfo properties from the server. .. 
seealso:: :api:Siteinfo @@ -110,6 +130,8 @@ def _get_siteinfo(self, prop, expiry) -> dict: the dictionary is a tuple of the value and a boolean to save if it is the default value. """ + invalid_properties: list[str] = [] + def warn_handler(mod, message) -> bool: """Return True if the warning is handled.""" matched = Siteinfo.WARNING_REGEX.fullmatch(message) @@ -119,21 +141,25 @@ def warn_handler(mod, message) -> bool: return True return False - props = [prop] if isinstance(prop, str) else prop + # Convert to list for consistent iteration + props = [prop] if isinstance(prop, str) else list(prop) if not props: raise ValueError('At least one property name must be provided.') - invalid_properties: list[str] = [] request = self._site._request( expiry=pywikibot.config.API_config_expiry if expiry is False else expiry, parameters={ - 'action': 'query', 'meta': 'siteinfo', 'siprop': props, + 'action': 'query', + 'meta': 'siteinfo', + 'siprop': props, + 'formatversion': 2, } ) # warnings are handled later request._warning_handler = warn_handler + try: data = request.submit() except APIError as e: @@ -151,13 +177,14 @@ def warn_handler(mod, message) -> bool: return results raise - result = {} + result: dict[str, tuple[Any, datetime.datetime | Literal[False]]] = {} if invalid_properties: for invalid_prop in invalid_properties: result[invalid_prop] = (EMPTY_DEFAULT, False) pywikibot.log("Unable to get siprop(s) '{}'" .format("', '".join(invalid_properties))) + # Process valid properties if 'query' in data: # If the request is a CachedRequest, use the _cachetime attr. cache_time = getattr( @@ -169,8 +196,16 @@ def warn_handler(mod, message) -> bool: return result @staticmethod - def _is_expired(cache_date, expire): - """Return true if the cache date is expired.""" + def _is_expired(cache_date: datetime.datetime | Literal[False] | None, + expire: datetime.timedelta | Literal[False]) -> bool: + """Return true if the cache date is expired. 
+ + :param cache_date: The timestamp when the value was cached, or + False if default, None if never. + :param expire: Expiry period as timedelta, or False to never + expire. + :return: True if expired, False otherwise. + """ if isinstance(expire, bool): return expire @@ -215,8 +250,10 @@ def _get_general(self, key: str, expiry): self._cache[prop] = default_info[prop] if key in default_info: return default_info[key] + if key in self._cache['general'][0]: return self._cache['general'][0][key], self._cache['general'] + return None def __getitem__(self, key: str): @@ -306,18 +343,27 @@ def is_cached(self, key: str) -> bool: return True - def __contains__(self, key: str) -> bool: - """Return whether the value is in Siteinfo container. + def __contains__(self, key: object) -> bool: + """Check whether the given key is present in the Siteinfo container. + + This method implements the Container protocol and allows usage + like `key in container`. Only string keys are valid. Non-string + keys always return False. .. versionchanged:: 7.1 Previous implementation only checked for cached keys. + + :param key: The key to check for presence. Should be a string. + :return: True if the key exists in the container, False otherwise. + + :meta public: """ - try: - self[key] - except KeyError: - return False + if isinstance(key, str): + with suppress(KeyError): + self[key] + return True - return True + return False def is_recognised(self, key: str) -> bool | None: """Return if 'key' is a valid property name. diff --git a/pywikibot/site/_tokenwallet.py b/pywikibot/site/_tokenwallet.py index 0baabdecd2..a6911999cf 100644 --- a/pywikibot/site/_tokenwallet.py +++ b/pywikibot/site/_tokenwallet.py @@ -1,6 +1,6 @@ """Objects representing api tokens.""" # -# (C) Pywikibot team, 2008-2023 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. 
# @@ -119,11 +119,25 @@ def update_tokens(self, tokens: list[str]) -> list[str]: r._params['token'] = r.site.tokens.update_tokens(r._params['token']) .. versionadded:: 8.0 + + :param tokens: A list of token types that need to be updated. + :return: A list of updated tokens corresponding to the given + *tokens* types. + :raises KeyError: If no valid token types can be determined to + update. """ # find the token types types = [key for key, value in self._tokens.items() for token in tokens - if value == token] or [self._last_token_key] + if value == token] + + # fallback to _last_token_key if no types found + if not types and self._last_token_key is not None: + types = [self._last_token_key] + + if not types: + raise KeyError('No valid token types found to update.') + self.clear() # clear the cache return [self[token_type] for token_type in types] diff --git a/pywikibot/site/_upload.py b/pywikibot/site/_upload.py index 2477d3fe5c..9ce19a188d 100644 --- a/pywikibot/site/_upload.py +++ b/pywikibot/site/_upload.py @@ -1,6 +1,6 @@ -"""Objects representing API upload to MediaWiki site.""" +"""Objects representing API upload to MediaWiki sites.""" # -# (C) Pywikibot team, 2009-2024 +# (C) Pywikibot team, 2009-2025 # # Distributed under the terms of the MIT license. 
# diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 039bc25890..97ea55a130 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -8,9 +8,11 @@ import itertools import re +import sys from collections import OrderedDict from collections.abc import Sequence from contextlib import closing, suppress +from dataclasses import dataclass from html.parser import HTMLParser from typing import NamedTuple @@ -24,12 +26,13 @@ from pywikibot.family import Family from pywikibot.time import TZoneFixedOffset from pywikibot.tools import ( + ModuleDeprecationWrapper, deprecated, deprecated_args, first_lower, first_upper, ) -from pywikibot.userinterfaces.transliteration import NON_LATIN_DIGITS +from pywikibot.userinterfaces.transliteration import NON_ASCII_DIGITS try: @@ -71,18 +74,25 @@ (?P{{\s*[^{\|#0-9][^{\|#]*?\s* [^{]* {{ .* }}) """, re.VERBOSE | re.DOTALL) -# The following regex supports wikilinks anywhere after the first pipe -# and correctly matches the end of the file link if the wikilink contains -# [[ or ]]. -# The namespace names must be substituted into this regex. -# e.g. FILE_LINK_REGEX % 'File' -# or FILE_LINK_REGEX % '|'.join(site.namespaces[6]) +# Regex matching file links with optional parameters. +# +# Captures the filename and parameters, including nested links +# within the parameters. The regex safely matches the closing +# brackets even if inner wikilinks contain [[ or ]]. +# The Namespace names must be substituted into the pattern, e.g.: +# FILE_LINK_REGEX % 'File' +# or: FILE_LINK_REGEX % '|'.join(site.namespaces[6]) +# +# Don't use this regex directly; use `textlib.get_regexes('file', site)` +# instead. +# +# 10.7: Exclude empty filename FILE_LINK_REGEX = r""" \[\[\s* (?:%s) # namespace aliases \s*: (?=(?P<filename> - [^]|]* + [^]|]+ ))(?P=filename) ( \| @@ -111,10 +121,11 @@ def to_local_digits(phrase: str | int, lang: str) -> str: - """Change Latin digits based on language to localized version. 
+ """Change ASCII digits based on language to localized version. - Be aware that this function only works for several languages, and that it - returns an unchanged string if an unsupported language is given. + .. attention:: Be aware that this function only works for several + languages, and that it returns an unchanged string if an + unsupported language is given. .. versionchanged:: 7.5 always return a string even `phrase` is an int. @@ -123,7 +134,7 @@ def to_local_digits(phrase: str | int, lang: str) -> str: :param lang: language code :return: The localized version """ - digits = NON_LATIN_DIGITS.get(lang) + digits = NON_ASCII_DIGITS.get(lang) phrase = str(phrase) if digits: trans = str.maketrans('0123456789', digits) @@ -131,24 +142,26 @@ def to_local_digits(phrase: str | int, lang: str) -> str: return phrase -def to_latin_digits(phrase: str, +def to_ascii_digits(phrase: str, langs: SequenceType[str] | str | None = None) -> str: - """Change non-latin digits to latin digits. + """Change non-ascii digits to ascii digits. .. versionadded:: 7.0 + .. versionchanged:: 10.3 + this function was renamed from to_latin_digits. - :param phrase: The phrase to convert to latin numerical. + :param phrase: The phrase to convert to ascii numerical. :param langs: Language codes. If langs parameter is None, use all known languages to convert. 
- :return: The string with latin digits + :return: The string with ascii digits """ if langs is None: - langs = NON_LATIN_DIGITS.keys() + langs = NON_ASCII_DIGITS.keys() elif isinstance(langs, str): langs = [langs] - digits = [NON_LATIN_DIGITS[key] for key in langs - if key in NON_LATIN_DIGITS] + digits = [NON_ASCII_DIGITS[key] for key in langs + if key in NON_ASCII_DIGITS] if digits: trans = str.maketrans(''.join(digits), '0123456789' * len(digits)) phrase = phrase.translate(trans) @@ -535,82 +548,208 @@ def removeDisabledParts(text: str, return text -def removeHTMLParts(text: str, keeptags: list[str] | None = None) -> str: - """Return text without portions where HTML markup is disabled. +def removeHTMLParts(text: str, + keeptags: list[str] | None = None, + *, + removetags: list[str] | None = None) -> str: + """Remove selected HTML tags, their content, and comments from text. - Parts that can/will be removed are HTML tags and all wiki tags. The - exact set of parts which should NOT be removed can be passed as the - *keeptags* parameter, which defaults to - ``['tt', 'nowiki', 'small', 'sup']``. + This function removes HTML tags and their contents for tags listed + in ``removetags``. Tags specified in ``keeptags`` are preserved + along with their content and markup. This is a wrapper around the + :class:`GetDataHTML` parser class. **Example:** - >>> removeHTMLParts('
Hi all!
') + >>> remove = removeHTMLParts + >>> remove('
Hi all!
'Hi all!' - - .. seealso:: :class:`_GetDataHTML` + >>> remove('', keeptags=['style']) + '' + >>> remove('Note: This is important!') + 'Note: This is important!' + >>> remove('Note: This is important!', removetags=['a']) + ' This is important!' + + .. caution:: Tag names must be given in lowercase. + + .. versionchanged:: 10.3 + The *removetags* parameter was added. Refactored to use + :class:`GetDataHTML` and its ``__call__`` method. Tag attributes + will be kept. + + :param text: The input HTML text to clean. + :param keeptags: List of tag names to keep, including their content + and markup. Defaults to :code:`['tt', 'nowiki', 'small', 'sup']` + if None. + :param removetags: List of tag names whose tags and content should + be removed. The tags can be preserved if listed in *keeptags*. + Defaults to :code:`['style', 'script']` if None. + :return: The cleaned text with specified HTML parts removed. """ - # TODO: try to merge with 'removeDisabledParts()' above into one generic - # function - parser = _GetDataHTML() - if keeptags is None: - keeptags = ['tt', 'nowiki', 'small', 'sup'] - with closing(parser): - parser.keeptags = keeptags - parser.feed(text) - return parser.textdata + return GetDataHTML(keeptags=keeptags, removetags=removetags)(text) + + +@dataclass(init=False, eq=False) +class GetDataHTML(HTMLParser): + + """HTML parser that removes unwanted HTML elements and optionally comments. + + Tags listed in *keeptags* are preserved. Tags listed in *removetags* + are removed entirely along with their content. Optionally strips HTML + comments. Use via the callable interface or in a :code:`with closing(...)` + block. + .. note:: + The callable interface is preferred because it is simpler and + ensures proper resource management automatically. If using the + context manager, be sure to access :attr:`textdata` before calling + :meth:`close`. -class _GetDataHTML(HTMLParser): + .. tabs:: - """HTML parser which removes html tags except they are listed in keeptags. 
+ .. tab:: callable interface - The parser is used by :func:`removeHTMLParts` similar to this: + .. code-block:: python - .. code-block:: python + text = ('Test' + '

me!

') - from contextlib import closing - from pywikibot.textlib import _GetDataHTML - with closing(_GetDataHTML()) as parser: - parser.keeptags = ['html'] - parser.feed('Test' - '

me!

') - print(parser.textdata) + parser = GetDataHTML(keeptags = ['html']) + clean_text = parser(text) - The result is: + .. tab:: closing block - .. code-block:: html + .. code-block:: python - Test me! + from contextlib import closing + text = ('Test' + '

me!

') + + parser = GetDataHTML(keeptags = ['html']) + with closing(parser): + parser.feed(text) + clean_text = parser.textdata + + .. warning:: Save the :attr:`textdata` **before** :meth:`close` + is called; otherwise the cleaned text is empty. + + **Usage:** + + >>> text = ('Test' + ... '

me!

') + >>> GetDataHTML()(text) + 'Test me!' + >>> GetDataHTML(keeptags=['title'])(text) + 'Test me!' + >>> GetDataHTML(removetags=['body'])(text) + 'Test' + + .. caution:: Tag names must be given in lowercase. .. versionchanged:: 9.2 - This class is no longer a context manager; - :pylib:`contextlib.closing()` - should be used instead. + No longer a context manager + + .. versionchanged:: 10.3 + Public class now. Added support for removals of tag contents. .. seealso:: + - :func:`removeHTMLParts` - :pylib:`html.parser` - - :pylib:`contextlib#contextlib.closing` - :meta public: + :param keeptags: List of tag names to keep, including their content + and markup. Defaults to :code:`['tt', 'nowiki', 'small', 'sup']` + if None. + :param removetags: List of tag names whose tags and content should + be removed. The tags can be preserved if listed in *keeptags*. + Defaults to :code:`['style', 'script']` if None. + :param removecomments: Whether to remove HTML comments. Defaults to + True. """ - textdata = '' - keeptags: list[str] = [] + def __init__(self, *, + keeptags: list[str] | None = None, + removetags: list[str] | None = None) -> None: + """Initialize default tags and internal state.""" + super().__init__() + self.keeptags: list[str] = (keeptags if keeptags is not None + else ['tt', 'nowiki', 'small', 'sup']) + self.removetags: list[str] = (removetags if removetags is not None + else ['style', 'script']) - def handle_data(self, data) -> None: - """Add data to text.""" - self.textdata += data + #: The cleaned output text collected during parsing. + self.textdata = '' - def handle_starttag(self, tag, attrs) -> None: - """Add start tag to text if tag should be kept.""" + self._skiptag: str | None = None + + def __call__(self, text: str) -> str: + """Feed the parser with *text* and return cleaned :attr:`textdata`. + + :param text: The HTML text to parse and clean. + :return: The cleaned text with unwanted tags/content removed. 
+ """ + with closing(self): + self.feed(text) + return self.textdata + + def close(self) -> None: + """Clean current processing and clear :attr:`textdata`.""" + self.textdata = '' + self._skiptag = None + super().close() + + def handle_data(self, data: str) -> None: + """Handle plain text content found between tags. + + Text is added to the output unless it is located inside a tag + marked for removal. + + :param data: The text data between HTML tags. + """ + if not self._skiptag: + self.textdata += data + + def handle_starttag(self, + tag: str, + attrs: list[tuple[str, str | None]]) -> None: + """Handle an opening HTML tag. + + Tags listed in *keeptags* are preserved in the output. Tags + listed in *removetags* begin a skip block, and their content + will be excluded from the output. + + .. versionchanged:: 10.3 + Keep tag attributes. + + :param tag: The tag name (e.g., "div", "script") converted to + lowercase. + :param attrs: A list of (name, value) pairs with tag attributes. + """ if tag in self.keeptags: - self.textdata += f'<{tag}>' - def handle_endtag(self, tag) -> None: - """Add end tag to text if tag should be kept.""" + # Reconstruct attributes for preserved tags + attr_text = ''.join( + f' {name}' if value is None else f' {name}="{value}"' + for name, value in attrs + ) + self.textdata += f'<{tag}{attr_text}>' + + if tag in self.removetags: + self._skiptag = tag + + def handle_endtag(self, tag: str) -> None: + """Handle a closing HTML tag. + + Tags listed in *keeptags* are preserved in the output. A closing + tag that matches the currently skipped tag will end the skip + block. + + :param tag: The name of the closing tag. 
+ """ if tag in self.keeptags: self.textdata += f'' + if tag in self.removetags and tag == self._skiptag: + self._skiptag = None def isDisabled(text: str, index: int, tags=None) -> bool: @@ -988,6 +1127,101 @@ def heading(self) -> str: return self.title[level:-level].strip() +class SectionList(list): + + """List of :class:`Section` objects with heading/level-aware index(). + + Introduced for handling lists of sections with custom lookup by + :attr:`Section.heading` and :attr:`level`. + + .. versionadded:: 10.4 + """ + + def __contains__(self, value: object) -> bool: + """Check if a section matching the given value exists. + + :param value: The section heading string, a (heading, level) tuple, + or a :class:`Section` instance to search for. + :return: ``True`` if a matching section exists, ``False`` otherwise. + """ + with suppress(ValueError): + self.index(value) + return True + + return False + + def count(self, value: str | tuple[str, int] | Section, /) -> int: + """Count the number of sections matching the given value. + + :param value: The section heading string, a (heading, level) tuple, + or a :class:`Section` instance to search for. + :return: The number of matching sections. + """ + if isinstance(value, Section): + return super().count(value) + + if isinstance(value, tuple) and len(value) == 2: + heading, level = value + return sum(1 for sec in self + if sec.heading == heading and sec.level == level) + + if isinstance(value, str): + return sum(1 for sec in self if sec.heading == value) + + return super().count(value) + + def index( + self, + value: str | tuple[str, int] | Section, + start: int = 0, + stop: int = sys.maxsize, + /, + ) -> int: + """Return the index of a matching section. 
+ + Works like ``list.index(value, start, stop)`` but also allows: + + - *value* as a string → match by :attr:`Section.heading` (any level) + - *value* as a ``(heading, level)`` tuple → match both + :attr:`heading` and :attr:`level` + - *value* as a ``Section`` object → normal list.index() behavior + + :param value: The item to search for. May be: + - ``str`` — search by section heading. + - ``tuple[str, int]`` — search by heading and section level. + - :class:`Section` — search for an exact section object. + :param start: Index to start searching from (inclusive). + :param stop: Index to stop searching at (exclusive). + :return: The integer index of the matching section. + :raises ValueError: If no matching section is found. + """ + # Normalize negative indices + n = len(self) + start = max(0, n + start) if start < 0 else start + stop = max(0, n + stop) if stop < 0 else stop + + if isinstance(value, Section): + return super().index(value, start, stop) + + if isinstance(value, tuple) and len(value) == 2: + heading, level = value + for i, sec in enumerate(self[start:stop], start): + if sec.heading == heading and sec.level == level: + return i + + raise ValueError( + f'{value!r} not found in Section headings/levels') + + if isinstance(value, str): + for i, sec in enumerate(self[start:stop], start): + if sec.heading == value: + return i + + raise ValueError(f'{value!r} not found in Section headings') + + return super().index(value, start, stop) + + class Content(NamedTuple): """A namedtuple as result of :func:`extract_sections` holding page content. 
@@ -997,7 +1231,7 @@ class Content(NamedTuple): """ header: str #: the page header - sections: list[Section] #: the page sections + sections: SectionList[Section] #: the page sections footer: str #: the page footer @property @@ -1025,7 +1259,7 @@ def _extract_headings(text: str) -> list[_Heading]: def _extract_sections(text: str, headings) -> list[Section]: """Return a list of :class:`Section` objects.""" - sections = [] + sections = SectionList() if headings: # Assign them their contents for heading, next_heading in pairwise(headings): @@ -1086,6 +1320,16 @@ def extract_sections( '== History of this ==' >>> result.sections[1].content.strip() 'Enter "import this" for usage...' + >>> 'Details' in result.sections + True + >>> ('Details', 2) in result.sections + False + >>> result.sections.index('Details') + 2 + >>> result.sections.index(('Details', 2)) + Traceback (most recent call last): + ... + ValueError: ('Details', 2) not found in Section headings/levels >>> result.sections[2].heading 'Details' >>> result.sections[2].level @@ -1101,6 +1345,9 @@ def extract_sections( .. versionchanged:: 8.2 The :class:`Content` and :class:`Section` class have additional properties. + .. versionchanged:: 10.4 + Added custom ``index()``, ``count()`` and ``in`` operator support + for :attr:`Content.sections`. :return: The parsed namedtuple. 
""" # noqa: D300, D301 @@ -2214,7 +2461,7 @@ def censor_match(match): line = removeDisabledParts(line) line = removeHTMLParts(line) - line = to_latin_digits(line) + line = to_ascii_digits(line) for pat in self.patterns: line, match_obj = self._last_match_and_replace(line, pat) if match_obj: @@ -2269,3 +2516,7 @@ def censor_match(match): timestamp = None return timestamp + + +wrapper = ModuleDeprecationWrapper(__name__) +wrapper.add_deprecated_attr('to_latin_digits', to_ascii_digits, since='10.3.0') diff --git a/pywikibot/throttle.py b/pywikibot/throttle.py index 26d38d3eec..eb270fbede 100644 --- a/pywikibot/throttle.py +++ b/pywikibot/throttle.py @@ -1,13 +1,22 @@ -"""Mechanics to slow down wiki read and/or write rate.""" +"""Mechanisms to regulate the read and write rate to wiki servers. + +This module defines the :class:`Throttle` class, which ensures that +automated access to wiki servers adheres to responsible rate limits. It +avoids overloading the servers by introducing configurable delays +between requests, and coordinates these limits across processes using a +shared control file ``throttle.ctrl``. + +It supports both read and write throttling, automatic adjustment based +on the number of concurrent bot instances, and optional lag-aware delays. +""" # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. 
# from __future__ import annotations import itertools -import math import threading import time from collections import Counter @@ -18,7 +27,7 @@ import pywikibot from pywikibot import config from pywikibot.backports import Counter as CounterType -from pywikibot.tools import deprecated +from pywikibot.tools import deprecated, deprecated_args, deprecated_signature FORMAT_LINE = '{module_id} {pid} {time} {site}\n' @@ -79,7 +88,6 @@ def __init__(self, site: pywikibot.site.BaseSite | str, *, self.writedelay = writedelay or config.put_throttle self.last_read = 0.0 self.last_write = 0.0 - self.next_multiplicity = 1.0 self.retry_after = 0 # set by http.request self.delay = 0 @@ -87,7 +95,26 @@ def __init__(self, site: pywikibot.site.BaseSite | str, *, self.modules: CounterType[str] = Counter() self.checkMultiplicity() - self.setDelays() + self.set_delays() + + @property + @deprecated(since='10.3.0') + def next_multiplicity(self) -> float: + """Factor to scale delay time based on upcoming request size. + + .. deprecated:: 10.3.0 + """ + return 1.0 + + @next_multiplicity.setter + @deprecated(since='10.3.0') + def next_multiplicity(self, value: float) -> None: + """Setter for delay scaling factor for the next request. + + .. deprecated:: 10.3.0 + This property has no effect and is retained for backward + compatibility. + """ @property @deprecated('expiry', since='8.4.0') @@ -199,6 +226,7 @@ def checkMultiplicity(self) -> None: pywikibot.log(f'Found {count} {mysite} processes running,' ' including this one.') + @deprecated('set_delays', since='10.3.0') def setDelays( self, delay=None, @@ -207,7 +235,23 @@ def setDelays( ) -> None: """Set the nominal delays in seconds. + .. deprecated:: 10.3.0 + Use :meth:`set_delays` instead. + """ + self.set_delays(delay=delay, writedelay=writedelay, absolute=absolute) + + def set_delays( + self, *, + delay=None, + writedelay=None, + absolute: bool = False + ) -> None: + """Set the nominal delays in seconds. + Defaults to config values. 
+ + .. versionadded:: 10.3.0 + Renamed from :meth:`setDelays`. """ with self.lock: delay = delay or self.mindelay @@ -221,24 +265,38 @@ def setDelays( # Start the delay count now, not at the next check self.last_read = self.last_write = time.time() - def getDelay(self, write: bool = False): - """Return the actual delay, accounting for multiple processes. + @deprecated('get_delay', since='10.3.0') + def getDelay(self, write: bool = False) -> float: + """Return the current delay, adjusted for active processes. + + .. deprecated:: 10.3.0 + Use :meth:`get_delay` instead. + """ + return self.get_delay(write=write) + + def get_delay(self, *, write: bool = False) -> float: + """Return the current delay, adjusted for active processes. - This value is the maximum wait between reads/writes, not taking - into account of how much time has elapsed since the last access. + Compute the delay for a read or write operation, factoring in + process concurrency. This method does not account for how much + time has already passed since the last access — use + :meth:`waittime` for that. + + .. versionadded:: 10.3.0 + Renamed from :meth:`getDelay`. + + :param write: Whether the operation is a write (uses writedelay). + :return: The delay in seconds before the next operation should + occur. 
""" - thisdelay = self.writedelay if write else self.delay + current_delay = self.writedelay if write else self.delay - # We're checking for multiple processes + # Refresh process count if the check interval has elapsed if time.time() > self.checktime + self.checkdelay: self.checkMultiplicity() - multiplied_delay = self.mindelay * self.next_multiplicity - if thisdelay < multiplied_delay: - thisdelay = multiplied_delay - elif thisdelay > self.maxdelay: - thisdelay = self.maxdelay - thisdelay *= self.process_multiplicity - return thisdelay + + current_delay = max(self.mindelay, min(current_delay, self.maxdelay)) + return current_delay * self.process_multiplicity def waittime(self, write: bool = False): """Return waiting time in seconds. @@ -247,7 +305,7 @@ def waittime(self, write: bool = False): """ # Take the previous requestsize in account calculating the desired # delay this time - thisdelay = self.getDelay(write=write) + thisdelay = self.get_delay(write=write) now = time.time() ago = now - (self.last_write if write else self.last_read) return max(0.0, thisdelay - ago) @@ -284,31 +342,36 @@ def wait(seconds: int | float) -> None: time.sleep(seconds) - def __call__(self, requestsize: int = 1, write: bool = False) -> None: - """Block the calling program if the throttle time has not expired. + @deprecated_args(requestsize=None) # since: 10.3.0 + @deprecated_signature(since='10.3.0') + def __call__(self, *, requestsize: int = 1, write: bool = False) -> None: + """Apply throttling based on delay rules and request type. + + This method blocks the calling thread if the minimum delay has + not yet elapsed since the last read or write operation. - Parameter requestsize is the number of Pages to be read/written; - multiply delay time by an appropriate factor. + .. versionchanged:: 10.3.0 + The *write* parameter is now keyword-only. - Because this seizes the throttle lock, it will prevent any other - thread from writing to the same site until the wait expires. + .. 
deprecated:: 10.3.0 + The *requestsize* parameter has no effect and will be removed + in a future release. + + :param requestsize: Number of pages to be read or written. + Deprecated since 10.3.0. No longer affects throttling. + :param write: Whether the operation involves writing to the site. + Write operations use a separate delay timer and lock. """ lock = self.lock_write if write else self.lock_read with lock: wait = self.waittime(write=write) - # Calculate the multiplicity of the next delay based on how - # big the request is that is being posted now. - # We want to add "one delay" for each factor of two in the - # size of the request. Getting 64 pages at once allows 6 times - # the delay time for the server. - self.next_multiplicity = math.log(1 + requestsize) / math.log(2.0) - self.wait(wait) + now = time.time() if write: - self.last_write = time.time() + self.last_write = now else: - self.last_read = time.time() + self.last_read = now def lag(self, lagtime: float | None = None) -> None: """Seize the throttle lock due to server lag. diff --git a/pywikibot/titletranslate.py b/pywikibot/titletranslate.py index 16720042cc..1d381be5d3 100644 --- a/pywikibot/titletranslate.py +++ b/pywikibot/titletranslate.py @@ -1,6 +1,6 @@ """Title translate module.""" # -# (C) Pywikibot team, 2003-2024 +# (C) Pywikibot team, 2003-2025 # # Distributed under the terms of the MIT license. 
# @@ -46,7 +46,7 @@ def translate( for h in hints: # argument may be given as -hint:xy where xy is a language code - codes, _, newname = h.partition(':') + code, _, newname = h.partition(':') if not newname: # if given as -hint:xy or -hint:xy:, assume that there should # be a page in language xy with the same title as the page @@ -55,12 +55,12 @@ def translate( continue newname = page.title(with_ns=False, without_brackets=removebrackets) - if codes.isdigit(): - codes = site.family.languages_by_size[:int(codes)] - elif codes == 'all': + if code.isdigit(): + codes = site.family.languages_by_size[:int(code)] + elif code == 'all': codes = list(site.family.codes) else: - codes = site.family.language_groups.get(codes, codes.split(',')) + codes = site.family.language_groups.get(code, code.split(',')) for newcode in codes: if newcode in site.codes: diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py index a39cb519c7..bfc8da124c 100644 --- a/pywikibot/tools/__init__.py +++ b/pywikibot/tools/__init__.py @@ -29,9 +29,9 @@ add_decorated_full_name, add_full_name, deprecate_arg, - deprecate_positionals, deprecated, deprecated_args, + deprecated_signature, get_wrapper_depth, issue_deprecation_warning, manage_wrapping, @@ -55,9 +55,9 @@ 'add_decorated_full_name', 'add_full_name', 'deprecate_arg', - 'deprecate_positionals', 'deprecated', 'deprecated_args', + 'deprecated_signature', 'get_wrapper_depth', 'issue_deprecation_warning', 'manage_wrapping', diff --git a/pywikibot/tools/_deprecate.py b/pywikibot/tools/_deprecate.py index 310e7ca5f9..e72f514197 100644 --- a/pywikibot/tools/_deprecate.py +++ b/pywikibot/tools/_deprecate.py @@ -19,7 +19,7 @@ deprecation decorators moved to _deprecate submodule """ # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. 
# @@ -443,36 +443,64 @@ def wrapper(*__args, **__kw): return decorator -def deprecate_positionals(since: str = ''): - """Decorator for methods that issues warnings for positional arguments. +def deprecated_signature(since: str = ''): + """Decorator handling deprecated changes in function or method signatures. - This decorator allows positional arguments after keyword-only - argument syntax (:pep:`3102`) but throws a FutureWarning. The - decorator makes the needed argument updates before passing them to - the called function or method. This decorator may be used for a - deprecation period when require keyword-only arguments. + This decorator supports: + + - Deprecation of positional arguments that have been converted to + keyword-only parameters. + - Detection of invalid keyword usage for positional-only parameters. + + Positional-only parameters (introduced in :pep:`570`) must be passed + positionally. If such parameters are passed as keyword arguments, + this decorator will emit a ``FutureWarning`` and automatically remap + them to positional arguments for backward compatibility. + + It allows positional arguments after keyword-only syntax (:pep:`3102`) + but emits a ``FutureWarning``. Positional arguments that are now + keyword-only are automatically mapped to their corresponding + keyword parameters before the decorated function or method is + invoked. + + The intended use is during a deprecation period, allowing legacy + calls to continue working with a warning instead of raising a + ``TypeError`` immediately. Example: .. code-block:: python - @deprecate_positionals(since='9.2.0') + @deprecated_signature(since='10.6.0') def f(posarg, *, kwarg): ... f('foo', 'bar') - This function call passes but throws a FutureWarning. Without - decorator a TypeError would be raised. + This function call passes but throws a ``FutureWarning``. + Without the decorator, a ``TypeError`` would be raised. - .. caution:: The decorated function may not use ``*args`` or - ``**kwargs``. 
The sequence of keyword-only arguments must match - the sequence of the old positional arguments, otherwise the - assignment of the arguments to the keyworded arguments will fail. - .. versionadded:: 9.2 + .. note:: + If the parameter name was changed, use :func:`deprecated_args` + first. - :param since: a version string when some positional arguments were - deprecated + .. caution:: + The decorated function must not accept ``*args``. The order of + keyword-only arguments must match the order of the old positional + parameters; otherwise, argument assignment may fail. + + .. versionadded:: 9.2 + .. versionchanged:: 10.4 + Raises ``ValueError`` if method has a ``*args`` parameter. + .. versionchanged:: 10.6 + Renamed from ``deprecate_positionals``. Adds handling of + positional-only parameters and emits warnings if they are passed + as keyword arguments. + + :param since: Mandatory version string indicating when signature + changed. + :raises TypeError: If required positional arguments are missing. + :raises ValueError: If the method has an ``*args`` parameter. """ def decorator(func): """Outer wrapper. Inspect the parameters of *func*. @@ -490,10 +518,64 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: :return: the value returned by the decorated function or method """ + # 1. 
fix deprecated positional-only usage + pos_only_in_kwargs = { + name: kwargs[name] + for name, p in params.items() + if p.kind == const.POSITIONAL_ONLY and name in kwargs + } + + if pos_only_in_kwargs: + new_args: list[Any] = [] + args_repr = [] # build representation for deprecation warning + idx = 0 # index for args + + for name in arg_keys: + param = params[name] + + if param.kind != const.POSITIONAL_ONLY: + # append remaining POSITIONAL_OR_KEYWORD arguments + new_args.extend(args[idx:]) + break + + if name in pos_only_in_kwargs: + # Value was passed as keyword → use it + value = kwargs.pop(name) + args_repr.append(repr(value)) + elif idx < len(args): + # Value from original args + value = args[idx] + idx += 1 + # Add ellipsis once for original args + if name not in ('cls', 'self') and ( + not args_repr or args_repr[-1] != '...'): + args_repr.append('...') + elif param.default is not param.empty: + # Value from default → show actual value + value = param.default + args_repr.append(repr(value)) + else: + raise TypeError( + f'Missing required positional argument: {name}' + ) + + new_args.append(value) + + args = tuple(new_args) + + args_str = ', '.join(args_repr) + issue_deprecation_warning( + f'Passing positional-only arguments as keywords to ' + f"{func.__qualname__}(): {', '.join(pos_only_in_kwargs)}", + f'positional arguments like {func.__name__}({args_str})', + since=since + ) + + # 2. 
warn for deprecated keyword-only usage as positional if len(args) > positionals: replace_args = list(zip(arg_keys[positionals:], args[positionals:])) - pos_args = "', '".join(name for name, arg in replace_args) + pos_args = "', '".join(name for name, _ in replace_args) keyw_args = ', '.join(f'{name}={arg!r}' for name, arg in replace_args) issue_deprecation_warning( @@ -508,13 +590,25 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: return func(*args, **kwargs) sig = inspect.signature(func) + params = sig.parameters arg_keys = list(sig.parameters) + const = inspect.Parameter # find the first KEYWORD_ONLY index + positionals = 0 for positionals, key in enumerate(arg_keys): - if sig.parameters[key].kind in (inspect.Parameter.KEYWORD_ONLY, - inspect.Parameter.VAR_KEYWORD): + kind = params[key].kind + + # disallow *args entirely + if kind == const.VAR_POSITIONAL: + raise ValueError( + f'{func.__qualname__} must not have *{key} parameter') + + # stop counting when we reach keyword-only or **kwargs + if kind in (const.KEYWORD_ONLY, const.VAR_KEYWORD): break + else: + positionals += 1 # all were positional, no keyword found return wrapper @@ -556,7 +650,7 @@ def wrapper(*__args, **__kw): name = obj.__full_name__ depth = get_wrapper_depth(wrapper) + 1 args, varargs, kwargs, *_ = getfullargspec(wrapper.__wrapped__) - if varargs is not None and kwargs is not None: + if varargs is not None and kwargs is not None: # pragma: no cover raise ValueError(f'{name} may not have * or ** args.') deprecated = set(__kw) & set(arg_names) if len(__args) > len(args): diff --git a/pywikibot/tools/chars.py b/pywikibot/tools/chars.py index 08d50e25ac..e251b97487 100644 --- a/pywikibot/tools/chars.py +++ b/pywikibot/tools/chars.py @@ -1,6 +1,6 @@ """Character based helper functions (not wiki-dependent).""" # -# (C) Pywikibot team, 2015-2024 +# (C) Pywikibot team, 2015-2025 # # Distributed under the terms of the MIT license. 
# @@ -8,6 +8,7 @@ import re from contextlib import suppress +from typing import cast from urllib.parse import unquote from pywikibot.backports import Iterable @@ -125,13 +126,13 @@ def url2string(title: str, if isinstance(encodings, str): return unquote(title, encodings, errors='strict') - first_exception = None + first_exception: BaseException | None = None for enc in encodings: try: return unquote(title, enc, errors='strict') except (UnicodeError, LookupError) as e: - if not first_exception: + if first_exception is None: first_exception = e # Couldn't convert, raise the first exception - raise first_exception + raise cast(BaseException, first_exception) diff --git a/pywikibot/tools/collections.py b/pywikibot/tools/collections.py index 5ef1cb9c12..458c06b4ef 100644 --- a/pywikibot/tools/collections.py +++ b/pywikibot/tools/collections.py @@ -1,6 +1,6 @@ """Collections datatypes.""" # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. # @@ -11,9 +11,16 @@ from collections.abc import Collection, Generator, Iterator, Mapping from contextlib import suppress from itertools import chain +from types import TracebackType from typing import Any, NamedTuple from pywikibot.backports import Generator as GeneratorType +from pywikibot.exceptions import ArgumentDeprecationWarning +from pywikibot.tools import ( + PYTHON_VERSION, + deprecated_args, + issue_deprecation_warning, +) __all__ = ( @@ -277,18 +284,50 @@ def send(self, value: Any) -> Any: self._started_gen = self.generator return next(self._started_gen) - def throw(self, typ: Exception, val=None, tb=None) -> None: + @deprecated_args(val='value', tb='traceback') # since 10.7.0 + def throw(self, + typ: BaseException | type[BaseException] | None = None, + value: Any = None, + traceback: TracebackType | None = None) -> None: """Raise an exception inside the wrapped generator. Refer :python:`generator.throw() ` for various parameter usage. + .. 
versionchanged:: 10.7 + The *val* and *tb* parameters were renamed to *value* and + *traceback*. + .. deprecated:: 10.7 + The ``(type, value, traceback)`` signature is deprecated; use + single-arg signature ``throw(value)`` instead. + :raises RuntimeError: No generator started + :raises TypeError: Invalid type for *typ* argument """ if not hasattr(self, '_started_gen'): raise RuntimeError('No generator was started') - self._started_gen.throw(typ, val, tb) + + # New-style (single exception instance) with keyword argument + if typ is None and traceback is None and isinstance(value, + BaseException): + self._started_gen.throw(value) + return + + if PYTHON_VERSION > (3, 8) and not (value is None + and traceback is None): + # Old-style (type, value, traceback) signature + issue_deprecation_warning( + 'The (type, value, traceback) signature of throw()', + 'the single-arg signature', + warning_class=ArgumentDeprecationWarning, + since='10.7.0' + ) + self._started_gen.throw(typ, value, traceback) + return + + # New-style (single exception instance) + self._started_gen.throw(typ) def restart(self) -> None: """Restart the generator.""" diff --git a/pywikibot/tools/djvu.py b/pywikibot/tools/djvu.py index 040e30c527..9c37e03014 100644 --- a/pywikibot/tools/djvu.py +++ b/pywikibot/tools/djvu.py @@ -1,6 +1,6 @@ """Wrapper around djvulibre to access djvu files properties and content.""" # -# (C) Pywikibot team, 2015-2024 +# (C) Pywikibot team, 2015-2025 # # Distributed under the terms of the MIT license. # @@ -12,13 +12,14 @@ from collections import Counter import pywikibot +from pywikibot.backports import Sequence -def _call_cmd(args, lib: str = 'djvulibre') -> tuple: +def _call_cmd(args: str | Sequence[str], + lib: str = 'djvulibre') -> tuple[bool, str]: """Tiny wrapper around subprocess.Popen(). 
:param args: same as Popen() - :type args: str or typing.Sequence[string] :param lib: library to be logged in logging messages :return: returns a tuple (res, stdoutdata), where res is True if dp.returncode != 0 else False diff --git a/pywikibot/tools/itertools.py b/pywikibot/tools/itertools.py index e5911ba265..d802c5698a 100644 --- a/pywikibot/tools/itertools.py +++ b/pywikibot/tools/itertools.py @@ -4,19 +4,25 @@ in :mod:`backports` """ # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. # from __future__ import annotations import collections +import heapq import itertools from contextlib import suppress -from itertools import chain, zip_longest from typing import Any -from pywikibot.backports import Generator, batched +from pywikibot.backports import ( + Callable, + Generator, + Iterable, + Iterator, + batched, +) from pywikibot.logging import debug from pywikibot.tools import deprecated @@ -27,6 +33,7 @@ 'islice_with_ellipsis', 'itergroup', 'roundrobin_generators', + 'union_generators', ) @@ -90,6 +97,47 @@ def islice_with_ellipsis(iterable, *args, marker: str = '…'): yield marker +def union_generators(*iterables: Iterable[Any], + key: Callable[[Any], Any] | None = None, + reverse: bool = False) -> Iterator[Any]: + """Generator of union of sorted iterables. + + Yield all items from the input iterables in sorted order, removing + duplicates. The input iterables must already be sorted according to + the same *key* and direction. For descending direction, *reverse* + must be ``True``. The generator will yield each element only once, + even if it appears in multiple iterables. This behaves similarly to: + + sorted(set(itertools.chain(*iterables)), key=key, reverse=reverse) + + but is memory-efficient since it processes items lazily. 
+ + Sample: + + >>> list(union_generators([1, 2, 3, 4], [3, 4, 5], [2, 6])) + [1, 2, 3, 4, 5, 6] + >>> list(union_generators([4, 3, 2, 1], [5, 4, 3], [6, 2], reverse=True)) + [6, 5, 4, 3, 2, 1] + + .. versionadded:: 10.6 + + .. note:: + All input iterables must be sorted consistently. *reverse* must + be set to ``True`` only if the iterables are sorted in descending + order. For simple concatenation without duplicate removal, use + :pylib:`itertools.chain` instead. + + :param iterables: Sorted iterables to merge. + :param key: Optional key function to compare elements. If ``None``, + items are compared directly. + :param reverse: Whether the input iterables are sorted in descending + order. + :return: Generator yielding all unique items in sorted order. + """ + merged = heapq.merge(*iterables, key=key, reverse=reverse) + return (list(group)[0] for _, group in itertools.groupby(merged, key=key)) + + def intersect_generators(*iterables, allow_duplicates: bool = False): """Generator of intersect iterables. @@ -155,7 +203,7 @@ def intersect_generators(*iterables, allow_duplicates: bool = False): # Get items from iterables in a round-robin way. sentinel = object() - for items in zip_longest(*iterables, fillvalue=sentinel): + for items in itertools.zip_longest(*iterables, fillvalue=sentinel): for index, item in enumerate(items): if item is sentinel: @@ -184,7 +232,8 @@ def intersect_generators(*iterables, allow_duplicates: bool = False): # a subset of active iterables. 
if len(active_iterables) < n_gen: cached_iterables = set( - chain.from_iterable(v.keys() for v in cache.values())) + itertools.chain.from_iterable(v.keys() + for v in cache.values())) if cached_iterables <= active_iterables: return @@ -210,7 +259,7 @@ def roundrobin_generators(*iterables) -> Generator[Any, None, None]: sentinel = object() return (item for item in itertools.chain.from_iterable( - zip_longest(*iterables, fillvalue=sentinel)) + itertools.zip_longest(*iterables, fillvalue=sentinel)) if item is not sentinel) diff --git a/pywikibot/tools/threading.py b/pywikibot/tools/threading.py index 522a461efb..83c40deaa6 100644 --- a/pywikibot/tools/threading.py +++ b/pywikibot/tools/threading.py @@ -69,7 +69,7 @@ def __init__(self, group=None, target=None, name: str = 'GeneratorThread', raise RuntimeError('No generator for ThreadedGenerator to run.') self.args, self.kwargs = args, kwargs super().__init__(group=group, name=name) - self.queue = queue.Queue(qsize) + self.queue: queue.Queue[Any] = queue.Queue(qsize) self.finished = threading.Event() def __iter__(self): diff --git a/pywikibot/userinterfaces/buffer_interface.py b/pywikibot/userinterfaces/buffer_interface.py index 2f508c7656..31f6993fb0 100644 --- a/pywikibot/userinterfaces/buffer_interface.py +++ b/pywikibot/userinterfaces/buffer_interface.py @@ -3,7 +3,7 @@ .. versionadded:: 6.4 """ # -# (C) Pywikibot team, 2021-2024 +# (C) Pywikibot team, 2021-2025 # # Distributed under the terms of the MIT license. 
# @@ -11,9 +11,10 @@ import logging import queue -from typing import Any, Sequence +from typing import Any from pywikibot import config +from pywikibot.backports import Sequence from pywikibot.logging import INFO, VERBOSE from pywikibot.userinterfaces._interface_base import ABUIC @@ -29,7 +30,7 @@ def __init__(self) -> None: """Initialize the UI.""" super().__init__() - self._buffer = queue.Queue() + self._buffer: queue.Queue[Any] = queue.Queue() self.log_handler = logging.handlers.QueueHandler(self._buffer) self.log_handler.setLevel(VERBOSE if config.verbose_output else INFO) diff --git a/pywikibot/userinterfaces/terminal_interface_base.py b/pywikibot/userinterfaces/terminal_interface_base.py index f2a07c6e90..6c4c8e38e9 100644 --- a/pywikibot/userinterfaces/terminal_interface_base.py +++ b/pywikibot/userinterfaces/terminal_interface_base.py @@ -11,7 +11,7 @@ import re import sys import threading -from typing import Any, NoReturn +from typing import Any, Literal, NoReturn, TextIO import pywikibot from pywikibot import config @@ -95,7 +95,7 @@ def __init__(self) -> None: def init_handlers( self, root_logger, - default_stream: str = 'stderr' + default_stream: TextIO | Literal['stderr', 'stdout'] = 'stderr' ) -> None: """Initialize the handlers for user output. 
@@ -536,15 +536,15 @@ def input_list_choice(self, question: str, answers: Sequence[Any], choice = self.input(question, default=default, force=force) try: - choice = int(choice) - 1 + parsedchoice = int(choice) - 1 except (TypeError, ValueError): if choice in answers: return choice - choice = -1 + parsedchoice = -1 # User typed choice number - if 0 <= choice < len(answers): - return answers[choice] + if 0 <= parsedchoice < len(answers): + return answers[parsedchoice] if force: raise ValueError( diff --git a/pywikibot/userinterfaces/terminal_interface_win32.py b/pywikibot/userinterfaces/terminal_interface_win32.py index 604fb6b6fd..1001eda38e 100644 --- a/pywikibot/userinterfaces/terminal_interface_win32.py +++ b/pywikibot/userinterfaces/terminal_interface_win32.py @@ -1,6 +1,6 @@ """User interface for Win32 terminals.""" # -# (C) Pywikibot team, 2003-2024 +# (C) Pywikibot team, 2003-2025 # # Distributed under the terms of the MIT license. # @@ -57,6 +57,7 @@ def encounter_color(self, color, addr = -12 else: super().encounter_color(color, target_stream) + return from ctypes.wintypes import DWORD, HANDLE get_handle = ctypes.WINFUNCTYPE(HANDLE, DWORD)( diff --git a/pywikibot/userinterfaces/transliteration.py b/pywikibot/userinterfaces/transliteration.py index 6fdf6f13c6..94179b46b8 100644 --- a/pywikibot/userinterfaces/transliteration.py +++ b/pywikibot/userinterfaces/transliteration.py @@ -1,16 +1,20 @@ """Module to transliterate text.""" # -# (C) Pywikibot team, 2006-2024 +# (C) Pywikibot team, 2006-2025 # # Distributed under the terms of the MIT license. 
# from __future__ import annotations -from pywikibot.tools import ModuleDeprecationWrapper, deprecate_arg +from pywikibot.tools import ( + ModuleDeprecationWrapper, + deprecate_arg, + deprecated_signature, +) -#: Non latin digits used by the framework -NON_LATIN_DIGITS = { +#: Non ascii digits used by the framework +NON_ASCII_DIGITS = { 'bn': '০১২৩৪৫৬৭৮৯', 'ckb': '٠١٢٣٤٥٦٧٨٩', 'fa': '۰۱۲۳۴۵۶۷۸۹', @@ -70,11 +74,11 @@ 'Ṉ': 'N', 'Ṋ': 'N', 'Ɲ': 'N', 'ɲ': 'n', 'Ƞ': 'N', 'ǹ': 'n', 'ń': 'n', 'ñ': 'n', 'ņ': 'n', 'ň': 'n', 'ṅ': 'n', 'ṇ': 'n', 'ṉ': 'n', 'ṋ': 'n', 'ƞ': 'n', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ṍ': 'O', 'Ṏ': 'O', - 'Ȭ': 'O', 'Ö': 'O', 'Ō': 'O', 'Ṑ': 'O', 'Ṓ': 'O', 'Ŏ': 'O', 'Ǒ': 'O', + 'Ȭ': 'O', 'Ö': 'Oe', 'Ō': 'O', 'Ṑ': 'O', 'Ṓ': 'O', 'Ŏ': 'O', 'Ǒ': 'O', 'Ȯ': 'O', 'Ȱ': 'O', 'Ọ': 'O', 'Ǫ': 'O', 'Ǭ': 'O', 'Ơ': 'O', 'Ờ': 'O', 'Ớ': 'O', 'Ỡ': 'O', 'Ợ': 'O', 'Ở': 'O', 'Ỏ': 'O', 'Ɵ': 'O', 'Ø': 'O', 'Ǿ': 'O', 'ò': 'o', 'ó': 'o', 'ô': 'o', 'õ': 'o', 'ṍ': 'o', 'ṏ': 'o', - 'ȭ': 'o', 'ö': 'o', 'ō': 'o', 'ṑ': 'o', 'ṓ': 'o', 'ŏ': 'o', 'ǒ': 'o', + 'ȭ': 'o', 'ö': 'oe', 'ō': 'o', 'ṑ': 'o', 'ṓ': 'o', 'ŏ': 'o', 'ǒ': 'o', 'ȯ': 'o', 'ȱ': 'o', 'ọ': 'o', 'ǫ': 'o', 'ǭ': 'o', 'ơ': 'o', 'ờ': 'o', 'ớ': 'o', 'ỡ': 'o', 'ợ': 'o', 'ở': 'o', 'ỏ': 'o', 'ɵ': 'o', 'ø': 'o', 'ǿ': 'o', 'Ȍ': 'Ö', 'Ő': 'Ö', 'Ȫ': 'Ö', 'ȍ': 'ö', 'ő': 'ö', 'ȫ': 'ö', @@ -90,10 +94,10 @@ 'Ṭ': 'T', 'Ṯ': 'T', 'Ṱ': 'T', 'Ŧ': 'T', 'Ƭ': 'T', 'Ʈ': 'T', 'ţ': 't', 'ț': 't', 'ť': 't', 'ṫ': 't', 'ṭ': 't', 'ṯ': 't', 'ṱ': 't', 'ŧ': 't', 'Ⱦ': 't', 'ƭ': 't', 'ʈ': 't', 'Ù': 'U', 'Ú': 'U', 'Ũ': 'U', 'Ṹ': 'U', - 'Ṵ': 'U', 'Ü': 'U', 'Ṳ': 'U', 'Ū': 'U', 'Ṻ': 'U', 'Ŭ': 'U', 'Ụ': 'U', + 'Ṵ': 'U', 'Ü': 'Ue', 'Ṳ': 'U', 'Ū': 'U', 'Ṻ': 'U', 'Ŭ': 'U', 'Ụ': 'U', 'Ů': 'U', 'Ų': 'U', 'Ǔ': 'U', 'Ṷ': 'U', 'Ủ': 'U', 'Ư': 'U', 'Ữ': 'U', 'Ự': 'U', 'Ử': 'U', 'ù': 'u', 'ú': 'u', 'ũ': 'u', 'ṹ': 'u', 'ṵ': 'u', - 'ü': 'u', 'ṳ': 'u', 'ū': 'u', 'ṻ': 'u', 'ŭ': 'u', 'ụ': 'u', 'ů': 'u', + 'ü': 'ue', 'ṳ': 'u', 'ū': 'u', 'ṻ': 'u', 'ŭ': 'u', 'ụ': 'u', 'ů': 'u', 
'ų': 'u', 'ǔ': 'u', 'ṷ': 'u', 'ủ': 'u', 'ư': 'u', 'ữ': 'u', 'ự': 'u', 'ử': 'u', 'Ȕ': 'Ü', 'Ű': 'Ü', 'Ǜ': 'Ü', 'Ǘ': 'Ü', 'Ǖ': 'Ü', 'Ǚ': 'Ü', 'ȕ': 'ü', 'ű': 'ü', 'ǜ': 'ü', 'ǘ': 'ü', 'ǖ': 'ü', 'ǚ': 'ü', 'Û': 'Ux', @@ -113,12 +117,14 @@ 'Ƣ': 'G', 'ᵷ': 'g', 'ɣ': 'g', 'ƣ': 'g', 'ᵹ': 'g', 'Ƅ': 'H', 'ƅ': 'h', 'Ƕ': 'Wh', 'ƕ': 'wh', 'Ɩ': 'I', 'ɩ': 'i', 'Ŋ': 'Ng', 'ŋ': 'ng', 'Œ': 'OE', 'œ': 'oe', 'Ɔ': 'O', 'ɔ': 'o', 'Ȣ': 'Ou', 'ȣ': 'ou', 'Ƽ': 'Q', 'ĸ': 'q', - 'ƽ': 'q', 'ȹ': 'qp', '\uf20e': 'r', 'ſ': 's', 'ß': 'ss', 'Ʃ': 'Sh', - 'ʃ': 'sh', 'ᶋ': 'sh', 'Ʉ': 'U', 'ʉ': 'u', 'Ʌ': 'V', 'ʌ': 'v', 'Ɯ': 'W', - 'Ƿ': 'W', 'ɯ': 'w', 'ƿ': 'w', 'Ȝ': 'Y', 'ȝ': 'y', 'IJ': 'IJ', 'ij': 'ij', - 'Ƨ': 'Z', 'ʮ': 'z', 'ƨ': 'z', 'Ʒ': 'Zh', 'ʒ': 'zh', 'Ǯ': 'Dzh', 'ǯ': 'dzh', - 'Ƹ': "'", 'ƹ': "'", 'ʔ': "'", 'ˀ': "'", 'Ɂ': "'", 'ɂ': "'", 'Þ': 'Th', - 'þ': 'th', 'C': '!', 'ʗ': '!', 'ǃ': '!', + 'ƽ': 'q', 'ȹ': 'qp', 'ſ': 's', 'ß': 'ss', 'IJ': 'IJ', 'ij': 'ij', 'Ɯ': 'W', + 'Ƿ': 'W', 'ƿ': 'w', 'Ȝ': 'Y', 'ȝ': 'y', 'Ƨ': 'Z', 'ƨ': 'z', 'Ʒ': 'Zh', + 'ʒ': 'zh', 'Ǯ': 'Dzh', 'ǯ': 'dzh', 'Þ': 'Th', 'þ': 'th', + # International Phonetic Alphabet + 'ʃ': 'sh', 'ᶋ': 'sh', 'Ʉ': 'U', 'ʉ': 'u', 'Ʌ': 'V', 'ʌ': 'v', 'ʔ': "'", + 'ˀ': "'", 'Ɂ': "'", 'ɂ': "'", 'ʗ': '!', 'ǃ': '!', 'Ƹ': "'", 'ƹ': "'", + # Private Use Area + '': 'r', # Punctuation and typography '«': '"', '»': '"', '“': '"', '”': '"', '„': '"', '¨': '"', '‘': "'", '’': "'", '′': "'", '@': '(at)', '¤': '$', '¢': 'c', '€': 'E', '£': 'L', @@ -193,7 +199,6 @@ 'ى': 'á', 'ﻯ': 'á', 'ﻰ': 'á', 'ﯼ': 'y', 'ﯽ': 'y', 'ﯿ': 'y', 'ﯾ': 'y', 'ﻻ': 'la', 'ﻼ': 'la', 'ﷲ': 'llah', 'إ': "a'", 'أ': "a'", 'ؤ': "w'", 'ئ': "y'", - '◌': 'iy', # indicates absence of vowels # Perso-Arabic 'پ': 'p', 'ﭙ': 'p', 'چ': 'ch', 'ژ': 'zh', 'گ': 'g', 'ﮔ': 'g', 'ﮕ': 'g', 'ﮓ': 'g', @@ -1096,7 +1101,7 @@ '𐬳': 'shye', '𐬴': 'sshe', '𐬵': 'he', } -for digits in NON_LATIN_DIGITS.values(): +for digits in NON_ASCII_DIGITS.values(): _trans.update({char: str(i) for i, char in enumerate(digits)}) @@ -1117,23 +1122,29 
@@ def __init__(self, encoding: str) -> None: continue while (value.encode(encoding, 'replace').decode(encoding) == '?' and value in trans): - value = trans[value] + value = trans[value] # pragma: no cover trans[char] = value self.trans = trans @deprecate_arg('next', 'succ') # since 9.0 - def transliterate(self, char: str, default: str = '?', + @deprecated_signature(since='10.6.0') + def transliterate(self, char: str, /, default: str = '?', *, prev: str = '-', succ: str = '-') -> str: """Transliterate the character. .. versionchanged:: 9.0 *next* parameter was renamed to *succ*. + .. versionchanged:: 10.6 + *char* argument is positional only; *prev* and *succ* + arguments are keyword only. :param char: The character to transliterate. - :param default: The character used when there is no transliteration. + :param default: The character used when there is no + transliteration. :param prev: The previous character :param succ: The succeeding character - :return: The transliterated character which may be an empty string + :return: The transliterated character which may be an empty + string """ result = default if char in self.trans: @@ -1148,9 +1159,12 @@ def transliterate(self, char: str, default: str = '?', result = prev # Lao elif char == 'ຫ': - result = '' if next in 'ງຍນຣລຼຼວ' else 'h' + result = '' if succ in 'ງຍນຣລຼຼວ' else 'h' return result wrapper = ModuleDeprecationWrapper(__name__) wrapper.add_deprecated_attr('transliterator', Transliterator, since='9.0.0') +wrapper.add_deprecated_attr('NON_LATIN_DIGITS', NON_ASCII_DIGITS, + replacement_name='NON_ASCII_DIGITS', + since='10.3.0') diff --git a/requirements.txt b/requirements.txt index c6f3ef33b0..42f78a505b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,8 +42,12 @@ pydot >= 3.0.2 python-stdnum >= 1.20 # GUI -Pillow>=11.1.0; python_version > "3.8" -Pillow==10.4.0; python_version < "3.9" +Pillow==10.4.0; platform_python_implementation == "PyPy" and python_version < "3.9" +Pillow>=11.1.0,<11.3.0; 
platform_python_implementation == "PyPy" and python_version >= "3.9" and python_version < "3.11" +Pillow>=11.1.0; platform_python_implementation == "PyPy" and python_version >= "3.11" +Pillow==10.4.0; platform_python_implementation != "PyPy" and python_version < "3.9" +Pillow>=11.1.0,<11.3.0; platform_python_implementation != "PyPy" and python_version == "3.9" +Pillow>=11.1.0; platform_python_implementation != "PyPy" and python_version >= "3.10" # core pagegenerators googlesearch-python >= 1.3.0 diff --git a/scripts/CHANGELOG.rst b/scripts/CHANGELOG.rst index 62f50ea4d9..7c1c04c4b3 100644 --- a/scripts/CHANGELOG.rst +++ b/scripts/CHANGELOG.rst @@ -1,6 +1,34 @@ Scripts Changelog ================= +10.7.0 +------ + +* i18n updates + +misspelling +^^^^^^^^^^^ + +* ``-page`` option was added. (:phab:`T151540`) + +watchlist +^^^^^^^^^ + +* Several exceptions are caught during watchlist count. + +10.4.0 +------ + +addwikis +^^^^^^^^ + +* Add help options for addwikis script whereas `help` is deprecated. 
+ +interwiki +^^^^^^^^^ + +* Clarify ``-localonly`` option behavior and help text (:phab:`T57257`) + 10.3.0 ------ @@ -9,13 +37,31 @@ Scripts Changelog archivebot ^^^^^^^^^^ -* Use wikidata items for archive header templates (:phab:`T396399`) +* Use {{talkarchive}} template by default (:phab:`T400543`) +* Use Wikidata items for archive header templates (:phab:`T396399`) + +create_isbn_edition +^^^^^^^^^^^^^^^^^^^ + +* This script will be removed from repository in Pywikibot 11 + +interwiki +^^^^^^^^^ + +* Ignore :exc:`exceptions.SectionError` in :meth:`interwiki.Subject.page_empty_check` and treat it + as an empty page (:phab:`T398983`) +* Show a warning if no username is configured for a site (:phab:`T135228`) redirect ^^^^^^^^ -* Try one more move to fix redirect targets (:phab:`T396473`) -* Don't fix broken redirects if namespace of source and target are different (:phab:`T396456`) +* Attempt an additional move to fix redirect targets (:phab:`T396473`) +* Do not fix broken redirects if source and target namespaces differ (:phab:`T396456`) + +tracking_param_remover +^^^^^^^^^^^^^^^^^^^^^^ + +* Script for removing tracking URL parameters was added (:phab:`T399698`) 10.2.0 @@ -1033,12 +1079,6 @@ login * update help string -maintenance -^^^^^^^^^^^ - -* Add a preload_sites.py script to preload site information - (:phab:`T226157`) - reflinks ^^^^^^^^ diff --git a/scripts/__init__.py b/scripts/__init__.py index 44bc808000..b747241350 100644 --- a/scripts/__init__.py +++ b/scripts/__init__.py @@ -34,7 +34,7 @@ from pathlib import Path -__version__ = '10.3.0' +__version__ = '10.7.0' #: defines the entry point for pywikibot-scripts package base_dir = Path(__file__).parent diff --git a/scripts/archivebot.py b/scripts/archivebot.py index 3b018ede79..d0538edab5 100755 --- a/scripts/archivebot.py +++ b/scripts/archivebot.py @@ -1,21 +1,24 @@ #!/usr/bin/env python3 -"""archivebot.py - discussion page archiving bot. +"""archivebot.py - Discussion page archiving bot. 
usage: python pwb.py archivebot [OPTIONS] [TEMPLATE_PAGE] -Several TEMPLATE_PAGE templates can be given at once. Default is -`User:MiszaBot/config`. Bot examines backlinks (Special:WhatLinksHere) -to all TEMPLATE_PAGE templates. Then goes through all pages (unless a -specific page specified using options) and archives old discussions. -This is done by breaking a page into threads, then scanning each thread -for timestamps. Threads older than a specified threshold are then moved -to another page (the archive), which can be named either basing on the -thread's name or then name can contain a counter which will be -incremented when the archive reaches a certain size. +Multiple TEMPLATE_PAGE templates can be given in a single command. The +default is ``User:MiszaBot/config``. The bot examines backlinks (i.e. +Special:WhatLinksHere) to all given TEMPLATE_PAGE templates. It then +processes those pages (unless a specific page is specified via options) +and archives old discussions. -Transcluded template may contain the following parameters: +This is done by splitting each page into threads and scanning them for +timestamps. Threads older than a configured threshold are moved to an +archive page. The archive page name can be based on the thread's title, +or include a counter that increments when the archive reaches a +configured size. + +The transcluded configuration template may include the following +parameters: .. code:: wikitext @@ -30,47 +33,64 @@ |key = }} -Meanings of parameters are: +**Parameter meanings:** archive - Name of the page to which archived threads will be put. Must be a - subpage of the current page. Variables are supported. + Name of the archive page where threads will be moved. Must be a + subpage of the current page, unless a valid ``key`` is provided. + Supports variables. + algo - Specifies the maximum age of a thread.
Must be in the form - :code:`old()` where ```` specifies the age in - seconds (s), hours (h), days (d), weeks (w), or years (y) like ``24h`` - or ``5d``. Default is :code:`old(24h)`. + Specifies the maximum age of a thread using the syntax: + :code:`old()`, where ```` can be in seconds (s), hours (h), + days (d), weeks (w), or years (y). For example: ``24h`` or ``5d``. + Default: :code:`old(24h)`. + counter - The current value of a counter which could be assigned as variable. - Will be updated by bot. Initial value is 1. + The current value of the archive counter used in archive page naming. + Will be updated automatically by the bot. Default: 1. + maxarchivesize - The maximum archive size before incrementing the counter. Value can - be given with appending letter like ``K`` or ``M`` which indicates - KByte or MByte. Default value is ``200K``. + The maximum size of an archive page before incrementing the counter. + A suffix of ``K`` or ``M`` may be used for kilobytes or megabytes. + Default: ``200K``. + minthreadsleft - Minimum number of threads that should be left on a page. Default - value is 5. + Minimum number of threads that must remain on the main page after + archiving. Default: 5. + minthreadstoarchive - The minimum number of threads to archive at once. Default value is 2. + Minimum number of threads that must be eligible for archiving before + any are moved. Default: 2. + archiveheader - Content that will be put on new archive pages as the header. This - parameter supports the use of variables. Default value is - ``{{talkarchive}}``. + Content placed at the top of each newly created archive page. + Supports variables. If not set explicitly, a localized default will + be retrieved from Wikidata using known archive header templates. If + no localized template is found, the fallback ``{{talkarchive}}`` is + used. + + .. note:: + If no ``archiveheader`` is set and no localized template can be + retrieved from Wikidata, the fallback ``{{talkarchive}}`` is used. 
+ This generic fallback may not be appropriate for all wikis, so it + is recommended to set ``archiveheader`` explicitly in such cases. + key - A secret key that (if valid) allows archives not to be subpages of - the page being archived. + A secret key that, if valid, allows archive pages to exist outside + of the subpage structure of the current page. -Variables below can be used in the value for "archive" in the template -above; numbers are **latin** digits. Alternatively you may use -**localized** digits. This is only available for a few site languages. -Refer :attr:`NON_LATIN_DIGITS -` whether there is a -localized one. +Variables below can be used in the value of the "archive" parameter in +the template above. Numbers are represented as **ASCII** digits by +default; alternatively, **localized** digits may be used. Localized +digits are only available for a few site languages. Please refer to +:attr:`NON_ASCII_DIGITS ` +to check if a localized version is available. .. list-table:: :header-rows: 1 - * - latin + * - ascii - localized - Description * - %(counter)d @@ -104,13 +124,17 @@ - %(localweek)s - week number of the thread being archived -The ISO calendar starts with the Monday of the week which has at least -four days in the new Gregorian calendar. If January 1st is between -Monday and Thursday (including), the first week of that year started the -Monday of that week, which is in the year before if January 1st is not a -Monday. If it's between Friday or Sunday (including) the following week -is then the first week of the year. So up to three days are still -counted as the year before. +The ISO calendar defines the first week of the year as the week +containing the first Thursday of the Gregorian calendar year. This means: + +- If January 1st falls on a Monday, Tuesday, Wednesday, or Thursday, then + the week containing January 1st is considered the first week of the year. 
+ +- If January 1st falls on a Friday, Saturday, or Sunday, then the first ISO + week starts on the following Monday. + +Because of this, up to three days at the start of January can belong to the +last week of the previous year according to the ISO calendar. .. seealso:: Python :python:`datetime.date.isocalendar `, @@ -118,36 +142,47 @@ Options (may be omitted): --help show this help message and exit +-help Show this help message and exit. --calc:PAGE calculate key for PAGE and exit +-calc:PAGE Calculate key for PAGE and exit. --file:FILE load list of pages from FILE +-file:FILE Load list of pages from FILE. --force override security options +-force Override security options. --locale:LOCALE switch to locale LOCALE +-locale:LOCALE Switch to locale LOCALE. --namespace:NS only archive pages from a given namespace +-namespace:NS Only archive pages from the given namespace. --page:PAGE archive a single PAGE, default ns is a user talk page +-page:PAGE Archive a single PAGE. Default namespace is a user talk + page. --salt:SALT specify salt +-salt:SALT Specify salt. -keep Preserve thread order in archive even if threads are - archived later --sort Sort archive by timestamp; should not be used with `keep` + archived later. + +-sort Sort archive by timestamp; should not be used with `keep`. -async Run the bot in parallel tasks. +Version history: + .. versionchanged:: 7.6 - Localized variables for "archive" template parameter are supported. - `User:MiszaBot/config` is the default template. `-keep` option was - added. + Localized variables for the ``archive`` parameter are supported. + ``User:MiszaBot/config`` is the default template. The ``-keep`` option + was added. + .. versionchanged:: 7.7 ``-sort`` and ``-async`` options were added. + .. versionchanged:: 8.2 - KeyboardInterrupt was enabled with ``-async`` option. + KeyboardInterrupt support added when using the ``-async`` option. + +..
versionchanged:: 10.3 + If ``archiveheader`` is not set, the bot now attempts to retrieve a + localized template from Wikidata (based on known item IDs). If none is + found, ``{{talkarchive}}`` is used as fallback. """ # # (C) Pywikibot team, 2006-2025 @@ -168,12 +203,12 @@ from hashlib import md5 from math import ceil from textwrap import fill -from typing import Any, Pattern +from typing import Any from warnings import warn import pywikibot from pywikibot import i18n -from pywikibot.backports import pairwise +from pywikibot.backports import Pattern, pairwise from pywikibot.exceptions import Error, NoPageError from pywikibot.textlib import ( TimeStripper, @@ -395,19 +430,34 @@ def max( return max(ts1, ts2) def get_header_template(self) -> str: - """Get localized archive header template. + """Return a localized archive header template from Wikibase. + + This method looks up a localized archive header template by + checking a predefined list of Wikidata item IDs that correspond + to commonly used archive header templates. It returns the first + matching template found on the local wiki via the site’s + Wikibase repository. + + If no such localized template is found, it falls back to the + default ``{{talkarchive}}`` template. .. versionadded:: 10.2 - :raises NotImplementedError: Archive header is not localized + .. versionchanged:: 10.3 + Returns ``{{talkarchive}}`` by default if no localized + template is found. + + .. caution:: + The default should be avoided where possible. It is + recommended to explicitly set the ``archiveheader`` parameter + in the bot's configuration template instead. """ for item in ARCHIVE_HEADER: tpl = self.site.page_from_repository(item) if tpl: return f'{{{{{tpl.title(with_ns=False)}}}}}' - raise NotImplementedError( - 'Archive header is not localized on your site') + return '{{talkarchive}}' def load_page(self) -> None: """Load the page to be archived and break it up into threads. 
diff --git a/scripts/category.py b/scripts/category.py index 99f918b780..8794727614 100755 --- a/scripts/category.py +++ b/scripts/category.py @@ -472,16 +472,25 @@ def __init__(self, generator, newcat=None, self.comment = comment @staticmethod - def sorted_by_last_name(catlink, pagelink) -> pywikibot.Page: - """Return a Category with key that sorts persons by their last name. + def sorted_by_last_name(catlink: pywikibot.Page, + pagelink: pywikibot.Page) -> pywikibot.Page: + """Return a category entry for a person, sorted by last name. - Parameters: catlink - The Category to be linked. - pagelink - the Page to be placed in the category. + If the page title contains a disambiguation suffix in brackets, + it will be removed. The last word of the (cleaned) title is + treated as the surname and moved to the front, separated by a + comma. - Trailing words in brackets will be removed. Example: If - category_name is 'Author' and pl is a Page to [[Alexandre Dumas - (senior)]], this function will return this Category: - [[Category:Author|Dumas, Alexandre]]. + Example: + If *catlink* is ``Category:Author`` and *pagelink* points to + ``[[Alexandre Dumas (senior)]]``, this method returns:: + + [[Category:Author|Dumas, Alexandre]] + + :param catlink: Category page where the entry should be added. + :param pagelink: Page of the person to be categorized. + :return: A page object representing the category entry with the + correct sort key. """ page_name = pagelink.title() site = pagelink.site @@ -1323,49 +1332,89 @@ class CategoryTreeRobot: """Robot to create tree overviews of the category structure. - Parameters: - * cat_title - The category which will be the tree's root. - * cat_db - A CategoryDatabase object. - * max_depth - The limit beyond which no subcategories will be listed. - This also guarantees that loops in the category structure - won't be a problem. - * filename - The textfile where the tree should be saved; None to print - the tree to stdout. 
+ This class generates a hierarchical overview of categories starting + from a given root category. The tree can be printed to stdout or + written to a file. Cycles in the category structure are prevented + by limiting the depth. + + Example: + Create a tree view of ``Category:Physics`` up to 5 levels deep + and save it to ``physics_tree.txt``:: + + db = CategoryDatabase() + robot = CategoryTreeRobot( + 'Physics', db, 'physics_tree.txt', max_depth=5) + + .. versionchanged:: 10.4 + *max_depth* is keyword only. + + :param cat_title: The category that serves as the root of the + tree. + :param cat_db: A :class:`CategoryDatabase` object + providing access to category data. + :param filename: Path to the text file where the tree + should be saved. If ``None``, the user will be prompted to enter + a filename. If an empty string is entered, the tree will be + printed to stdout. Relative paths are converted to absolute + paths using :meth:`config.datafilepath`. + :param max_depth: Maximum depth of subcategories to traverse. + Prevents infinite loops. """ def __init__( self, - cat_title, - cat_db, - filename=None, + cat_title: str, + cat_db: CategoryDatabase, + filename: str | None = None, + *, max_depth: int = 10 ) -> None: """Initializer.""" - self.cat_title = cat_title or \ - pywikibot.input( + self.cat_title = cat_title \ + or pywikibot.input( 'For which category do you want to create a tree view?') self.cat_db = cat_db if filename is None: filename = pywikibot.input( 'Please enter the name of the file ' 'where the tree should be saved,\n' - 'or press enter to simply show the tree:') + 'or press enter to simply show the tree:' + ) if filename and not os.path.isabs(filename): filename = config.datafilepath(filename) self.filename = filename self.max_depth = max_depth self.site = pywikibot.Site() - def treeview(self, cat, current_depth: int = 0, parent=None) -> str: - """Return a tree view of all subcategories of cat. 
- - The multi-line string contains a tree view of all subcategories of cat, - up to level max_depth. Recursively calls itself. - - Parameters: - * cat - the Category of the node we're currently opening. - * current_depth - the current level in the tree (for recursion). - * parent - the Category of the category we're coming from. + def treeview(self, + cat: pywikibot.Category, + current_depth: int = 0, + *, + parent: pywikibot.Category | None = None) -> str: + """Return a tree view of subcategories as a multi-line string. + + Generates a hierarchical tree view of all subcategories of the + given category *cat*, up to the depth specified by + ``self.max_depth``. This method is recursive. + + .. versionchanged:: 10.4 + *parent* is keyword only. + + Example: + To get a tree view of ``Category:Physics`` starting at depth 0:: + + cat = pywikibot.Category(site, 'Physics') + tree = robot.treeview(cat) + + :param cat: The Category object currently being expanded in the + tree. + :param current_depth: Current depth level in the tree (used for + recursion). + :param parent: The parent Category from which we descended (to + avoid cycles). + :return: A multi-line string representing the tree structure, + including the number of pages in each category and links to + supercategories. """ result = '#' * current_depth if current_depth > 0: @@ -1680,7 +1729,7 @@ def main(*args: str) -> None: gen_factory.namespaces, summary) elif action == 'tree': bot = CategoryTreeRobot(options.get('from'), cat_db, - options.get('to'), depth) + options.get('to'), max_depth=depth) elif action == 'listify': bot = CategoryListifyRobot(options.get('from'), options.get('to'), summary, diff --git a/scripts/change_pagelang.py b/scripts/change_pagelang.py index 587fce7ec1..bedd622fbb 100755 --- a/scripts/change_pagelang.py +++ b/scripts/change_pagelang.py @@ -23,7 +23,7 @@ .. 
versionadded:: 5.1 """ # -# (C) Pywikibot team, 2018-2024 +# (C) Pywikibot team, 2018-2025 # # Distributed under the terms of the MIT license. # @@ -80,17 +80,19 @@ def treat(self, page) -> None: :type page: pywikibot.page.BasePage """ # Current content language of the page and site language - parameters = {'action': 'query', - 'prop': 'info', - 'titles': page.title(), - 'meta': 'siteinfo'} + parameters = { + 'action': 'query', + 'prop': 'info', + 'titles': page.title(), + } r = self.site.simple_request(**parameters) langcheck = r.submit()['query'] currentlang = '' for k in langcheck['pages']: currentlang = langcheck['pages'][k]['pagelanguage'] - sitelang = langcheck['general']['lang'] + + sitelang = self.site.siteinfo['lang'] if self.opt.setlang == currentlang: pywikibot.info( @@ -109,7 +111,7 @@ def treat(self, page) -> None: choice = pywikibot.input_choice( f'The content language for this page is already set to ' f'<>{currentlang}<>, which is different from ' - f'the default ({sitelang}). Change it to' + f'the default ({sitelang}). Change it to ' f'<>{self.opt.setlang}<> anyway?', [('Always', 'a'), ('Yes', 'y'), ('No', 'n'), ('Never', 'v')], default='Y') @@ -152,7 +154,7 @@ def main(*args: str) -> None: site = pywikibot.Site() specialpages = site.siteinfo['specialpagealiases'] specialpagelist = {item['realname'] for item in specialpages} - allowedlanguages = site._paraminfo.parameter(module='setpagelanguage', + allowedlanguages = site._paraminfo.parameter(module_name='setpagelanguage', param_name='lang')['type'] # Check if the special page PageLanguage is enabled on the wiki # If it is not, page languages can't be set, and there's no point in diff --git a/scripts/checkimages.py b/scripts/checkimages.py index f7e158a5c0..0e178203ef 100755 --- a/scripts/checkimages.py +++ b/scripts/checkimages.py @@ -75,7 +75,7 @@ Welcome messages are imported from :mod:`scripts.welcome` script. 
""" # -# (C) Pywikibot team, 2006-2024 +# (C) Pywikibot team, 2006-2025 # # Distributed under the terms of the MIT license. # @@ -85,11 +85,11 @@ import re import time from itertools import zip_longest -from typing import Generator import pywikibot from pywikibot import config, i18n from pywikibot import pagegenerators as pg +from pywikibot.backports import Generator from pywikibot.bot import suggest_help from pywikibot.exceptions import ( EditConflictError, diff --git a/scripts/create_isbn_edition.py b/scripts/create_isbn_edition.py index a042a9938a..e788265eb3 100755 --- a/scripts/create_isbn_edition.py +++ b/scripts/create_isbn_edition.py @@ -5,6 +5,12 @@ amend the related Wikidata item for edition (with the :samp:`P212, {ISBN number}` as unique external ID). +.. deprecated:: 10.3 + This script is deprecated and will be removed in Pywikibot 11.0. + An external version of this script can be found in the + `geertivp/Pywikibot `_ script + collection. See :phab:`T398140` for details. + Use digital libraries to get ISBN data in JSON format, and integrate the results into Wikidata. @@ -673,7 +679,7 @@ def get_language_preferences() -> list[str]: always appended. .. seealso:: - - :wiki:`List_of_ISO_639-1_codes + - :wiki:`List_of_ISO_639-1_codes` :Return: List of ISO 639-1 language codes with strings delimited by ':'. 
@@ -967,11 +973,6 @@ def add_claims(isbn_data: dict[str, Any]) -> int: # noqa: C901 # Redundant "subtitles" are ignored subtitle = first_upper(titles[1].strip()) - # Get formatted ISBN number - isbn_number = isbn_data['ISBN-13'] # Numeric format - isbn_fmtd = isbnlib.mask(isbn_number) # Canonical format (with "-") - pywikibot.log(isbn_fmtd) - # Search the ISBN number both in canonical and numeric format qnumber_list = get_item_with_prop_value(ISBNPROP, isbn_fmtd) qnumber_list.update(get_item_with_prop_value(ISBNPROP, isbn_number)) @@ -1560,7 +1561,7 @@ def main(*args: str) -> None: f'{pywikibot.__version__}, {pgmlic}, {creator}') # This script requires a bot flag - wdbotflag = 'bot' in pywikibot.User(repo, repo.user()).groups() + wdbotflag = repo.has_group('bot') # Prebuilt targets target_author = pywikibot.ItemPage(repo, AUTHORINSTANCE) diff --git a/scripts/djvutext.py b/scripts/djvutext.py index ac9f6404b8..845520ea3c 100755 --- a/scripts/djvutext.py +++ b/scripts/djvutext.py @@ -1,43 +1,41 @@ #!/usr/bin/env python3 -"""This bot uploads text from djvu files onto pages in the "Page" namespace. +"""This bot uploads text from DjVu files onto pages in the "Page" namespace. -.. note:: It is intended to be used for Wikisource. +.. note:: This script is intended to be used for Wikisource. -The following parameters are supported: +The following command-line parameters are supported: --index: name of the index page (without the Index: prefix) +-index: Name of the index page (without the "Index:" prefix). --djvu: path to the djvu file, it shall be: +-djvu: Path to the DjVu file. It can be one of the following: - .. hlist:: + * A path to a file + * A directory containing a DjVu file with the same name as + the index page (optional; defaults to current directory ".") - * path to a file name - * dir where a djvu file name as index is located optional, - by default is current dir '.' +-pages:-,...-,- + Page range(s) to upload (optional). 
Default: :samp:`start=1`, + :samp:`end={DjVu file number of images}`. Page ranges can be + specified as:: --pages:-,...-,- Page range to - upload; optional, :samp:`start=1`, - :samp:`end={djvu file number of images}`. Page ranges can be - specified as:: + A-B -> pages A through B + A- -> pages A through the end + A -> only page A + -B -> pages 1 through B - A-B -> pages A until B - A- -> pages A until number of images - A -> just page A - -B -> pages 1 until B +This script is a subclass of :class:`ConfigParserBot`. +The following options can be set in a settings file (default: +``scripts.ini``): -This script is a :class:`ConfigParserBot `. The -following options can be set within a settings file which is scripts.ini -by default: +-summary: [str] Custom edit summary. Use quotes if the summary + contains spaces. --summary: [str] Custom edit summary. Use quotes if edit summary - contains spaces. +-force Overwrite existing text. Optional. Default: False. --force Overwrites existing text optional, default False. - --always Do not bother asking to confirm any of the changes. +-always Do not prompt for confirmation before making changes. """ # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. # diff --git a/scripts/i18n b/scripts/i18n index 332854eb19..2406bf3d84 160000 --- a/scripts/i18n +++ b/scripts/i18n @@ -1 +1 @@ -Subproject commit 332854eb199f62ce99f9c2294fb729b6239086f7 +Subproject commit 2406bf3d8489a012ea3ac14dac9ca4eb6d39b923 diff --git a/scripts/interwiki.py b/scripts/interwiki.py index e2b5f60e71..8be5009653 100755 --- a/scripts/interwiki.py +++ b/scripts/interwiki.py @@ -274,8 +274,8 @@ for multiple languages, and specify on which sites the bot should modify pages: --localonly Only work on the local wiki, not on other wikis in the - family I have a login at. +-localonly Process only pages from the default site; ignore pages + from other family members. 
-limittwo Only update two pages - one in the local wiki (if logged-in) and one in the top available one. For example, @@ -341,6 +341,10 @@ To run the script on all pages on a language, run it with option ``-start:!``, and if it takes so long that you have to break it off, use ``-continue`` next time. + +.. versionchanged:: 10.4 + The ``-localonly`` option now restricts page processing to the + default site only, instead of the origin page. """ # # (C) Pywikibot team, 2003-2025 @@ -384,6 +388,7 @@ NoPageError, NoUsernameError, PageSaveRelatedError, + SectionError, ServerError, SiteDefinitionError, SpamblacklistError, @@ -442,46 +447,46 @@ class InterwikiBotConfig: """Container class for interwikibot's settings.""" + always = False + askhints = False + asynchronous = False + auto = True autonomous = False + cleanup = False confirm = False - always = False - select = False + followinterwiki = True followredirect = True - initialredirect = False force = False - cleanup = False - remove = [] - maxquerysize = 50 - same = False - skip = set() - skipauto = False - untranslated = False - untranslatedonly = False - auto = True - neverlink = [] - showtextlink = 0 - showtextlinkadd = 300 - localonly = False - limittwo = False - strictlimittwo = False - needlimit = 0 - ignore = [] - parenthesesonly = False - rememberno = False - followinterwiki = True - minsubjects = config.interwiki_min_subjects - nobackonly = False - askhints = False hintnobracket = False hints = [] hintsareright = False + ignore = [] + initialredirect = False + limittwo = False + localonly = False lacklanguage = None + maxquerysize = 50 minlinks = 0 + minsubjects = config.interwiki_min_subjects + needlimit = 0 + neverlink = [] + nobackonly = False + parenthesesonly = False quiet = False + rememberno = False + remove = [] + repository = False restore_all = False - asynchronous = False + same = False + select = False + showtextlink = 0 + showtextlinkadd = 300 + skip = set() + skipauto = False + strictlimittwo 
= False summary = '' - repository = False + untranslated = False + untranslatedonly = False def note(self, text: str) -> None: """Output a notification message with. @@ -671,6 +676,8 @@ def __init__(self, origin=None, hints=None, conf=None) -> None: self.hintsAsked = False self.forcedStop = False self.workonme = True + # default site for -localonly option + self.site = pywikibot.Site() def getFoundDisambig(self, site): """Return the first disambiguation found. @@ -1129,7 +1136,7 @@ def check_page(self, page, counter) -> None: # must be behind the page.isRedirectPage() part # otherwise a redirect error would be raised - if page_empty_check(page): + if self.page_empty_check(page): self.conf.remove.append(str(page)) self.conf.note(f'{page} is empty. Skipping.') if page == self.origin: @@ -1512,32 +1519,55 @@ def process_limit_two(self, new, updated) -> None: break def process_unlimited(self, new, updated) -> None: - """Post process unlimited.""" - for (site, page) in new.items(): - # if we have an account for this site - if site.family.name in config.usernames \ - and site.code in config.usernames[site.family.name] \ - and not site.has_data_repository: - # Try to do the changes - try: - if self.replaceLinks(page, new): - # Page was changed - updated.append(site) - except SaveError: - pass - except GiveUpOnPage: - break + """Post-process pages: replace links and track updated sites.""" + for site, page in new.items(): + if site.has_data_repository: + self.conf.note( + f'{site} has a data repository, skipping {page}' + ) + continue - def replaceLinks(self, page, newPages) -> bool: - """Return True if saving was successful.""" - # In this case only continue on the Page we started with - if self.conf.localonly and page != self.origin: - raise SaveError('-localonly and page != origin') + # Check if a username is configured for this site + codes = config.usernames.get(site.family.name, []) + if site.code not in codes: + pywikibot.warning( + f'username for {site} is not 
given in your user-config.py' + ) + continue + + # Try to do the changes + try: + changed = self.replaceLinks(page, new) + except SaveError: + continue + except GiveUpOnPage: + break + + if changed: + updated.append(site) + + def _fetch_text(self, page: pywikibot.Page) -> str: + """Validate page and load its content for editing. + + This includes checking for: + - `-localonly` flag and whether the page is on default site + - Section-only pages (pages with `#section`) + - Non-existent pages + - Empty pages + + :param page: The page to check. + :return: The text content of the page if it passes all checks. + :raises SaveError: If the page is not eligible for editing. + """ + # In this case only continue on the Page if on default site + if self.conf.localonly and page.site != self.site: + raise SaveError(f'-localonly: {page} is on site {page.site}; ' + f'only {self.site} is accepted with this option.') if page.section(): # This is not a page, but a subpage. Do not edit it. - pywikibot.info( - f'Not editing {page}: not doing interwiki on subpages') + pywikibot.info(f'Not editing {page}: interwiki not done on' + ' subpages (#section)') raise SaveError('Link has a #section') try: @@ -1546,10 +1576,16 @@ def replaceLinks(self, page, newPages) -> bool: pywikibot.info(f'Not editing {page}: page does not exist') raise SaveError("Page doesn't exist") - if page_empty_check(page): + if self.page_empty_check(page): pywikibot.info(f'Not editing {page}: page is empty') raise SaveError('Page is empty.') + return pagetext + + def replaceLinks(self, page, newPages) -> bool: + """Return True if saving was successful.""" + pagetext = self._fetch_text(page) + # clone original newPages dictionary, so that we can modify it to the # local page's needs new = newPages.copy() @@ -1788,6 +1824,34 @@ def reportBacklinks(new, updatedSites) -> None: pywikibot.warning(f'{page.site.family.name}: {page} links ' f'to incorrect {linkedPage}') + @staticmethod + def page_empty_check(page: 
pywikibot.Page) -> bool: + """Return True if page should be skipped as it is almost empty. + + Pages in content namespaces are considered empty if they contain + fewer than 50 characters, and other pages are considered empty if + they are not category pages and contain fewer than 4 characters + excluding interlanguage links and categories. + """ + try: + txt = page.text + except SectionError: + # Section doesn't exist — treat page as empty + return True + + # Check if the page is in content namespace + if page.namespace().content: + # Check if the page contains at least 50 characters + return len(txt) < 50 + + if not page.is_categorypage(): + site = page.site + txt = textlib.removeLanguageLinks(txt, site=site) + txt = textlib.removeCategoryLinks(txt, site=site) + return len(txt.strip()) < 4 + + return False + class InterwikiBot: @@ -2100,28 +2164,6 @@ def botMayEdit(page) -> bool: return True -def page_empty_check(page) -> bool: - """Return True if page should be skipped as it is almost empty. - - Pages in content namespaces are considered empty if they contain - less than 50 characters, and other pages are considered empty if - they are not category pages and contain less than 4 characters - excluding interlanguage links and categories. - """ - txt = page.text - # Check if the page is in content namespace - if page.namespace().content: - # Check if the page contains at least 50 characters - return len(txt) < 50 - - if not page.is_categorypage(): - txt = textlib.removeLanguageLinks(txt, site=page.site) - txt = textlib.removeCategoryLinks(txt, site=page.site) - return len(txt) < 4 - - return False - - class InterwikiDumps(OptionHandler): """Handle interwiki dumps.""" diff --git a/scripts/listpages.py b/scripts/listpages.py index 5226ae6864..c03c2092be 100755 --- a/scripts/listpages.py +++ b/scripts/listpages.py @@ -6,64 +6,66 @@ These parameters are supported to specify which pages titles to print: --format Defines the output format. 
+-format [int|str] Defines the output format. Can be a custom string according to python string.format() - notation or can be selected by a number from following list - (1 is default format): + notation or can be selected by a number from the following + list (1 is default format): - 1 - '{num:4d} {page.title}' - --> 10 PageTitle + ``1 - '{num:4d} {page.title}'`` + → 10 PageTitle - 2 - '{num:4d} [[{page.title}]]' - --> 10 [[PageTitle]] + ``2 - '{num:4d} [[{page.title}]]'`` + → 10 [[PageTitle]] - 3 - '{page.title}' - --> PageTitle + ``3 - '{page.title}'`` + → PageTitle - 4 - '[[{page.title}]]' - --> [[PageTitle]] + ``4 - '[[{page.title}]]'`` + → [[PageTitle]] - 5 - '{num:4d} <>{page.loc_title:<40}<>' - --> 10 localised_Namespace:PageTitle (colorised in lightred) + ``5 - '{num:4d} <>{page.loc_title:<40}<>'`` + → 10 localised_Namespace:PageTitle (colorised in lightred) - 6 - '{num:4d} {page.loc_title:<40} {page.can_title:<40}' - --> 10 localised_Namespace:PageTitle - canonical_Namespace:PageTitle + ``6 - '{num:4d} {page.loc_title:<40} {page.can_title:<40}'`` + → 10 localised_Namespace:PageTitle + canonical_Namespace:PageTitle - 7 - '{num:4d} {page.loc_title:<40} {page.trs_title:<40}' - --> 10 localised_Namespace:PageTitle - outputlang_Namespace:PageTitle - (*) requires "outputlang:lang" set. + ``7 - '{num:4d} {page.loc_title:<40} {page.trs_title:<40}'`` + → 10 localised_Namespace:PageTitle + outputlang_Namespace:PageTitle - num is the sequential number of the listed page. + .. important:: Requires ``outputlang:lang`` set, see + below. - An empty format is equal to ``-notitle`` and just shows the - total amount of pages. + ``num`` is the sequential number of the listed page. + + .. hint:: An empty format is equal to ``-notitle`` and just + shows the total number of pages. -outputlang - Language for translation of namespaces. + [str] Language for translation of namespaces. -notitle Page title is not printed. -get Page content is printed. 
--tofile Save Page titles to a single file. File name can be set - with -tofile:filename or -tofile:dir_name/filename. +-tofile [str] Save Page titles to a single file. File name can be + set with ``-tofile:filename`` or ``-tofile:dir_name/filename``. -save Save Page content to a file named as :code:`page.title(as_filename=True)`. Directory can be set with ``-save:dir_name``. If no dir is specified, current directory will be used. --encode File encoding can be specified with '-encode:name' (name - must be a valid python encoding: utf-8, etc.). If not +-encode [str] File encoding can be specified with ``-encode:name`` + (name must be a valid python encoding: utf-8, etc.). If not specified, it defaults to :code:`config.textfile_encoding`. -put: [str] Save the list to the defined page of the wiki. By default it does not overwrite an existing page. --overwrite Overwrite the page if it exists. Can only by applied with +-overwrite Overwrite the page if it exists. Can only be applied with -put. -summary: [str] The summary text when the page is written. If it's one @@ -99,7 +101,7 @@ &params; """ # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. # diff --git a/scripts/maintenance/addwikis.py b/scripts/maintenance/addwikis.py index b7cfaf3ba1..11784f4659 100755 --- a/scripts/maintenance/addwikis.py +++ b/scripts/maintenance/addwikis.py @@ -7,9 +7,13 @@ .. versionadded:: 9.2 +.. versionchanged:: 10.4 + The options ``-h``, ``-help`` and ``--help`` display the help message. +.. deprecated:: 10.4 + The ``help`` option """ # -# (C) Pywikibot team, 2024 +# (C) Pywikibot team, 2024-2025 # # Distributed under the terms of the MIT license.
# @@ -20,7 +24,9 @@ from pathlib import Path import pywikibot +from pywikibot.exceptions import ArgumentDeprecationWarning from pywikibot.family import Family +from pywikibot.tools import issue_deprecation_warning # supported families by this script @@ -87,7 +93,14 @@ def main(*args: str) -> None: for arg in args: if arg.startswith('-family'): family = arg.split(':')[1] - elif arg == 'help': + elif arg in ('help', '-h', '-help', '--help'): + if arg == 'help': + issue_deprecation_warning( + "'help' option", + "'-h', '-help' or '--help'", + since='10.4.0', + warning_class=ArgumentDeprecationWarning + ) pywikibot.show_help() return else: diff --git a/scripts/misspelling.py b/scripts/misspelling.py index 12edc75ed7..212a1698ed 100755 --- a/scripts/misspelling.py +++ b/scripts/misspelling.py @@ -20,17 +20,17 @@ given, it starts at the beginning. """ # -# (C) Pywikibot team, 2007-2024 +# (C) Pywikibot team, 2007-2025 # # Distributed under the terms of the MIT license. # from __future__ import annotations from itertools import chain -from typing import Generator import pywikibot from pywikibot import i18n, pagegenerators +from pywikibot.backports import Generator try: @@ -62,11 +62,20 @@ class MisspellingRobot(BaseDisambigBot): # Optional: if there is a category, one can use the -start parameter misspelling_categories = ('Q8644265', 'Q9195708') - update_options = {'start': None} + update_options = {'start': None, 'page': None} @property def generator(self) -> Generator[pywikibot.Page]: """Generator to retrieve misspelling pages or misspelling redirects.""" + # If a single page is specified, yield that page directly + if self.opt.page: + page = pywikibot.Page(self.site, self.opt.page) + if page.exists(): + yield page + else: + pywikibot.error(f"Page '{self.opt.page}' does not exist.") + return + templates = self.misspelling_templates.get(self.site.sitename) categories = [cat for cat in (self.site.page_from_repository(item) for item in self.misspelling_categories) @@ -173,6 
+182,9 @@ def main(*args: str) -> None: 'At which page do you want to start?') elif opt == 'main': options[opt] = True + elif opt == 'page': + options[opt] = value or pywikibot.input( + 'Which page do you want to process?') bot = MisspellingRobot(**options) bot.run() diff --git a/scripts/pagefromfile.py b/scripts/pagefromfile.py index 29bf99a225..76bcd91536 100755 --- a/scripts/pagefromfile.py +++ b/scripts/pagefromfile.py @@ -96,9 +96,19 @@ class NoTitleError(Exception): """No title found.""" - def __init__(self, offset) -> None: - """Initializer.""" + def __init__(self, offset: int, source: str | None = None) -> None: + """Initializer. + + .. versionchanged:: 10.7 + *source* was added; a message was passed to Exception super + class. + """ self.offset = offset + self.source = source + message = f'No title found at offset {offset}' + if source: + message += f' in {source!r}' + super().__init__(message) class PageFromFileRobot(SingleSiteBot, CurrentPageBot): @@ -249,7 +259,7 @@ def generator(self) -> Generator[pywikibot.Page, None, None]: break except NoTitleError as err: - pywikibot.info('\nNo title found - skipping a page.') + pywikibot.info(f'\n{err} - skipping a page.') text = text[err.offset:] else: page = pywikibot.Page(self.site, title) diff --git a/scripts/protect.py b/scripts/protect.py index 4efbbd498f..b462e7a056 100755 --- a/scripts/protect.py +++ b/scripts/protect.py @@ -56,7 +56,7 @@ # # Created by modifying delete.py # -# (C) Pywikibot team, 2008-2023 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. 
# @@ -172,11 +172,11 @@ def main(*args: str) -> None: site = pywikibot.Site() generator_type = None - protection_levels = site.protection_levels() + protection_levels = site.restrictions['levels'] if '' in protection_levels: protection_levels.add('all') - protection_types = site.protection_types() + protection_types = site.restrictions['types'] gen_factory = pagegenerators.GeneratorFactory() for arg in local_args: option, sep, value = arg.partition(':') diff --git a/scripts/pyproject.toml b/scripts/pyproject.toml index f2d70b88d1..67f846a15f 100644 --- a/scripts/pyproject.toml +++ b/scripts/pyproject.toml @@ -7,7 +7,7 @@ package-dir = {"pywikibot_scripts" = "scripts"} [project] name = "pywikibot-scripts" -version = "10.3.0" +version = "10.7.0" authors = [ {name = "xqt", email = "info@gno.de"}, @@ -19,7 +19,7 @@ description = "Pywikibot Scripts Collection" readme = "scripts/README.rst" requires-python = ">=3.8.0" dependencies = [ - "pywikibot >= 10.0.0", + "pywikibot >= 10.5.0", "isbnlib", "langdetect", "mwparserfromhell", diff --git a/scripts/redirect.py b/scripts/redirect.py index a382b4b132..f58b3c1fdf 100755 --- a/scripts/redirect.py +++ b/scripts/redirect.py @@ -71,7 +71,7 @@ &params; """ # -# (C) Pywikibot team, 2004-2024 +# (C) Pywikibot team, 2004-2025 # # Distributed under the terms of the MIT license. 
# @@ -80,11 +80,12 @@ import datetime from contextlib import suppress from textwrap import fill -from typing import Any, Generator +from typing import Any import pywikibot import pywikibot.data from pywikibot import i18n, pagegenerators, xmlreader +from pywikibot.backports import Generator from pywikibot.bot import ExistingPageBot, OptionHandler, suggest_help from pywikibot.exceptions import ( CircularRedirectError, diff --git a/scripts/replace.py b/scripts/replace.py index 7414bfc834..176da68d1e 100755 --- a/scripts/replace.py +++ b/scripts/replace.py @@ -1006,7 +1006,9 @@ def main(*args: str) -> None: # The summary stored here won't be actually used but is only an example site = pywikibot.Site() - single_summary = None + single_summary = ( + 'Not needed' if edit_summary and edit_summary is not True else None + ) for old, new in batched(commandline_replacements, 2): replacement = Replacement(old, new) if not single_summary: diff --git a/scripts/revertbot.py b/scripts/revertbot.py index 64fe498fa1..015871b027 100755 --- a/scripts/revertbot.py +++ b/scripts/revertbot.py @@ -36,12 +36,14 @@ def callback(self, item) -> bool: return False """ # -# (C) Pywikibot team, 2008-2024 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. 
# from __future__ import annotations +from textwrap import fill + import pywikibot from pywikibot import i18n from pywikibot.backports import Container @@ -84,7 +86,8 @@ def revert_contribs(self, callback=None) -> None: if callback(item): result = self.revert(item) if result: - pywikibot.info(f"{item['title']}: {result}") + pywikibot.info( + fill(f"{item['title']}: {result}", width=77)) else: pywikibot.info(f"Skipped {item['title']}") else: @@ -134,17 +137,17 @@ def revert(self, item) -> str | bool: return comment try: - self.site.rollbackpage(page, user=self.user, markbot=True) + result = page.rollback(user=self.user) except APIError as e: if e.code == 'badtoken': pywikibot.error( - 'There was an API token error rollbacking the edit') + 'There was an API token error rolling back the edit') return False except Error: pass else: - return (f'The edit(s) made in {page.title()} by {self.user}' - ' was rollbacked') + return (f'The edit(s) made in {result["title"]} by {self.user} ' + f'was rolled back to revision {result["last_revid"]}') pywikibot.exception(exc_info=False) return False diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py index 6684e18beb..c2896f7969 100755 --- a/scripts/solve_disambiguation.py +++ b/scripts/solve_disambiguation.py @@ -83,12 +83,12 @@ from contextlib import suppress from itertools import chain from pathlib import Path -from typing import Generator import pywikibot from pywikibot import config from pywikibot import editor as editarticle from pywikibot import i18n, pagegenerators +from pywikibot.backports import Generator from pywikibot.bot import ( HighlightContextOption, ListOption, diff --git a/scripts/tracking_param_remover.py b/scripts/tracking_param_remover.py new file mode 100755 index 0000000000..623f1cd6e8 --- /dev/null +++ b/scripts/tracking_param_remover.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +"""Script to remove tracking URL query parameters from external URLs. 
+ +These command line parameters can be used to specify which pages to work +on: + +&params; + +Furthermore, the following command line parameters are supported: + +-always Don't prompt for each removal + +.. versionadded:: 10.3 +""" +# +# (C) Pywikibot team, 2025 +# +# Distributed under the terms of the MIT license. +# +from __future__ import annotations + +import re +import urllib + +import mwparserfromhell + +import pywikibot +from pywikibot import pagegenerators +from pywikibot.bot import AutomaticTWSummaryBot, ExistingPageBot, SingleSiteBot + + +docuReplacements = { # noqa: N816 + '&params;': pagegenerators.parameterHelp, +} + +KNOWN_TRACKER_PARAMS = [ + 'utm_.+', # universal + 'fbclid', # Facebook + 'gad_.+', # Google + 'gclid', # Google + '[gw]braid', # Google + 'li_fat_id', # LinkedIn + 'mc_.+', # Mailchimp + 'pk_.+', # Matomo / Piwik + 'msclkid', # Microsoft + 'epik', # Pinterest + 'scid', # Snapchat + 'ttclid', # TikTok + 'twclid', # Twitter / X + 'vero_.+', # Vero + 'wprov', # Wikimedia / MediaWiki + '_openstat', # Yandex + 'yclid', # Yandex + 'si', # YouTube, Spotify +] + +KNOWN_TRACKER_REGEX = re.compile(rf'({"|".join(KNOWN_TRACKER_PARAMS)})') + + +class TrackingParamRemoverBot( + SingleSiteBot, + AutomaticTWSummaryBot, + ExistingPageBot +): + + """Bot to remove tracking URL parameters.""" + + summary_key = 'tracking_param_remover-removing' + + @staticmethod + def remove_tracking_params(url: urllib.parse.ParseResult) -> str: + """Remove tracking query parameters if they are present. 
+ + :param url: The URL to check + :returns: URL as string + """ + filtered_params = [] + + tracker_present = False + for k, v in urllib.parse.parse_qsl(url.query, keep_blank_values=True): + if KNOWN_TRACKER_REGEX.fullmatch(k): + tracker_present = True + else: + filtered_params.append((k, v)) + + if not tracker_present: + # Return the original URL if no tracker parameters were present + return urllib.parse.urlunparse(url) + + new_query = urllib.parse.urlencode(filtered_params) + + new_url = urllib.parse.urlunparse(url._replace(query=new_query)) + + return new_url + + def treat_page(self) -> None: + """Treat a page.""" + wikicode = mwparserfromhell.parse(self.current_page.text) + + for link in wikicode.ifilter_external_links(): + parsed_url = urllib.parse.urlparse(str(link.url)) + if not parsed_url.query: + continue + tracking_params_removed = self.remove_tracking_params(parsed_url) + if urllib.parse.urlunparse(parsed_url) == tracking_params_removed: + # Continue if no parameters were removed + continue + wikicode.replace(link.url, tracking_params_removed) + + self.put_current(wikicode) + + +def main(*args: str) -> None: + """Process command line arguments and invoke bot. + + If args is an empty list, sys.argv is used. 
+ + :param args: command line arguments + """ + options = {} + + # Process global args and prepare generator args parser + local_args = pywikibot.handle_args(args) + gen_factory = pagegenerators.GeneratorFactory() + script_args = gen_factory.handle_args(local_args) + + for arg in script_args: + opt, _, value = arg.partition(':') + if opt == '-always': + options['always'] = True + + site = pywikibot.Site() + + gen = gen_factory.getCombinedGenerator(preload=True) + bot = TrackingParamRemoverBot(generator=gen, **options) + site.login() + bot.run() + + +if __name__ == '__main__': + main() diff --git a/scripts/watchlist.py b/scripts/watchlist.py index c6d1a61193..b2fb03f0a4 100755 --- a/scripts/watchlist.py +++ b/scripts/watchlist.py @@ -25,7 +25,7 @@ watchlist is retrieved in parallel tasks. """ # -# (C) Pywikibot team, 2005-2024 +# (C) Pywikibot team, 2005-2025 # # Distributed under the terms of the MIT license. # @@ -38,7 +38,12 @@ import pywikibot from pywikibot import config from pywikibot.data.api import CachedRequest -from pywikibot.exceptions import InvalidTitleError +from pywikibot.exceptions import ( + APIError, + InvalidTitleError, + NoUsernameError, + ServerError, +) from pywikibot.tools.threading import BoundedPoolExecutor @@ -72,8 +77,13 @@ def count_watchlist_all(quiet=False) -> None: futures = {executor.submit(refresh, pywikibot.Site(lang, family)) for family in config.usernames for lang in config.usernames[family]} - wl_count_all = sum(len(future.result()) - for future in as_completed(futures)) + wl_count_all = 0 + for future in as_completed(futures): + try: + watchlist_pages = future.result() + wl_count_all += len(watchlist_pages) + except (NoUsernameError, APIError, ServerError) as e: + pywikibot.error(f'Failed to retrieve watchlist: {e}') if not quiet: pywikibot.info(f'There are a total of {wl_count_all} page(s) in the' ' watchlists for all wikis.') diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py index e4bca2a42f..4da1a67cfa 100755 
--- a/scripts/weblinkchecker.py +++ b/scripts/weblinkchecker.py @@ -169,18 +169,10 @@ # Ignore links containing * in domain name # as they are intentionally fake re.compile(r'https?\:\/\/\*(/.*)?'), -] - -def get_archive_url(url): - """Get archive URL.""" - try: - return get_closest_memento_url( - url, timegate_uri='http://web.archive.org/web/') - except Exception: - return get_closest_memento_url( - url, - timegate_uri='http://timetravel.mementoweb.org/webcite/timegate/') + # properly formatted mailto links: no further checking possible + re.compile(r'mailto:[^@]+@[a-z0-9\.]+(\?.*)?'), +] def weblinks_from_text( @@ -251,7 +243,8 @@ class LinkCheckThread(threading.Thread): hosts: dict[str, float] = {} lock = threading.Lock() - def __init__(self, page, url, history, http_ignores, day) -> None: + def __init__(self, page, url: str, history: History, + http_ignores: list[int], day: int) -> None: """Initializer.""" self.page = page self.url = url @@ -341,7 +334,8 @@ class History: } """ - def __init__(self, report_thread, site=None) -> None: + def __init__(self, report_thread: DeadLinkReportThread | None, + site: pywikibot._BaseSite | None = None) -> None: """Initializer.""" self.report_thread = report_thread if not site: @@ -405,7 +399,7 @@ def set_dead_link(self, url, error, page, weblink_dead_days) -> None: if time_since_first_found > 60 * 60 * 24 * weblink_dead_days: # search for archived page try: - archive_url = get_archive_url(url) + archive_url = get_closest_memento_url(url) except Exception as e: pywikibot.warning( f'get_closest_memento_url({url}) failed: {e}') @@ -539,7 +533,8 @@ class WeblinkCheckerRobot(SingleSiteBot, ExistingPageBot): use_redirects = False - def __init__(self, http_ignores=None, day: int = 7, **kwargs) -> None: + def __init__(self, http_ignores: list[int] | None = None, + day: int = 7, **kwargs) -> None: """Initializer.""" super().__init__(**kwargs) @@ -571,8 +566,9 @@ def treat_page(self) -> None: # thread dies when program terminates 
thread.daemon = True # use hostname as thread.name - thread.name = removeprefix( - urlparse.urlparse(url).hostname, 'www.') + hostname = urlparse.urlparse(url).hostname + if hostname is not None: + thread.name = removeprefix(hostname, 'www.') self.threads.append(thread) def teardown(self) -> None: diff --git a/setup.py b/setup.py index 2ad8803f2a..806f3ec2e2 100755 --- a/setup.py +++ b/setup.py @@ -46,8 +46,18 @@ 'mysql': ['PyMySQL >= 1.1.1'], # vulnerability found in Pillow<8.1.2 but toolforge uses 5.4.1 'Tkinter': [ - 'Pillow>=11.1.0; python_version > "3.8"', - 'Pillow==10.4.0; python_version < "3.9"', + 'Pillow==10.4.0; platform_python_implementation == "PyPy" ' + 'and python_version < "3.9"', + 'Pillow>=11.1.0,<11.3.0; platform_python_implementation == "PyPy" ' + 'and python_version >= "3.9" and python_version < "3.11"', + 'Pillow>=11.1.0; platform_python_implementation == "PyPy" ' + 'and python_version >= "3.11"', + 'Pillow==10.4.0; platform_python_implementation != "PyPy" ' + 'and python_version < "3.9"', + 'Pillow>=11.1.0,<11.3.0; platform_python_implementation != "PyPy" ' + 'and python_version == "3.9"', + 'Pillow>=11.1.0; platform_python_implementation != "PyPy" ' + 'and python_version >= "3.10"', ], 'mwoauth': [ 'PyJWT != 2.10.0, != 2.10.1; python_version > "3.8"', # T380270 @@ -78,17 +88,6 @@ 'requests>=2.31.0', ] -# ------- setup tests_require ------- # -test_deps = [] - -# Add all dependencies as test dependencies, -# so all scripts can be compiled for script_tests, etc. -if 'PYSETUP_TEST_EXTRAS' in os.environ: # pragma: no cover - test_deps += [i for v in extra_deps.values() for i in v] - -# These extra dependencies are needed other unittest fails to load tests. -test_deps += extra_deps['eventstreams'] - class _DottedDict(dict): __getattr__ = dict.__getitem__ @@ -106,6 +105,12 @@ def read_project() -> str: .. 
versionadded:: 9.0 """ + if sys.version_info >= (3, 11): + import tomllib + with open(path / 'pyproject.toml', 'rb') as f: + data = tomllib.load(f) + return data['project']['name'] + toml = [] with open(path / 'pyproject.toml') as f: for line in f: @@ -174,7 +179,7 @@ def get_validated_version(name: str) -> str: # pragma: no cover if warning: print(__doc__) - print('\n\n{warning}') + print(f'\n\n{warning}') sys.exit('\nBuild of distribution package canceled.') return version @@ -230,8 +235,6 @@ def main() -> None: # pragma: no cover include_package_data=True, install_requires=dependencies, extras_require=extra_deps, - test_suite='tests.collector', - tests_require=test_deps, ) diff --git a/tests/add_text_tests.py b/tests/add_text_tests.py index e032cb1980..7c26c71c50 100755 --- a/tests/add_text_tests.py +++ b/tests/add_text_tests.py @@ -46,7 +46,7 @@ class TestAddTextScript(TestCase): dry = True def setUp(self) -> None: - """Setup test.""" + """Set up test.""" super().setUp() pywikibot.bot.ui.clear() self.generator_factory = pywikibot.pagegenerators.GeneratorFactory() diff --git a/tests/api_tests.py b/tests/api_tests.py index 81f54bf379..eddd212b7f 100755 --- a/tests/api_tests.py +++ b/tests/api_tests.py @@ -294,9 +294,10 @@ class TestOptionSet(TestCase): def test_non_lazy_load(self) -> None: """Test OptionSet with initialised site.""" options = api.OptionSet(self.get_site(), 'recentchanges', 'show') - with self.assertRaises(KeyError): + with self.assertRaisesRegex(KeyError, 'Invalid name "invalid_name"'): options.__setitem__('invalid_name', True) - with self.assertRaises(ValueError): + with self.assertRaisesRegex( + ValueError, 'Invalid value "invalid_value"'): options.__setitem__('anon', 'invalid_value') options['anon'] = True self.assertCountEqual(['anon'], options._enabled) @@ -324,13 +325,17 @@ def test_lazy_load(self) -> None: options['anon'] = True self.assertIn('invalid_name', options._enabled) self.assertLength(options, 2) - with 
self.assertRaises(KeyError): + with self.assertRaisesRegex( + KeyError, + r'OptionSet already contains invalid name\(s\) "invalid_name"' + ): options._set_site(self.get_site(), 'recentchanges', 'show') self.assertLength(options, 2) options._set_site(self.get_site(), 'recentchanges', 'show', clear_invalid=True) self.assertLength(options, 1) - with self.assertRaises(TypeError): + with self.assertRaisesRegex( + TypeError, 'The site cannot be set multiple times.'): options._set_site(self.get_site(), 'recentchanges', 'show') @@ -529,7 +534,8 @@ def test_many_continuations_limited(self) -> None: gen = api.PropertyGenerator( site=self.site, prop='revisions|info|categoryinfo|langlinks|templates', - parameters=params) + parameters=params + ) # An APIError is raised if set_maximum_items is not called. gen.set_maximum_items(-1) # suppress use of "rvlimit" parameter @@ -598,11 +604,6 @@ def setUp(self) -> None: 'limit': {'max': 10}, 'namespace': {'multi': True} } - self.site._paraminfo['query+alllinks'] = { - 'prefix': 'al', - 'limit': {'max': 10}, - 'namespace': {'default': 0} - } self.site._paraminfo['query+links'] = { 'prefix': 'pl', } @@ -628,17 +629,13 @@ def test_namespace_param_is_not_settable(self) -> None: def test_namespace_none(self) -> None: """Test ListGenerator set_namespace with None.""" - self.gen = api.ListGenerator(listaction='alllinks', site=self.site) - with self.assertRaises(TypeError): + self.gen = api.ListGenerator(listaction='allpages', site=self.site) + with self.assertRaisesRegex( + TypeError, + (r'int\(\) argument must be a string, a bytes-like object ' + r"or (?:a real number|a number), not 'NoneType'")): self.gen.set_namespace(None) - def test_namespace_non_multi(self) -> None: - """Test ListGenerator set_namespace when non multi.""" - self.gen = api.ListGenerator(listaction='alllinks', site=self.site) - with self.assertRaises(TypeError): - self.gen.set_namespace([0, 1]) - self.assertIsNone(self.gen.set_namespace(0)) - def 
test_namespace_multi(self) -> None: """Test ListGenerator set_namespace when multi.""" self.gen = api.ListGenerator(listaction='allpages', site=self.site) @@ -649,7 +646,7 @@ def test_namespace_resolve_failed(self) -> None: """Test ListGenerator set_namespace when resolve fails.""" self.gen = api.ListGenerator(listaction='allpages', site=self.site) self.assertTrue(self.gen.support_namespace()) - with self.assertRaises(KeyError): + with self.assertRaisesRegex(KeyError, '10000'): self.gen.set_namespace(10000) @@ -675,7 +672,10 @@ def setUp(self) -> None: def test_namespace_none(self) -> None: """Test ListGenerator set_namespace with None.""" - with self.assertRaises(TypeError): + with self.assertRaisesRegex( + TypeError, + (r'int\(\) argument must be a string, a bytes-like object ' + r"or (?:a real number|a number), not 'NoneType'")): self.gen.set_namespace(None) def test_namespace_zero(self) -> None: diff --git a/tests/archivebot_tests.py b/tests/archivebot_tests.py index d580b62075..7b78a3d03e 100755 --- a/tests/archivebot_tests.py +++ b/tests/archivebot_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for archivebot scripts.""" # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. 
# @@ -94,15 +94,22 @@ def test_str2size(self) -> None: def test_str2size_failures(self) -> None: """Test for rejecting of invalid shorthand notation of sizes.""" - with self.assertRaises(archivebot.MalformedConfigError): + with self.assertRaisesRegex( + archivebot.MalformedConfigError, "Couldn't parse size: 4 KK"): archivebot.str2size('4 KK') - with self.assertRaises(archivebot.MalformedConfigError): + with self.assertRaisesRegex( + archivebot.MalformedConfigError, "Couldn't parse size: K4"): archivebot.str2size('K4') - with self.assertRaises(archivebot.MalformedConfigError): + with self.assertRaisesRegex( + archivebot.MalformedConfigError, "Couldn't parse size: 4X"): archivebot.str2size('4X') - with self.assertRaises(archivebot.MalformedConfigError): + with self.assertRaisesRegex( + archivebot.MalformedConfigError, + "Couldn't parse size: 1 234 56"): archivebot.str2size('1 234 56') - with self.assertRaises(archivebot.MalformedConfigError): + with self.assertRaisesRegex( + archivebot.MalformedConfigError, + "Couldn't parse size: 1234 567"): archivebot.str2size('1234 567') @@ -123,15 +130,15 @@ def test_archivebot(self, code=None) -> None: talk = archivebot.DiscussionPage(page, None) self.assertIsInstance(talk.archives, dict) self.assertIsInstance(talk.archived_threads, int) - self.assertTrue(talk.archiver is None) + self.assertIsNone(talk.archiver) self.assertIsInstance(talk.header, str) self.assertIsInstance(talk.timestripper, TimeStripper) self.assertIsInstance(talk.threads, list) self.assertGreaterEqual( len(talk.threads), THREADS[code], - f'{len(talk.threads)} Threads found on {talk},\n{THREADS[code]} or' - ' more expected' + f'{len(talk.threads)} Threads found on {talk},\n' + f'{THREADS[code]} or more expected' ) for thread in talk.threads: @@ -182,7 +189,7 @@ def test_archivebot(self, code=None) -> None: talk = archivebot.DiscussionPage(page, None) self.assertIsInstance(talk.archives, dict) self.assertIsInstance(talk.archived_threads, int) - 
self.assertTrue(talk.archiver is None) + self.assertIsNone(talk.archiver) self.assertIsInstance(talk.header, str) self.assertIsInstance(talk.timestripper, TimeStripper) @@ -344,7 +351,9 @@ def testLoadConfigInOtherNamespace(self) -> None: except Error as e: # pragma: no cover self.fail(f'PageArchiver() raised {e}!') - with self.assertRaises(archivebot.MissingConfigError): + with self.assertRaisesRegex( + archivebot.MissingConfigError, + 'Missing or malformed template'): archivebot.PageArchiver(page, tmpl_without_ns, '') diff --git a/tests/aspects.py b/tests/aspects.py index fa4203f858..34a6ea1160 100644 --- a/tests/aspects.py +++ b/tests/aspects.py @@ -15,6 +15,7 @@ import re import sys import time +import types import unittest import warnings from collections.abc import Sized @@ -37,6 +38,7 @@ from pywikibot.family import WikimediaFamily from pywikibot.site import BaseSite from pywikibot.tools import ( # noqa: F401 (used by eval()) + PYTHON_VERSION, MediaWikiVersion, suppress_warnings, ) @@ -63,6 +65,151 @@ pywikibot.bot.set_interface('buffer') +class Python314AssertionsMixin: # pragma: no cover + + """Mixin providing assertion methods added in Python 3.14 for unittest. + + This mixin ensures TestCase compatibility on older Python versions. + + The mixin will be removed without deprecation period once Python 3.14 + becomes the minimum requirement for Pywikibot, likely with Pywikibot 16. + + .. versionadded:: 10.3 + """ + + def assertStartsWith(self, s: str, prefix: str, + msg: str | None = None) -> None: + """Fail if the string *s* does not start with *prefix*. + + :param s: The string to check. + :param prefix: The expected prefix. + :param msg: Optional custom failure message. 
+ """ + if s.startswith(prefix): + return + + variant = 'any of ' if isinstance(prefix, tuple) else '' + default_msg = f'{s!r} does not start with {variant}{prefix!r}' + self.fail(self._formatMessage(msg, default_msg)) + + def assertNotStartsWith(self, s: str, prefix: str, + msg: str | None = None) -> None: + """Fail if the string *s* starts with *prefix*. + + :param s: The string to check. + :param prefix: The unwanted prefix. + :param msg: Optional custom failure message. + """ + if not s.startswith(prefix): + return + + default_msg = f'{s!r} starts with {prefix!r}' + self.fail(self._formatMessage(msg, default_msg)) + + def assertEndsWith(self, s: str, suffix: str, + msg: str | None = None) -> None: + """Fail if the string *s* does not end with *suffix*. + + :param s: The string to check. + :param suffix: The expected suffix. + :param msg: Optional custom failure message. + """ + if s.endswith(suffix): + return + + variant = 'any of ' if isinstance(suffix, tuple) else '' + default_msg = f'{s!r} does not end with {variant}{suffix!r}' + self.fail(self._formatMessage(msg, default_msg)) + + def assertNotEndsWith(self, s: str, suffix: str, + msg: str | None = None) -> None: + """Fail if the string *s* ends with *suffix*. + + :param s: The string to check. + :param suffix: The unwanted suffix. + :param msg: Optional custom failure message. + """ + if not s.endswith(suffix): + return + + default_msg = f'{s!r} ends with {suffix!r}' + self.fail(self._formatMessage(msg, default_msg)) + + def assertHasAttr(self, obj: object, name: str, + msg: str | None = None) -> None: + """Fail if the object *obj* does not have an attribute *name*. + + :param obj: The object to check. + :param name: The expected attribute name. + :param msg: Optional custom failure message. 
+ """ + if hasattr(obj, name): + return + + if isinstance(obj, types.ModuleType): + obj_name = f'module {obj.__name__!r}' + elif isinstance(obj, type): + obj_name = f'type object {obj.__name__!r}' + else: + obj_name = f'{type(obj).__name__!r}' + + default_msg = f'{obj_name} does not have attribute {name!r}' + self.fail(self._formatMessage(msg, default_msg)) + + def assertNotHasAttr(self, obj: object, name: str, + msg: str | None = None) -> None: + """Fail if the object *obj* has an attribute *name*. + + :param obj: The object to check. + :param name: The unwanted attribute name. + :param msg: Optional custom failure message. + """ + if not hasattr(obj, name): + return + + if isinstance(obj, types.ModuleType): + obj_name = f'module {obj.__name__!r}' + elif isinstance(obj, type): + obj_name = f'type object {obj.__name__!r}' + else: + obj_name = f'{type(obj).__name__!r}' + + default_msg = f'{obj_name} has attribute {name!r}' + self.fail(self._formatMessage(msg, default_msg)) + + def assertIsSubclass(self, cls: type, superclass: type | tuple[type, ...], + msg: str | None = None) -> None: + """Fail if *cls* is not a subclass of *superclass*. + + :param cls: The class to test. + :param superclass: The expected superclass or tuple of superclasses. + :param msg: Optional custom failure message. + """ + if issubclass(cls, superclass): + return + + default_msg = f'{cls!r} is not a subclass of {superclass!r}' + self.fail(self._formatMessage(msg, default_msg)) + + def assertNotIsSubclass( + self, + cls: type, + superclass: type | tuple[type, ...], + msg: str | None = None + ) -> None: + """Fail if *cls* is a subclass of *superclass*. + + :param cls: The class to test. + :param superclass: The superclass or tuple of superclasses to reject. + :param msg: Optional custom failure message. 
+ """ + if not issubclass(cls, superclass): + return + + default_msg = f'{cls!r} is a subclass of {superclass!r}' + self.fail(self._formatMessage(msg, default_msg)) + + class TestTimerMixin(unittest.TestCase): """Time each test and report excessive durations.""" @@ -86,22 +233,27 @@ def tearDown(self) -> None: sys.stdout.flush() -class TestCaseBase(TestTimerMixin): +# Add Python314AssertionsMixin on Python < 3.14 +if PYTHON_VERSION < (3, 14): + bases = (TestTimerMixin, Python314AssertionsMixin) +else: + bases = (TestTimerMixin, ) + + +class TestCaseBase(*bases): """Base class for all tests.""" def assertIsEmpty(self, seq, msg=None) -> None: """Check that the sequence is empty.""" - self.assertIsInstance( - seq, Sized, SIZED_ERROR) + self.assertIsInstance(seq, Sized, SIZED_ERROR) if seq: msg = self._formatMessage(msg, f'{safe_repr(seq)} is not empty') self.fail(msg) def assertIsNotEmpty(self, seq, msg=None) -> None: """Check that the sequence is not empty.""" - self.assertIsInstance( - seq, Sized, SIZED_ERROR) + self.assertIsInstance(seq, Sized, SIZED_ERROR) if not seq: msg = self._formatMessage(msg, f'{safe_repr(seq)} is empty') self.fail(msg) @@ -109,8 +261,7 @@ def assertIsNotEmpty(self, seq, msg=None) -> None: def assertLength(self, seq, other, msg=None) -> None: """Verify that a sequence seq has the length of other.""" # the other parameter may be given as a sequence too - self.assertIsInstance( - seq, Sized, SIZED_ERROR) + self.assertIsInstance(seq, Sized, SIZED_ERROR) first_len = len(seq) try: second_len = len(other) @@ -292,16 +443,16 @@ def test_requirement(obj): def require_version(version_needed: str, /, reason: str = ''): - """Require minimum MediaWiki version to be queried. + """Skip test unless a minimum MediaWiki version is available. - The version needed for the test; must be given with a preleading rich - comparisons operator like ``<1.31wmf4`` or ``>=1.43``. If the - comparison does not match the test will be skipped. 
+ The required version must include a comparison operator (e.g. + :code:`<1.31wmf4` or :code:`>=1.43`). If the site's version does not + satisfy the condition, the test is skipped. - This decorator can only be used for TestCase having a single site. - It cannot be used for DrySite tests. In addition version comparison - for other than the current site e.g. for the related data or image - repositoy of the current site is ot possible. + This decorator can only be used for :class:`TestCase` having a + single site. It cannot be used for DrySite tests. Version checks are + only supported for the current site — not for related sites like + data or image repositories. .. versionadded:: 8.0 @@ -335,6 +486,8 @@ def wrapper(self, *args, **kwargs): ) try: + # Split version string into operator and version + # (e.g. '>=1.39' → '', '>=', '1.39') site_vers, op, version = re.split('([<>]=?)', version_needed) except ValueError: raise ValueError(f'There is no valid operator given with ' @@ -502,8 +655,8 @@ def setUpClass(cls) -> None: f'{cls.__name__}: accessing {hostname} caused exception:') cls._checked_hostnames[hostname] = e - raise unittest.SkipTest(f'{cls.__name__}: hostname {hostname}' - ' failed: {e}') from None + raise unittest.SkipTest(f'{cls.__name__}: hostname {hostname} ' + f'failed: {e}') from None cls._checked_hostnames[hostname] = True @@ -591,8 +744,8 @@ def setUpClass(cls) -> None: site.login() if not site.user(): - raise unittest.SkipTest( - f'{cls.__name__}: Not able to login to {site}') + raise unittest.SkipTest(f'{cls.__name__}: Not able to login ' + f'{site.username()} to {site}') def setUp(self) -> None: """Set up the test case. 
@@ -626,7 +779,7 @@ def _reset_login(self, skip_if_login_fails: bool = False) -> None: continue if not site.logged_in(): - site.login() + site.login() # pragma: no cover if skip_if_login_fails and not site.user(): # during setUp() only self.skipTest( @@ -1389,6 +1542,7 @@ class DeprecationTestCase(TestCase): r'(; use .* instead)?\.') source_adjustment_skips = [ + unittest.case._AssertRaisesBaseContext, unittest.case._AssertRaisesContext, TestCase.assertRaises, TestCase.assertRaisesRegex, @@ -1399,10 +1553,6 @@ class DeprecationTestCase(TestCase): # Require an instead string INSTEAD = object() - # Python 3 component in the call stack of _AssertRaisesContext - if hasattr(unittest.case, '_AssertRaisesBaseContext'): - source_adjustment_skips.append(unittest.case._AssertRaisesBaseContext) - def __init__(self, *args, **kwargs) -> None: """Initializer.""" super().__init__(*args, **kwargs) @@ -1513,13 +1663,15 @@ def assertOneDeprecationParts(self, deprecated=None, instead=None, def assertOneDeprecation(self, msg=None, count=1) -> None: """Assert that exactly one deprecation message happened and reset.""" - self.assertDeprecation(msg) - # This is doing such a weird structure, so that it shows any other - # deprecation message from the set. - self.assertCountEqual(set(self.deprecation_messages), - [self.deprecation_messages[0]]) - self.assertLength(self.deprecation_messages, count) - self._reset_messages() + try: + self.assertDeprecation(msg) + # This is doing such a weird structure, so that it shows any other + # deprecation message from the set. 
+ self.assertCountEqual(set(self.deprecation_messages), + [self.deprecation_messages[0]]) + self.assertLength(self.deprecation_messages, count) + finally: + self._reset_messages() def assertNoDeprecation(self, msg=None) -> None: """Assert that no deprecation warning happened.""" @@ -1540,7 +1692,7 @@ def assertDeprecationFile(self, filename) -> None: and 'pywikibot' not in item.filename): continue # pragma: no cover - if item.filename != filename: + if item.filename != filename: # pragma: no cover self.fail(f'expected warning filename {filename}; warning ' f'item: {item}') diff --git a/tests/basepage.py b/tests/basepage.py index 3903067509..a611768ab6 100644 --- a/tests/basepage.py +++ b/tests/basepage.py @@ -1,16 +1,26 @@ """BasePage tests subclasses.""" # -# (C) Pywikibot team, 2015-2022 +# (C) Pywikibot team, 2015-2025 # # Distributed under the terms of the MIT license. # from __future__ import annotations +from abc import ABCMeta, abstractmethod + from pywikibot.page import BasePage from tests.aspects import TestCase -class BasePageTestBase(TestCase): +class ABCTestCaseMeta(ABCMeta, type(TestCase)): + + """Enable abstract methods in TestCase-based base classes. + + .. versionadded:: 10.3 + """ + + +class BasePageTestBase(TestCase, metaclass=ABCTestCaseMeta): """Base of BasePage test classes.""" @@ -19,8 +29,17 @@ class BasePageTestBase(TestCase): def setUp(self) -> None: """Set up test.""" super().setUp() - assert self._page, 'setUp() must create an empty BasePage in _page' - assert isinstance(self._page, BasePage) + self.setup_page() + self.assertIsInstance(self._page, BasePage, + 'setUp() must assign a BasePage to _page, not ' + f'{type(self._page).__name__}') + + @abstractmethod + def setup_page(self) -> None: + """Subclasses must implement this to assign self._page. + + .. 
versionadded:: 10.3 + """ class BasePageLoadRevisionsCachingTestBase(BasePageTestBase): @@ -38,15 +57,15 @@ class BasePageLoadRevisionsCachingTestBase(BasePageTestBase): def setUp(self) -> None: """Set up test.""" super().setUp() - assert self.cached is False, 'Tests do not support caching' + self.assertFalse(self.cached, 'Tests do not support caching') - def _test_page_text(self, get_text=True) -> None: + def _test_page_text(self) -> None: """Test site.loadrevisions() with .text.""" page = self._page - self.assertFalse(hasattr(page, '_revid')) - self.assertFalse(hasattr(page, '_text')) - self.assertTrue(hasattr(page, '_revisions')) + self.assertNotHasAttr(page, '_revid') + self.assertNotHasAttr(page, '_text') + self.assertHasAttr(page, '_revisions') self.assertFalse(page._revisions) # verify that initializing the page content @@ -57,50 +76,44 @@ def _test_page_text(self, get_text=True) -> None: page._revisions = {} self.site.loadrevisions(page, total=1) - self.assertTrue(hasattr(page, '_revid')) - self.assertTrue(hasattr(page, '_revisions')) + self.assertHasAttr(page, '_revid') + self.assertHasAttr(page, '_revisions') self.assertLength(page._revisions, 1) self.assertIn(page._revid, page._revisions) - self.assertEqual(page._text, custom_text) self.assertEqual(page.text, page._text) + del page.text - self.assertFalse(hasattr(page, '_text')) + self.assertNotHasAttr(page, '_text') self.assertIsNone(page._revisions[page._revid].text) self.assertIsNone(page._latest_cached_revision()) page.text = custom_text - self.site.loadrevisions(page, total=1, content=True) self.assertIsNotNone(page._latest_cached_revision()) self.assertEqual(page._text, custom_text) self.assertEqual(page.text, page._text) + del page.text - self.assertFalse(hasattr(page, '_text')) + + self.assertNotHasAttr(page, '_text') # Verify that calling .text doesn't call loadrevisions again loadrevisions = self.site.loadrevisions try: self.site.loadrevisions = None - if get_text: - loaded_text = page.text - 
else: # T107537 - with self.assertRaises(NotImplementedError): - page.text - loaded_text = '' + loaded_text = page.text self.assertIsNotNone(loaded_text) - self.assertFalse(hasattr(page, '_text')) + self.assertNotHasAttr(page, '_text') page.text = custom_text - if get_text: - self.assertEqual(page.get(), loaded_text) + self.assertEqual(page.get(), loaded_text) self.assertEqual(page._text, custom_text) self.assertEqual(page.text, page._text) del page.text - self.assertFalse(hasattr(page, '_text')) - if get_text: - self.assertEqual(page.text, loaded_text) + self.assertNotHasAttr(page, '_text') + self.assertEqual(page.text, loaded_text) finally: self.site.loadrevisions = loadrevisions diff --git a/tests/bot_tests.py b/tests/bot_tests.py index 86d62afaaf..ef53c57262 100755 --- a/tests/bot_tests.py +++ b/tests/bot_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Bot tests.""" # -# (C) Pywikibot team, 2015-2024 +# (C) Pywikibot team, 2015-2025 # # Distributed under the terms of the MIT license. 
# @@ -56,8 +56,8 @@ def _treat(self, pages, post_treat=None): def treat(page) -> None: self.assertEqual(page, next(self._page_iter)) if self._treat_site is None: - self.assertFalse(hasattr(self.bot, 'site')) - self.assertFalse(hasattr(self.bot, '_site')) + self.assertNotHasAttr(self.bot, 'site') + self.assertNotHasAttr(self.bot, '_site') elif not isinstance(self.bot, pywikibot.bot.MultipleSitesBot): self.assertIsNotNone(self.bot._site) self.assertEqual(self.bot.site, self.bot._site) @@ -301,7 +301,7 @@ class TestOptionHandler(TestCase): dry = True def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" self.option_handler = Options(baz=True) super().setUp() diff --git a/tests/cache_tests.py b/tests/cache_tests.py index 1c1cc7b3b2..d9671e1651 100755 --- a/tests/cache_tests.py +++ b/tests/cache_tests.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 """API Request cache tests.""" # -# (C) Pywikibot team, 2012-2024 +# (C) Pywikibot team, 2012-2025 # # Distributed under the terms of the MIT license. # from __future__ import annotations +import re import unittest from pywikibot.login import LoginStatus @@ -31,6 +32,8 @@ def _check_cache_entry(self, entry) -> None: self.assertIsNotNone(entry.site._username) # pragma: no cover self.assertIsInstance(entry._params, dict) self.assertIsNotNone(entry._params) + self.assertLength(str(entry), 64) + self.assertIsNotNone(re.fullmatch(r'[0-9a-f]+', str(entry))) # TODO: more tests on entry._params, and possibly fixes needed # to make it closely replicate the original object. diff --git a/tests/citoid_tests.py b/tests/citoid_tests.py new file mode 100755 index 0000000000..ce1fff5bb1 --- /dev/null +++ b/tests/citoid_tests.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Unit tests for citoid script.""" +# +# (C) Pywikibot team, 2025 +# +# Distributed under the terms of the MIT license. 
+# +from __future__ import annotations + +import datetime +import unittest + +import pywikibot +from pywikibot.data import citoid +from pywikibot.exceptions import ApiNotAvailableError +from tests.aspects import TestCase + + +class TestCitoid(TestCase): + + """Test the Citoid client.""" + + family = 'wikipedia' + code = 'test' + login = False + + def test_citoid_positive(self): + """Test citoid script.""" + client = citoid.CitoidClient(self.site) + resp = client.get_citation( + 'mediawiki', + 'https://ro.wikipedia.org/wiki/România' + ) + self.assertLength(resp, 1) + self.assertEqual(resp[0]['title'], 'România') + self.assertEqual( + resp[0]['rights'], + 'Creative Commons Attribution-ShareAlike License' + ) + self.assertIsNotEmpty(resp[0]['url']) + self.assertEqual( + resp[0]['accessDate'], + datetime.datetime.now().strftime('%Y-%m-%d') + ) + + def test_citoid_no_config(self): + """Test citoid script with no citoid endpoint configured.""" + client = citoid.CitoidClient(pywikibot.Site('pl', 'wikiquote')) + with self.assertRaises(ApiNotAvailableError): + client.get_citation( + 'mediawiki', + 'https://ro.wikipedia.org/wiki/România' + ) + + def test_citoid_no_valid_format(self): + """Test citoid script with invalid format provided.""" + client = citoid.CitoidClient(self.site) + with self.assertRaises(ValueError): + client.get_citation( + 'mediawiki2', + 'https://ro.wikipedia.org/wiki/România' + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/collections_tests.py b/tests/collections_tests.py index db9bbc8f31..29754bf2be 100755 --- a/tests/collections_tests.py +++ b/tests/collections_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for the Wikidata parts of the page module.""" # -# (C) Pywikibot team, 2019-2022 +# (C) Pywikibot team, 2019-2025 # # Distributed under the terms of the MIT license. 
# @@ -44,7 +44,7 @@ class TestLanguageDict(DataCollectionTestCase): dry = True def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() self.site = self.get_site() self.lang_out = {'en': 'foo', 'zh': 'bar'} @@ -132,7 +132,7 @@ class TestAliasesDict(DataCollectionTestCase): dry = True def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() self.site = self.get_site() self.lang_out = {'en': ['foo', 'bar'], diff --git a/tests/cosmetic_changes_tests.py b/tests/cosmetic_changes_tests.py index 74d8f0e8c3..bd397922dd 100755 --- a/tests/cosmetic_changes_tests.py +++ b/tests/cosmetic_changes_tests.py @@ -431,17 +431,19 @@ def test_translate_magic_words(self) -> None: self.assertEqual( '[[File:Foo.bar|250px|zentriert|Bar]]', self.cct.translateMagicWords('[[File:Foo.bar|250px|center|Bar]]')) - - @unittest.expectedFailure # T396715 - def test_translateMagicWords_fail(self) -> None: - """Test translateMagicWords method. - - The current implementation doesn't check whether the magic word - is inside a template. - """ + # test magic word inside template self.assertEqual( '[[File:Foo.bar|{{Baz|thumb|foo}}]]', self.cct.translateMagicWords('[[File:Foo.bar|{{Baz|thumb|foo}}]]')) + # test magic word inside link and template + self.assertEqual( + '[[File:ABC.jpg|123px|mini|links|[[Foo|left]] {{Bar|thumb}}]]', + self.cct.translateMagicWords( + '[[File:ABC.jpg|123px|thumb|left|[[Foo|left]] {{Bar|thumb}}]]') + ) + self.assertEqual( + '[[File:Foo.bar]]', + self.cct.translateMagicWords('[[File:Foo.bar]]')) def test_cleanUpLinks_pipes(self) -> None: """Test cleanUpLinks method.""" diff --git a/tests/datasite_tests.py b/tests/datasite_tests.py index dc0132bdbf..77272ed71b 100755 --- a/tests/datasite_tests.py +++ b/tests/datasite_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for the site module.""" # -# (C) Pywikibot team, 2014-2022 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. 
# @@ -27,7 +27,7 @@ def test_item(self) -> None: seen = [] for item in datasite.preload_entities(items): self.assertIsInstance(item, pywikibot.ItemPage) - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, '_content') self.assertNotIn(item, seen) seen.append(item) self.assertLength(seen, 5) @@ -42,7 +42,7 @@ def test_item_as_page(self) -> None: seen = [] for item in datasite.preload_entities(pages): self.assertIsInstance(item, pywikibot.ItemPage) - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, '_content') self.assertNotIn(item, seen) seen.append(item) self.assertLength(seen, 5) @@ -53,7 +53,7 @@ def test_property(self) -> None: page = pywikibot.Page(datasite, 'P6') property_page = next(datasite.preload_entities([page])) self.assertIsInstance(property_page, pywikibot.PropertyPage) - self.assertTrue(hasattr(property_page, '_content')) + self.assertHasAttr(property_page, '_content') class TestDataSiteClientPreloading(DefaultWikidataClientTestCase): @@ -67,7 +67,7 @@ def test_non_item(self) -> None: item = next(datasite.preload_entities([mainpage])) self.assertIsInstance(item, pywikibot.ItemPage) - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, '_content') self.assertEqual(item.id, 'Q5296') diff --git a/tests/deletionbot_tests.py b/tests/deletionbot_tests.py index 22a521a94d..f44ece565f 100755 --- a/tests/deletionbot_tests.py +++ b/tests/deletionbot_tests.py @@ -109,17 +109,17 @@ def setUpClass(cls) -> None: def setUp(self) -> None: """Set up unit test.""" - self._original_delete = pywikibot.Page.delete - self._original_undelete = pywikibot.Page.undelete - pywikibot.Page.delete = delete_dummy - pywikibot.Page.undelete = undelete_dummy super().setUp() - def tearDown(self) -> None: - """Tear down unit test.""" - pywikibot.Page.delete = self._original_delete - pywikibot.Page.undelete = self._original_undelete - super().tearDown() + patches = ( + patch.object(pywikibot.Page, 'delete', delete_dummy), + 
patch.object(pywikibot.Page, 'undelete', undelete_dummy), + patch.object(delete.DeletionRobot, 'skip_page', + lambda inst, page: False) + ) + for p in patches: + self.addCleanup(p.stop) + p.start() def test_dry(self) -> None: """Test dry run of bot.""" @@ -133,16 +133,17 @@ def test_dry(self) -> None: self.assertEqual(self.undelete_args, ['[[FoooOoOooO]]', 'foo']) -def delete_dummy(self, reason, prompt, mark, automatic_quit) -> int: +def delete_dummy(page_self, reason, prompt, mark, automatic_quit, *, + deletetalk=False) -> int: """Dummy delete method.""" - TestDeletionBot.delete_args = [self.title(as_link=True), reason, prompt, - mark, automatic_quit] + TestDeletionBot.delete_args = [page_self.title(as_link=True), reason, + prompt, mark, automatic_quit] return 0 -def undelete_dummy(self, reason) -> None: +def undelete_dummy(page_self, reason) -> None: """Dummy undelete method.""" - TestDeletionBot.undelete_args = [self.title(as_link=True), reason] + TestDeletionBot.undelete_args = [page_self.title(as_link=True), reason] if __name__ == '__main__': diff --git a/tests/dry_api_tests.py b/tests/dry_api_tests.py index fbef7d0158..3f5520b435 100755 --- a/tests/dry_api_tests.py +++ b/tests/dry_api_tests.py @@ -29,7 +29,6 @@ TestCase, unittest, ) -from tests.utils import DummySiteinfo class DryCachedRequestTests(SiteAttributeTestCase): @@ -149,49 +148,21 @@ def setUp(self) -> None: class MockFamily(Family): @property - def name(self) -> str: - return 'mock' + def langs(self) -> str: + return {'mock': ''} class MockSite(pywikibot.site.APISite): _loginstatus = LoginStatus.NOT_ATTEMPTED - _namespaces = {2: ['User']} def __init__(self) -> None: - self._user = 'anon' - pywikibot.site.BaseSite.__init__(self, 'mock', MockFamily()) - self._siteinfo = DummySiteinfo({'case': 'first-letter'}) - - def version(self) -> str: - return '1.31' # lowest supported release - - def protocol(self) -> str: - return 'http' - - @property - def codes(self): - return {'mock'} - - def user(self): - 
return self._user - - def encoding(self) -> str: - return 'utf-8' - - def encodings(self): - return [] - - @property - def siteinfo(self): - return self._siteinfo + pywikibot.site.BaseSite.__init__( + self, 'mock', MockFamily(), 'MyUser') def __repr__(self) -> str: return 'MockSite()' - def __getattr__(self, attr): - raise Exception(f'Attribute {attr!r} not defined') - self.mocksite = MockSite() super().setUp() @@ -201,15 +172,18 @@ def test_cachefile_path_different_users(self) -> None: parameters={'action': 'query', 'meta': 'siteinfo'}) anonpath = req._cachefile_path() - self.mocksite._userinfo = {'name': 'MyUser'} + self.assertIsNone(self.mocksite.user()) + + self.mocksite._userinfo = {'name': 'MyUser', 'id': 4711} self.mocksite._loginstatus = LoginStatus.AS_USER req = CachedRequest(expiry=1, site=self.mocksite, parameters={'action': 'query', 'meta': 'siteinfo'}) userpath = req._cachefile_path() self.assertNotEqual(anonpath, userpath) + self.assertEqual(self.mocksite.user(), 'MyUser') - self.mocksite._userinfo = {'name': 'MyOtherUser'} + self.mocksite._userinfo = {'name': 'MyOtherUser', 'id': 4712} self.mocksite._loginstatus = LoginStatus.AS_USER req = CachedRequest(expiry=1, site=self.mocksite, parameters={'action': 'query', 'meta': 'siteinfo'}) @@ -217,6 +191,8 @@ def test_cachefile_path_different_users(self) -> None: self.assertNotEqual(anonpath, otherpath) self.assertNotEqual(userpath, otherpath) + self.assertIsNone(self.mocksite.user()) + self.assertEqual(self.mocksite.username(), 'MyUser') def test_unicode(self) -> None: """Test caching with Unicode content.""" diff --git a/tests/edit_tests.py b/tests/edit_tests.py index 852fd715c5..7fcd1d1f0c 100755 --- a/tests/edit_tests.py +++ b/tests/edit_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for editing pages.""" # -# (C) Pywikibot team, 2015-2024 +# (C) Pywikibot team, 2015-2025 # # Distributed under the terms of the MIT license. 
# @@ -63,11 +63,11 @@ def test_appendtext(self) -> None: """Test writing to a page without preloading the .text.""" ts = str(time.time()) p = pywikibot.Page(self.site, 'User:John Vandenberg/appendtext test') - self.assertFalse(hasattr(p, '_text')) + self.assertNotHasAttr(p, '_text') p.site.editpage(p, appendtext=ts) - self.assertFalse(hasattr(p, '_text')) + self.assertNotHasAttr(p, '_text') p = pywikibot.Page(self.site, 'User:John Vandenberg/appendtext test') - self.assertTrue(p.text.endswith(ts)) + self.assertEndsWith(p.text, ts) self.assertNotEqual(p.text, ts) diff --git a/tests/eventstreams_tests.py b/tests/eventstreams_tests.py index 81c1ab2116..db51e41994 100755 --- a/tests/eventstreams_tests.py +++ b/tests/eventstreams_tests.py @@ -8,6 +8,7 @@ from __future__ import annotations import json +import re import unittest from contextlib import suppress from unittest import mock @@ -45,8 +46,11 @@ def test_url_parameter(self, key) -> None: self.assertEqual(e._url, e.sse_kwargs.get('url')) self.assertIsNone(e._total) self.assertIsNone(e._streams) - self.assertEqual(repr(e), - f"EventStreams(url='{self.sites[key]['hostname']}')") + self.assertRegex( + repr(e), + rf"^EventStreams\(url={self.sites[key]['hostname']!r}, " + r"headers={'user-agent': '[^']+'}\)$" + ) def test_url_from_site(self, key) -> None: """Test EventStreams with url from site.""" @@ -59,9 +63,12 @@ def test_url_from_site(self, key) -> None: self.assertEqual(e._url, e.sse_kwargs.get('url')) self.assertIsNone(e._total) self.assertEqual(e._streams, streams) - site_repr = f'site={site!r}, ' if site != Site() else '' - self.assertEqual(repr(e), - f"EventStreams({site_repr}streams='{streams}')") + site_repr = re.escape(f'site={site!r}, ') if site != Site() else '' + self.assertRegex( + repr(e), + r"^EventStreams\(headers={'user-agent': '[^']+'}, " + rf'{site_repr}streams={streams!r}\)$' + ) @mock.patch('pywikibot.comms.eventstreams.EventSource', new=mock.MagicMock()) @@ -70,7 +77,7 @@ class 
TestEventStreamsStreamsTests(DefaultSiteTestCase): """Stream tests for eventstreams module.""" def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() site = self.get_site() fam = site.family @@ -134,7 +141,7 @@ def test_filter_function_settings(self) -> None: """Test EventStreams filter function settings.""" def foo() -> bool: """Dummy function.""" - return True + return True # pragma: no cover self.es.register_filter(foo) self.assertEqual(self.es.filter['all'][0], foo) diff --git a/tests/family_tests.py b/tests/family_tests.py index ca8d349b5f..b232698507 100755 --- a/tests/family_tests.py +++ b/tests/family_tests.py @@ -13,6 +13,7 @@ import pywikibot from pywikibot.exceptions import UnknownFamilyError from pywikibot.family import Family, SingleSiteFamily +from pywikibot.tools import suppress_warnings from tests.aspects import PatchingTestCase, TestCase, unittest from tests.utils import DrySite @@ -100,13 +101,16 @@ def test_get_obsolete_wp(self) -> None: self.assertIsInstance(family.obsolete, Mapping) # redirected code (see site tests test_alias_code_site) self.assertEqual(family.code_aliases['dk'], 'da') - self.assertEqual(family.interwiki_replacements['dk'], 'da') + msg = 'pywikibot.family.Family.interwiki_replacements is deprecated' + with suppress_warnings(msg, FutureWarning): + self.assertEqual(family.interwiki_replacements['dk'], 'da') self.assertEqual(family.obsolete['dk'], 'da') # closed/locked site (see site tests test_locked_site) self.assertIsNone(family.obsolete['mh']) # offline site (see site tests test_removed_site) self.assertIsNone(family.obsolete['ru-sib']) - self.assertIn('dk', family.interwiki_replacements) + with suppress_warnings(msg, FutureWarning): + self.assertIn('dk', family.interwiki_replacements) def test_obsolete_from_attributes(self) -> None: """Test obsolete property for given class attributes.""" @@ -114,14 +118,17 @@ def test_obsolete_from_attributes(self) -> None: family = type('TempFamily', (Family,), 
{})() self.assertEqual(family.obsolete, {}) - self.assertEqual(family.interwiki_replacements, {}) + msg = 'pywikibot.family.Family.interwiki_replacements is deprecated' + with suppress_warnings(msg, FutureWarning): + self.assertEqual(family.interwiki_replacements, {}) self.assertEqual(family.interwiki_removals, frozenset()) # Construct a temporary family with other attributes and instantiate it family = type('TempFamily', (Family,), {'code_aliases': {'a': 'b'}, 'closed_wikis': ['c']})() self.assertEqual(family.obsolete, {'a': 'b', 'c': None}) - self.assertEqual(family.interwiki_replacements, {'a': 'b'}) + with suppress_warnings(msg, FutureWarning): + self.assertEqual(family.interwiki_replacements, {'a': 'b'}) self.assertEqual(family.interwiki_removals, frozenset('c')) def test_obsolete_readonly(self) -> None: diff --git a/tests/file_tests.py b/tests/file_tests.py index d317627d63..1d8ec7e5b0 100755 --- a/tests/file_tests.py +++ b/tests/file_tests.py @@ -79,8 +79,10 @@ def test_shared_only(self) -> None: def test_local_only(self) -> None: """Test file_is_shared() on file page with local file only.""" - title = 'File:Untitled (Three Forms), stainless steel sculpture by ' \ - '--James Rosati--, 1975-1976, --Honolulu Academy of Arts--.JPG' + title = ( + 'File:Untitled (Three Forms), stainless steel sculpture by ' + '--James Rosati--, 1975-1976, --Honolulu Academy of Arts--.JPG' + ) commons = self.get_site('commons') enwp = self.get_site('enwiki') @@ -238,12 +240,14 @@ def test_lazyload_metadata(self) -> None: def test_get_file_url(self) -> None: """Get File url.""" self.assertTrue(self.image.exists()) - self.assertEqual(self.image.get_file_url(), - 'https://upload.wikimedia.org/wikipedia/commons/' - 'd/d3/Albert_Einstein_Head.jpg') - self.assertEqual(self.image.latest_file_info.url, - 'https://upload.wikimedia.org/wikipedia/commons/' - 'd/d3/Albert_Einstein_Head.jpg') + self.assertEqual( + self.image.get_file_url(), + 'https://upload.wikimedia.org/wikipedia/commons/' + 
'd/d3/Albert_Einstein_Head.jpg') + self.assertEqual( + self.image.latest_file_info.url, + 'https://upload.wikimedia.org/wikipedia/commons/' + 'd/d3/Albert_Einstein_Head.jpg') @unittest.expectedFailure # T391761 def test_get_file_url_thumburl_from_width(self) -> None: @@ -333,8 +337,8 @@ def test_changed_title(self) -> None: def test_not_existing_download(self) -> None: """Test not existing download.""" - page = pywikibot.FilePage(self.site, - 'File:notexisting_Albert Einstein.jpg') + page = pywikibot.FilePage( + self.site, 'File:notexisting_Albert Einstein.jpg') filename = join_images_path('Albert Einstein.jpg') with self.assertRaisesRegex( @@ -358,8 +362,8 @@ def test_data_item(self) -> None: page = pywikibot.FilePage(self.site, 'File:Albert Einstein.jpg') item = page.data_item() self.assertIsInstance(item, pywikibot.MediaInfo) - self.assertTrue(page._item is item) - self.assertTrue(item.file is page) + self.assertIs(page._item, item) + self.assertIs(item.file, page) self.assertEqual('-1', item.id) item.get() self.assertEqual('M14634781', item.id) @@ -368,7 +372,7 @@ def test_data_item(self) -> None: item.labels, pywikibot.page._collections.LanguageDict) self.assertIsInstance( item.statements, pywikibot.page._collections.ClaimCollection) - self.assertTrue(item.claims is item.statements) + self.assertIs(item.claims, item.statements) all_claims = list(chain.from_iterable(item.statements.values())) self.assertEqual({claim.on_item for claim in all_claims}, {item}) @@ -386,25 +390,33 @@ def test_data_item(self) -> None: def test_data_item_not_file(self) -> None: """Test data item with invalid pageid.""" item = pywikibot.MediaInfo(self.site, 'M1') # Main Page - with self.assertRaises(Error): + with self.assertRaisesRegex(Error, r'not.*file'): item.file - with self.assertRaises(NoWikibaseEntityError): + with self.assertRaisesRegex( + NoWikibaseEntityError, + r"Entity.*(not.*exist|doesn't exist)"): item.get() self.assertFalse(item.exists()) def 
test_data_item_when_no_file_or_data_item(self) -> None: """Test data item associated to file that does not exist.""" - page = pywikibot.FilePage(self.site, - 'File:Notexisting_Albert Einstein.jpg') + page = pywikibot.FilePage( + self.site, 'File:Notexisting_Albert Einstein.jpg') self.assertFalse(page.exists()) item = page.data_item() self.assertIsInstance(item, pywikibot.MediaInfo) - with self.assertRaises(NoWikibaseEntityError): + with self.assertRaisesRegex( + NoWikibaseEntityError, + r"Entity.*(not.*exist|doesn't exist)"): item.get() - with self.assertRaises(NoWikibaseEntityError): + with self.assertRaisesRegex( + NoWikibaseEntityError, + r"Entity.*(not.*exist|doesn't exist)"): item.title() - with self.assertRaises(NoWikibaseEntityError): + with self.assertRaisesRegex( + NoWikibaseEntityError, + r"Entity.*(not.*exist|doesn't exist)"): item.labels def test_data_item_when_file_exist_but_without_item(self) -> None: @@ -457,7 +469,7 @@ class TestMediaInfoEditing(TestCase): login = True write = True - # commons.wikimedia.beta.wmflabs.org + # commons.wikimedia.beta.wmcloud.org family = 'commons' code = 'beta' @@ -520,11 +532,15 @@ def test_edit_claims(self) -> None: item = page.data_item() # Insert claim to non-existing file - with self.assertRaises(NoWikibaseEntityError): + with self.assertRaisesRegex( + NoWikibaseEntityError, + r"Entity.*(not.*exist|doesn't exist)"): item.addClaim(new_claim) # Insert claim using site object to non-existing file - with self.assertRaises(NoWikibaseEntityError): + with self.assertRaisesRegex( + NoWikibaseEntityError, + r"Entity.*(not.*exist|doesn't exist)"): self.site.addClaim(item, new_claim) # Test adding claim existing file @@ -574,7 +590,8 @@ def test_edit_claims(self) -> None: self.assertTrue(claim_found) # Note removeClaims() parameter needs to be array - summary = f'Removing {property_id} with {value} using site object' + summary = (f'Removing {property_id} with {value} ' + 'using site object') 
self.site.removeClaims(remove_statements, summary=summary) # Test that the claims were actually removed diff --git a/tests/generate_family_file_tests.py b/tests/generate_family_file_tests.py index 91b86fc4cc..e7386a67c5 100755 --- a/tests/generate_family_file_tests.py +++ b/tests/generate_family_file_tests.py @@ -10,6 +10,7 @@ import unittest from contextlib import suppress from random import sample +from unittest.mock import patch from urllib.parse import urlparse from pywikibot import Site @@ -86,7 +87,9 @@ def test_initial_attributes(self) -> None: def test_attributes_after_run(self) -> None: """Test FamilyFileGenerator attributes after run().""" gen = self.generator_instance - gen.run() + with patch.object(FamilyTestGenerator, 'show') as mock_show: + gen.run() + mock_show.assert_called() with self.subTest(test='Test whether default is loaded'): self.assertIn(self.site.lang, gen.wikis) diff --git a/tests/generate_user_files_tests.py b/tests/generate_user_files_tests.py index 858931913f..28b2575099 100755 --- a/tests/generate_user_files_tests.py +++ b/tests/generate_user_files_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Test generate_user_files script.""" # -# (C) Pywikibot team, 2018-2023 +# (C) Pywikibot team, 2018-2025 # # Distributed under the terms of the MIT license. 
# @@ -29,8 +29,8 @@ def test_ask_for_dir_change(self) -> None: def test_base_names(self) -> None: """Test basename constants.""" - self.assertTrue(guf.USER_BASENAME.endswith('.py')) - self.assertTrue(guf.PASS_BASENAME.endswith('.py')) + self.assertEndsWith(guf.USER_BASENAME, '.py') + self.assertEndsWith(guf.PASS_BASENAME, '.py') def test_config_test(self) -> None: """Test config text strings.""" diff --git a/tests/gui_tests.py b/tests/gui_tests.py index daad444ff3..9ff4a7d2cb 100755 --- a/tests/gui_tests.py +++ b/tests/gui_tests.py @@ -92,7 +92,7 @@ def setUpModule() -> None: try: dialog = tkinter.Tk() - except RuntimeError as e: + except RuntimeError as e: # pragma: no cover raise unittest.SkipTest(f'Skipping due to T380732 - {e}') dialog.destroy() diff --git a/tests/hooks/__init__.py b/tests/hooks/__init__.py index 8a22a45b51..b9e3b06568 100644 --- a/tests/hooks/__init__.py +++ b/tests/hooks/__init__.py @@ -1,3 +1,10 @@ -"""Local pre-commit hooks for CI tests.""" +"""Local pre-commit hooks for CI tests. +.. versionadded:: 10.3 +""" +# +# (C) Pywikibot team, 2025 +# +# Distributed under the terms of the MIT license. +# from __future__ import annotations diff --git a/tests/hooks/copyright_fixer.py b/tests/hooks/copyright_fixer.py index 93eeb8167b..d3fa734884 100755 --- a/tests/hooks/copyright_fixer.py +++ b/tests/hooks/copyright_fixer.py @@ -1,5 +1,8 @@ #!/usr/bin/env python -"""Pre-commit hook to set the leftmost copyright year.""" +"""Pre-commit hook to set the leftmost copyright year. + +.. 
versionadded:: 10.3 +""" # # (C) Pywikibot team, 2025 # @@ -22,10 +25,21 @@ def get_patched_files(): """Return the PatchSet for the latest commit.""" - out = subprocess.run(['git', 'diff', '--unified=0'], - stdout=subprocess.PIPE, - check=True, encoding='utf-8', text=True).stdout - return {Path(path) for path in re.findall(r'(?m)^\+\+\+ b/(.+)$', out) + cmd_opts = ' --name-only --diff-filter=AMR' + diff_cmd = f'git diff {cmd_opts}'.split() + show_cmd = f'git show --format= {cmd_opts}'.split() + + captures = [] + captures.append( + subprocess.check_output(diff_cmd, encoding='utf-8') + ) + captures.append( + subprocess.check_output(diff_cmd + ['--staged'], encoding='utf-8') + ) + captures.append( + subprocess.check_output(show_cmd + ['HEAD'], encoding='utf-8') + ) + return {Path(path) for capture in captures for path in capture.splitlines() if path.endswith('.py')} diff --git a/tests/http_tests.py b/tests/http_tests.py index 317a8b254b..e365242e67 100755 --- a/tests/http_tests.py +++ b/tests/http_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for http module.""" # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. 
# @@ -55,10 +55,10 @@ def setUp(self) -> None: super().setUp() self._authenticate = config.authenticate config.authenticate = { - 'zh.wikipedia.beta.wmflabs.org': ('1', '2'), - '*.wikipedia.beta.wmflabs.org': ('3', '4', '3', '4'), - '*.beta.wmflabs.org': ('5', '6'), - '*.wmflabs.org': ('7', '8', '8'), + 'zh.wikipedia.beta.wmcloud.org': ('1', '2'), + '*.wikipedia.beta.wmcloud.org': ('3', '4', '3', '4'), + '*.beta.wmcloud.org': ('5', '6'), + '*.wmcloud.org': ('7', '8', '8'), } def tearDown(self) -> None: @@ -69,15 +69,15 @@ def tearDown(self) -> None: def test_url_based_authentication(self) -> None: """Test url-based authentication info.""" pairs = { - 'https://zh.wikipedia.beta.wmflabs.org': ('1', '2'), - 'https://en.wikipedia.beta.wmflabs.org': ('3', '4', '3', '4'), - 'https://wiki.beta.wmflabs.org': ('5', '6'), - 'https://beta.wmflabs.org': None, - 'https://wmflabs.org': None, + 'https://zh.wikipedia.beta.wmcloud.org': ('1', '2'), + 'https://en.wikipedia.beta.wmcloud.org': ('3', '4', '3', '4'), + 'https://wiki.beta.wmcloud.org': ('5', '6'), + 'https://beta.wmcloud.org': None, + 'https://wmcloud.org': None, 'https://www.wikiquote.org/': None, } with suppress_warnings( - r"config.authenticate\['\*.wmflabs.org'] has invalid value.", + r"config.authenticate\['\*.wmcloud.org'] has invalid value.", UserWarning, ): for url, auth in pairs.items(): @@ -230,8 +230,7 @@ def tearDown(self) -> None: def test_default_user_agent(self) -> None: """Config defined format string test.""" - self.assertTrue(http.user_agent().startswith( - pywikibot.calledModuleName())) + self.assertStartsWith(http.user_agent(), pywikibot.calledModuleName()) self.assertIn('Pywikibot/' + pywikibot.__version__, http.user_agent()) self.assertNotIn(' ', http.user_agent()) self.assertNotIn('()', http.user_agent()) @@ -481,7 +480,7 @@ def setUpClass(cls) -> None: def test_requests(self) -> None: """Test with requests, underlying package.""" with requests.Session() as s: - r = s.get(self.url) + r = 
s.get(self.url, headers={'User-Agent': http.user_agent()}) self.assertEqual(r.headers['content-type'], 'image/png') self.assertEqual(r.content, self.png) diff --git a/tests/interwiki_graph_tests.py b/tests/interwiki_graph_tests.py index 91fedc7e4a..eeb9cdb6e3 100755 --- a/tests/interwiki_graph_tests.py +++ b/tests/interwiki_graph_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Test Interwiki Graph functionality.""" # -# (C) Pywikibot team, 2015-2022 +# (C) Pywikibot team, 2015-2025 # # Distributed under the terms of the MIT license. # @@ -38,7 +38,7 @@ class TestWiktionaryGraph(SiteAttributeTestCase): @classmethod def setUpClass(cls) -> None: - """Setup test class.""" + """Set up test class.""" super().setUpClass() cls.pages = { diff --git a/tests/interwiki_link_tests.py b/tests/interwiki_link_tests.py index 6df9dbc0c8..6d805f7aef 100755 --- a/tests/interwiki_link_tests.py +++ b/tests/interwiki_link_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Test Interwiki Link functionality.""" # -# (C) Pywikibot team, 2014-2022 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. # diff --git a/tests/interwikidata_tests.py b/tests/interwikidata_tests.py index 7a77ef3b65..059064ff86 100755 --- a/tests/interwikidata_tests.py +++ b/tests/interwikidata_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for scripts/interwikidata.py.""" # -# (C) Pywikibot team, 2015-2024 +# (C) Pywikibot team, 2015-2025 # # Distributed under the terms of the MIT license. 
# @@ -9,6 +9,7 @@ import unittest from contextlib import suppress +from typing import Any import pywikibot from pywikibot import Link @@ -21,13 +22,13 @@ class DummyBot(interwikidata.IWBot): """A dummy bot to prevent editing in production wikis.""" - def put_current(self) -> bool: + def put_current(self, *args: Any, **kwargs: Any) -> bool: """Prevent editing.""" - return False + raise NotImplementedError - def create_item(self) -> bool: + def create_item(self) -> pywikibot.ItemPage: """Prevent creating items.""" - return False + raise NotImplementedError def try_to_add(self) -> None: """Prevent adding sitelinks to items.""" diff --git a/tests/interwikimap_tests.py b/tests/interwikimap_tests.py index ae3aa65a9c..b0cf64291e 100755 --- a/tests/interwikimap_tests.py +++ b/tests/interwikimap_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for the site module.""" # -# (C) Pywikibot team, 2018-2024 +# (C) Pywikibot team, 2018-2025 # # Distributed under the terms of the MIT license. # @@ -112,7 +112,7 @@ class TestInterwikiMapPrefix(TestCase): code = 'en' def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() self.iw_map = self.site._interwikimap diff --git a/tests/link_tests.py b/tests/link_tests.py index 8b8a001a4a..960deed6a0 100755 --- a/tests/link_tests.py +++ b/tests/link_tests.py @@ -72,10 +72,11 @@ def replaced(self, iterable): for items in iterable: if isinstance(items, str): items = [items] - items = [re.sub(' ', - self.site.family.title_delimiter_and_aliases[0], - item) - for item in items] + items = [ + item.replace(' ', + self.site.family.title_delimiter_and_aliases[0]) + for item in items + ] if len(items) == 1: items = items[0] yield items @@ -122,25 +123,25 @@ def test_valid(self) -> None: def test_invalid(self) -> None: """Test that invalid titles raise InvalidTitleError.""" # Bad characters forbidden regardless of wgLegalTitleChars - def generate_contains_illegal_chars_exc_regex(text): + def 
generate_contains_illegal_chars_exc_regex(text) -> str: return (rf'^(u|)\'{re.escape(text)}\' contains illegal char' rf'\(s\) (u|)\'{re.escape(text[2])}\'$') # Directory navigation - def generate_contains_dot_combinations_exc_regex(text): + def generate_contains_dot_combinations_exc_regex(text) -> str: return (rf'^\(contains \. / combinations\): (u|)' rf'\'{re.escape(text)}\'$') # Tilde - def generate_contains_tilde_exc_regex(text): + def generate_contains_tilde_exc_regex(text) -> str: return rf'^\(contains ~~~\): (u|)\'{re.escape(text)}\'$' # Overlength - def generate_overlength_exc_regex(text): + def generate_overlength_exc_regex(text) -> str: return rf'^\(over 255 bytes\): (u|)\'{re.escape(text)}\'$' # Namespace prefix without actual title - def generate_has_no_title_exc_regex(text): + def generate_has_no_title_exc_regex(text) -> str: return rf'^(u|)\'{re.escape(text.strip())}\' has no title\.$' title_tests = [ @@ -240,7 +241,7 @@ class LinkTestWikiEn(LinkTestCase): code = 'en' def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() config.mylang = 'en' config.family = 'wikipedia' @@ -312,7 +313,7 @@ class TestPartiallyQualifiedExplicitLinkDifferentFamilyParser(LinkTestCase): code = 'en' def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() config.mylang = 'en' config.family = 'wikisource' @@ -388,7 +389,7 @@ class TestFullyQualifiedLinkDifferentFamilyParser(LinkTestCase): PATTERN = '{colon}{first}:{second}:{title}' def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() config.mylang = 'en' config.family = 'wikisource' @@ -438,7 +439,7 @@ class TestFullyQualifiedExplicitLinkNoLangConfigFamilyParser(LinkTestCase): code = 'en' def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() config.mylang = 'wikidata' config.family = 'wikidata' @@ -492,7 +493,7 @@ class TestFullyQualifiedNoLangFamilyExplicitLinkParser(LinkTestCase): } def setUp(self) -> None: - 
"""Setup tests.""" + """Set up tests.""" super().setUp() config.mylang = 'en' config.family = 'wikipedia' @@ -526,7 +527,7 @@ class TestFullyQualifiedOneSiteFamilyExplicitLinkParser(LinkTestCase): code = 'species' def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() config.mylang = 'en' config.family = 'wikipedia' @@ -617,7 +618,7 @@ class TestPartiallyQualifiedImplicitLinkDifferentFamilyParser(LinkTestCase): code = 'en' def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() config.mylang = 'en' config.family = 'wikisource' @@ -668,7 +669,7 @@ class TestFullyQualifiedImplicitLinkNoLangConfigFamilyParser(LinkTestCase): code = 'en' def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() config.mylang = 'wikidata' config.family = 'wikidata' @@ -714,7 +715,7 @@ class TestFullyQualifiedNoLangFamilyImplicitLinkParser(LinkTestCase): code = 'test' def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() config.mylang = 'en' config.family = 'wikipedia' @@ -752,7 +753,7 @@ class TestFullyQualifiedOneSiteFamilyImplicitLinkParser(LinkTestCase): code = 'species' def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() config.mylang = 'en' config.family = 'wikipedia' diff --git a/tests/memento_tests.py b/tests/memento_tests.py index 86479069bb..f5d5269c71 100755 --- a/tests/memento_tests.py +++ b/tests/memento_tests.py @@ -39,10 +39,10 @@ def _get_archive_url(self, url, date_string=None): class TestMementoArchive(MementoTestCase): - """New WebCite Memento tests.""" + """Web Archive Memento tests.""" - timegate_uri = 'http://timetravel.mementoweb.org/timegate/' - hostname = timegate_uri.replace('gate/', 'map/json/http://google.com') + timegate_uri = 'https://web.archive.org/web/' + hostname = timegate_uri def test_newest(self) -> None: """Test Archive for an old https://google.com.""" @@ -55,7 +55,7 @@ def test_newest(self) -> None: class 
TestMementoDefault(MementoTestCase): - """Test InternetArchive is default Memento timegate.""" + """Test Web Archive is default Memento timegate.""" timegate_uri = None net = True @@ -64,6 +64,8 @@ def test_newest(self) -> None: """Test getting memento for newest https://google.com.""" archivedversion = self._get_archive_url('https://google.com') self.assertIsNotNone(archivedversion) + from pywikibot.data.memento import DEFAULT_TIMEGATE_BASE_URI + self.assertStartsWith(archivedversion, DEFAULT_TIMEGATE_BASE_URI) def test_invalid(self) -> None: """Test getting memento for invalid URL.""" diff --git a/tests/oauth_tests.py b/tests/oauth_tests.py index cecf17fec5..20a3fdb1d1 100755 --- a/tests/oauth_tests.py +++ b/tests/oauth_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Test OAuth functionality.""" # -# (C) Pywikibot team, 2015-2024 +# (C) Pywikibot team, 2015-2025 # # Distributed under the terms of the MIT license. # @@ -80,16 +80,16 @@ def test_edit(self) -> None: p = pywikibot.Page(self.site, title) try: p.site.editpage(p, appendtext='\n' + ts) - except EditConflictError as e: + except EditConflictError as e: # pragma: no cover self.assertEqual(e.page, p) else: revision_id = p.latest_revision_id p = pywikibot.Page(self.site, title) t = p.text if revision_id == p.latest_revision_id: - self.assertTrue(p.text.endswith(ts)) + self.assertEndsWith(p.text, ts) else: - self.assertIn(ts, t) + self.assertIn(ts, t) # pragma: no cover class TestOauthLoginManager(DefaultSiteTestCase, OAuthSiteTestCase): diff --git a/tests/page_tests.py b/tests/page_tests.py index 6d9b30dbdf..a023de2374 100755 --- a/tests/page_tests.py +++ b/tests/page_tests.py @@ -9,6 +9,7 @@ import pickle import re +import time from contextlib import suppress from datetime import timedelta from unittest import mock @@ -272,7 +273,11 @@ def testFileTitle(self) -> None: 'File:Example #3.jpg', # file extension in section ): with self.subTest(title=title), \ - self.assertRaises(ValueError): + 
self.assertRaisesRegex( + ValueError, + r'(not.*valid.*file' + r'|not in the file namespace' + r'|does not have a valid extension)'): pywikibot.FilePage(site, title) def testImageAndDataRepository(self) -> None: @@ -570,14 +575,14 @@ def test_redirect(self) -> None: else: self.skipTest(f'No redirect pages on site {site!r}') # This page is already initialised - self.assertTrue(hasattr(page, '_isredir')) + self.assertHasAttr(page, '_isredir') # call api.update_page without prop=info del page._isredir page.isDisambig() self.assertTrue(page.isRedirectPage()) page_copy = pywikibot.Page(site, page.title()) - self.assertFalse(hasattr(page_copy, '_isredir')) + self.assertNotHasAttr(page_copy, '_isredir') page_copy.isDisambig() self.assertTrue(page_copy.isRedirectPage()) @@ -727,7 +732,7 @@ class TestPageBotMayEdit(TestCase): login = True def setUp(self) -> None: - """Setup test.""" + """Set up test.""" super().setUp() self.page = pywikibot.Page(self.site, 'not_existent_page_for_pywikibot_tests') @@ -1009,6 +1014,7 @@ def testIsStaticRedirect(self) -> None: def testPageGet(self) -> None: """Test ``Page.get()`` on different types of pages.""" fail_msg = '{page!r}.get() raised {error!r} unexpectedly!' 
+ unexpected_exceptions = IsRedirectPageError, NoPageError, SectionError site = self.get_site('en') p1 = pywikibot.Page(site, 'User:Legoktm/R2') p2 = pywikibot.Page(site, 'User:Legoktm/R1') @@ -1024,7 +1030,7 @@ def testPageGet(self) -> None: try: p2.get(get_redirect=True) - except (IsRedirectPageError, NoPageError, SectionError) as e: + except unexpected_exceptions as e: # pragma: no cover self.fail(fail_msg.format(page=p2, error=e)) with self.assertRaisesRegex(NoPageError, NO_PAGE_RE): @@ -1039,7 +1045,7 @@ def testPageGet(self) -> None: page = pywikibot.Page(site, 'Manual:Pywikibot/2.0 #See_also') try: page.get() - except (IsRedirectPageError, NoPageError, SectionError) as e: + except unexpected_exceptions as e: # pragma: no cover self.fail(fail_msg.format(page=page, error=e)) def test_set_redirect_target(self) -> None: @@ -1082,13 +1088,26 @@ def test_watch(self) -> None: # Note: this test uses the userpage, so that it is unwatched and # therefore is not listed by script_tests test_watchlist_simulate. 
+ userpage = self.get_userpage() + # watched_pages parameters + wp_params = {'force': True, 'with_talkpage': False} rv = userpage.watch() - self.assertIsInstance(rv, bool) + + self.assertEqual(userpage.exists(), rv) + if rv: + self.assertIn(userpage, userpage.site.watched_pages(**wp_params)) + + with self.assertWarnsRegex(UserWarning, + r"expiry parameter \('.+'\) is ignored"): + rv = userpage.watch(unwatch=True, expiry='indefinite') + self.assertTrue(rv) - rv = userpage.watch(unwatch=True) - self.assertIsInstance(rv, bool) + rv = userpage.watch(expiry='5 seconds') self.assertTrue(rv) + self.assertIn(userpage, userpage.site.watched_pages(**wp_params)) + time.sleep(10) + self.assertNotIn(userpage, userpage.site.watched_pages(**wp_params)) class TestPageDelete(TestCase): diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py index fe59e6a252..2cf9661dbe 100755 --- a/tests/pagegenerators_tests.py +++ b/tests/pagegenerators_tests.py @@ -27,7 +27,7 @@ PreloadingGenerator, WikibaseItemFilterPageGenerator, ) -from tests import join_data_path, unittest_print +from tests import join_data_path from tests.aspects import ( DefaultSiteTestCase, DeprecationTestCase, @@ -73,13 +73,13 @@ class TestDryPageGenerators(TestCase): titles = en_wp_page_titles + en_wp_nopage_titles def setUp(self) -> None: - """Setup test.""" + """Set up test.""" super().setUp() self.site = self.get_site() def assertFunction(self, obj) -> None: """Assert function test.""" - self.assertTrue(hasattr(pagegenerators, obj)) + self.assertHasAttr(pagegenerators, obj) self.assertTrue(callable(getattr(pagegenerators, obj))) def test_module_import(self) -> None: @@ -202,7 +202,7 @@ class BasetitleTestCase(TestCase): 'Calf Case.pdf/{}') def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() self.site = self.get_site() self.titles = [self.base_title.format(i) for i in range(1, 11)] @@ -228,7 +228,7 @@ class TestCategoryFilterPageGenerator(BasetitleTestCase): 
category_list = ['Category:Validated'] def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() self.catfilter_list = [pywikibot.Category(self.site, cat) for cat in self.category_list] @@ -596,7 +596,7 @@ def test_basic(self) -> None: self.assertIsInstance(page.exists(), bool) self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) + self.assertNotHasAttr(page, '_pageprops') self.assertLength(links, count) def test_low_step(self) -> None: @@ -611,7 +611,7 @@ def test_low_step(self) -> None: self.assertIsInstance(page.exists(), bool) self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) + self.assertNotHasAttr(page, '_pageprops') self.assertLength(links, count) def test_order(self) -> None: @@ -625,7 +625,7 @@ def test_order(self) -> None: self.assertIsInstance(page.exists(), bool) self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) + self.assertNotHasAttr(page, '_pageprops') self.assertEqual(page, links[count]) self.assertLength(links, count + 1) @@ -1077,7 +1077,7 @@ def test_prefixing_default(self) -> None: self.assertLessEqual(len(pages), 10) for page in pages: self.assertIsInstance(page, pywikibot.Page) - self.assertTrue(page.title().lower().startswith('a')) + self.assertStartsWith(page.title().lower(), 'a') def test_prefixing_ns(self) -> None: """Test prefixindex generator with namespace filter.""" @@ -1427,7 +1427,7 @@ class TestWantedFactoryGenerator(DefaultSiteTestCase): """Test pagegenerators.GeneratorFactory for wanted pages.""" def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() self.gf = pagegenerators.GeneratorFactory(site=self.site) @@ -1453,7 +1453,7 @@ def test_wanted_files(self) -> None: for page in self._generator_with_tests(): 
self.assertIsInstance(page, pywikibot.Page) if not isinstance(page, pywikibot.FilePage): - with self.assertRaisesRegex(ValueError, + with self.assertRaisesRegex(ValueError, # pragma: no cover 'does not have a valid extension'): pywikibot.FilePage(page) else: @@ -1547,7 +1547,7 @@ class TestLogeventsFactoryGenerator(DefaultSiteTestCase, @classmethod def setUpClass(cls) -> None: - """Setup test class.""" + """Set up test class.""" super().setUpClass() site = pywikibot.Site() newuser_logevents = list(site.logevents(logtype='newusers', total=1)) @@ -1666,7 +1666,7 @@ def test_RC_pagegenerator_result(self) -> None: testentry = entries[0] self.assertEqual(testentry.site, site) - self.assertTrue(hasattr(testentry, '_rcinfo')) + self.assertHasAttr(testentry, '_rcinfo') rcinfo = testentry._rcinfo self.assertEqual(rcinfo['server_name'], site.hostname()) @@ -1687,26 +1687,16 @@ def test_unconnected_with_repo(self) -> None: if not site: self.skipTest('Site is not using a Wikibase repository') - pages = list(pagegenerators.UnconnectedPageGenerator(self.site, 3)) + pages = list( + pagegenerators.UnconnectedPageGenerator(self.site, 3, strict=True)) self.assertLessEqual(len(pages), 3) pattern = (fr'Page \[\[({site.sitename}:|{site.code}:)-1\]\]' r" doesn't exist\.") - found = [] for page in pages: - with self.subTest(page=page): - try: - page.data_item() - except NoPageError as e: - self.assertRegex(str(e), pattern) - else: - found.append(page) - if found: - unittest_print('connection found for ', - ', '.join(str(p) for p in found)) - - # assume that we have at least one unconnected page - self.assertLess(len(found), 3) + with self.subTest(page=page), self.assertRaisesRegex(NoPageError, + pattern): + page.data_item() def test_unconnected_without_repo(self) -> None: """Test that it raises a ValueError on sites without repository.""" diff --git a/tests/paraminfo_tests.py b/tests/paraminfo_tests.py index a89bdcd677..388c0367aa 100755 --- a/tests/paraminfo_tests.py +++ 
b/tests/paraminfo_tests.py @@ -119,6 +119,7 @@ def test_content_format(self) -> None: 'text/css', 'text/plain', ] + if self.site.mw_version >= '1.36.0-wmf.2': base.extend([ 'application/octet-stream', @@ -127,6 +128,10 @@ def test_content_format(self) -> None: 'text/unknown', 'unknown/unknown', ]) + + if self.site.mw_version >= '1.45.0-wmf.11': + base.append('application/vue+xml') # T400537 + if isinstance(self.site, DataSite): # It is not clear when this format has been added, see T129281. base.append('application/vnd.php.serialized') diff --git a/tests/proofreadpage_tests.py b/tests/proofreadpage_tests.py index 2d50c9503b..cad13bc3bb 100755 --- a/tests/proofreadpage_tests.py +++ b/tests/proofreadpage_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for the proofreadpage module.""" # -# (C) Pywikibot team, 2015-2024 +# (C) Pywikibot team, 2015-2025 # # Distributed under the terms of the MIT license. # @@ -75,8 +75,10 @@ def test_tag_attr_str(self) -> None: def test_tag_attr_exceptions(self) -> None: """Test TagAttr for Exceptions.""" - self.assertRaises(ValueError, TagAttr, 'fromsection', 'A123"') - self.assertRaises(TypeError, TagAttr, 'fromsection', 3.0) + with self.assertRaisesRegex(ValueError, 'has wrong quotes'): + TagAttr('fromsection', 'A123"') + with self.assertRaisesRegex(TypeError, 'must be str or int'): + TagAttr('fromsection', 3.0) def test_pages_tag_parser(self) -> None: """Test PagesTagParser.""" @@ -110,14 +112,13 @@ def test_pages_tag_parser(self) -> None: def test_pages_tag_parser_exceptions(self) -> None: """Test PagesTagParser Exceptions.""" - text = """Text: """ + parser = PagesTagParser(text) + self.assertEqual(parser.index, 'Index.pdf') text = """Text: """ - self.assertRaises(ValueError, PagesTagParser, text) + with self.assertRaisesRegex(ValueError, 'has wrong quotes'): + PagesTagParser(text) class TestProofreadPageInvalidSite(TestCase): @@ -142,11 +143,11 @@ class TestBasePageMethodsProofreadPage(BasePageMethodsTestBase): family = 
'wikisource' code = 'en' - def setUp(self) -> None: - """Set up test case.""" + def setup_page(self) -> None: + """Set up test page.""" self._page = ProofreadPage( - self.site, 'Page:Popular Science Monthly Volume 1.djvu/12') - super().setUp() + self.site, 'Page:Popular Science Monthly Volume 1.djvu/12' + ) def test_basepage_methods(self) -> None: """Test ProofreadPage methods inherited from superclass BasePage.""" @@ -162,11 +163,10 @@ class TestLoadRevisionsCachingProofreadPage( family = 'wikisource' code = 'en' - def setUp(self) -> None: - """Set up test case.""" + def setup_page(self) -> None: + """Set up test page.""" self._page = ProofreadPage( self.site, 'Page:Popular Science Monthly Volume 1.djvu/12') - super().setUp() def test_page_text(self) -> None: """Test site.loadrevisions() with Page.text.""" @@ -343,7 +343,7 @@ def test_valid_parsing(self) -> None: def test_div_in_footer(self) -> None: """Test ProofreadPage page parsing functions.""" page = ProofreadPage(self.site, self.div_in_footer['title']) - self.assertTrue(page.footer.endswith('')) + self.assertEndsWith(page.footer, '') def test_decompose_recompose_text(self) -> None: """Test ProofreadPage page decomposing/composing text.""" @@ -500,7 +500,7 @@ def test_ocr_wmfocr(self) -> None: """Test page.ocr(ocr_tool='wmfOCR').""" try: text = self.page.ocr(ocr_tool='wmfOCR') - except Exception as exc: + except Exception as exc: # pragma: no cover self.assertIsInstance(exc, ValueError) else: ref_text = self.data['wmfOCR'] @@ -543,7 +543,7 @@ def test_index(self) -> None: # Test deleter del page.index - self.assertFalse(hasattr(page, '_index')) + self.assertNotHasAttr(page, '_index') # Test setter with wrong type. 
with self.assertRaises(TypeError): page.index = 'invalid index' @@ -637,11 +637,10 @@ class TestBasePageMethodsIndexPage(BS4TestCase, BasePageMethodsTestBase): family = 'wikisource' code = 'en' - def setUp(self) -> None: - """Set up test case.""" + def setup_page(self) -> None: + """Set up test page.""" self._page = IndexPage( self.site, 'Index:Popular Science Monthly Volume 1.djvu') - super().setUp() def test_basepage_methods(self) -> None: """Test IndexPage methods inherited from superclass BasePage.""" @@ -657,11 +656,10 @@ class TestLoadRevisionsCachingIndexPage(BS4TestCase, family = 'wikisource' code = 'en' - def setUp(self) -> None: - """Set up test case.""" + def setup_page(self) -> None: + """Set up test page.""" self._page = IndexPage( self.site, 'Index:Popular Science Monthly Volume 1.djvu') - super().setUp() def test_page_text(self) -> None: """Test site.loadrevisions() with Page.text.""" @@ -793,11 +791,6 @@ def test_get_page_and_number(self, key) -> None: self.assertEqual(index_page.get_page_number_from_label(str(label)), num_set) - # Error if label does not exists. - label, num_set = 'dummy label', [] - with self.assertRaises(KeyError): - index_page.get_page_number_from_label('dummy label') - # Test get_page_from_label. for label, page_set in data['get_page']: # Get set of pages from label with label as int or str. @@ -806,10 +799,6 @@ def test_get_page_and_number(self, key) -> None: self.assertEqual(index_page.get_page_from_label(str(label)), page_set) - # Error if label does not exists. - with self.assertRaises(KeyError): - index_page.get_page_from_label('dummy label') - # Test get_page. for n in num_set: p = index_page.get_page(n) @@ -820,6 +809,10 @@ def test_get_page_and_number(self, key) -> None: n = index_page.get_number(p) self.assertEqual(index_page.get_page(n), p) + # Error if label does not exists. 
+ with self.assertRaises(KeyError): + index_page.get_page_number_from_label('dummy label') + def test_page_gen(self, key) -> None: """Test Index page generator.""" data = self.sites[key] diff --git a/tests/protectbot_tests.py b/tests/protectbot_tests.py index faf404eee3..7279aa505b 100755 --- a/tests/protectbot_tests.py +++ b/tests/protectbot_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for scripts/protect.py.""" # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. # @@ -54,10 +54,11 @@ def test_summary(self) -> None: self.maxDiff = None comment = rev[0].comment - self.assertTrue(comment.startswith( + self.assertStartsWith( + comment, 'Protected "[[User:Sn1per/ProtectTest2]]": Bot: ' 'Protecting all pages from category Pywikibot Protect Test' - )) + ) # the order may change, see T367259 for ptype in ('Edit', 'Move'): self.assertIn(f'[{ptype}=Allow only administrators] (indefinite)', diff --git a/tests/script_tests.py b/tests/script_tests.py index 19e4eedaf4..9a37290fc4 100755 --- a/tests/script_tests.py +++ b/tests/script_tests.py @@ -11,8 +11,10 @@ import sys import unittest from contextlib import suppress -from importlib import import_module +from pathlib import Path +from pywikibot.backports import Iterator +from pywikibot.bot import global_args as pwb_args from pywikibot.tools import has_module from tests import join_root_path, unittest_print from tests.aspects import DefaultSiteTestCase, MetaTestCaseClass, PwbTestCase @@ -28,7 +30,6 @@ # These dependencies are not always the package name which is in setup.py. # Here, the name given to the module which will be imported is required. 
script_deps = { - 'create_isbn_edition': ['isbnlib', 'unidecode'], 'weblinkchecker': ['memento_client'], } @@ -38,8 +39,6 @@ def check_script_deps(script_name) -> bool: if script_name in script_deps: for package_name in script_deps[script_name]: if not has_module(package_name): - unittest_print(f'{script_name} depends on {package_name},' - " which isn't available") return False return True @@ -51,20 +50,30 @@ def check_script_deps(script_name) -> bool: unrunnable_script_set = set() -def list_scripts(path, exclude=None): - """Return list of scripts in given path.""" +def list_scripts(path: str, exclude: str = '') -> list[str]: + """List script names (without '.py') in a directory. + + :param path: Directory path to search for Python scripts. + :param exclude: Filename (without '.py' extension) to exclude from + the result. Defaults to empty string, meaning no exclusion. + :return: List of script names without the '.py' extension, excluding + the specified file. Files starting with '_' (e.g. __init__.py) + are always excluded. + """ + p = Path(path) return [ - name[0:-3] for name in os.listdir(path) # strip '.py' - if name.endswith('.py') - and not name.startswith('_') # skip __init__.py and _* - and name != exclude + f.stem for f in p.iterdir() + if f.is_file() + and f.suffix == '.py' + and not f.name.startswith('_') + and f.stem != exclude ] -script_list = framework_scripts + list_scripts(scripts_path) +script_list = framework_scripts + list_scripts(scripts_path, + 'create_isbn_edition') script_input = { - 'create_isbn_edition': '\n', 'category_redirect': 'q\nn\n', 'interwiki': 'Test page that should not exist\n', 'misspelling': 'q\n', @@ -87,7 +96,6 @@ def list_scripts(path, exclude=None): 'checkimages', 'clean_sandbox', 'commons_information', - 'create_isbn_edition', 'delinker', 'login', 'misspelling', @@ -140,49 +148,53 @@ def list_scripts(path, exclude=None): } -def collector(loader=unittest.loader.defaultTestLoader): - """Load the default tests. - - .. 
note:: Raising SkipTest during load_tests will cause the loader - to fallback to its own discover() ordering of unit tests. - """ - if unrunnable_script_set: # pragma: no cover - unittest_print('Skipping execution of unrunnable scripts:\n' - f'{unrunnable_script_set!r}') - - test_pattern = 'tests.script_tests.TestScript{}.test_{}' +def collector() -> Iterator[str]: + """Generate test fully qualified names from test classes.""" + for cls in TestScriptHelp, TestScriptSimulate, TestScriptGenerator: + for name in cls._script_list: + name = '_' + name if name == 'login' else name + yield f'tests.script_tests.{cls.__name__}.test_{name}' - tests = ['_login'] + [name for name in sorted(script_list) - if name != 'login' - and name not in unrunnable_script_set] - test_list = [test_pattern.format('Help', name) for name in tests] - tests = [name for name in tests if name not in failed_dep_script_set] - test_list += [test_pattern.format('Simulate', name) for name in tests] +custom_loader = False - tests = [name for name in tests if name not in auto_run_script_set] - test_list += [test_pattern.format('Generator', name) for name in tests] +def load_tests(loader: unittest.TestLoader = unittest.defaultTestLoader, + standard_tests: unittest.TestSuite | None = None, + pattern: str | None = None) -> unittest.TestSuite: + """Load the default modules and return a TestSuite.""" + global custom_loader + custom_loader = True suite = unittest.TestSuite() - suite.addTests(loader.loadTestsFromNames(test_list)) + suite.addTests(loader.loadTestsFromNames(collector())) return suite -def load_tests(loader=unittest.loader.defaultTestLoader, - tests=None, pattern=None): - """Load the default modules.""" - return collector(loader) +def filter_scripts(excluded: set[str] | None = None, *, + exclude_auto_run: bool = False, + exclude_failed_dep: bool = True) -> list[str]: + """Return a filtered list of script names. + + :param excluded: Scripts to exclude explicitly. 
+ :param exclude_auto_run: If True, remove scripts in + auto_run_script_set. + :param exclude_failed_dep: If True, remove scripts in + failed_dep_script_set. + :return: A list of valid script names in deterministic order. + """ + excluded = excluded or set() + scripts = ['login'] + [ + name for name in sorted(script_list) + if name != 'login' + and name not in unrunnable_script_set + and (not exclude_failed_dep or name not in failed_dep_script_set) + ] -def import_script(script_name: str) -> None: - """Import script for coverage only (T305795).""" - if not ci_test_run: - return # pragma: no cover + if exclude_auto_run: + scripts = [n for n in scripts if n not in auto_run_script_set] - prefix = 'scripts.' - if script_name in framework_scripts: - prefix = 'pywikibot.' + prefix - import_module(prefix + script_name) + return [n for n in scripts if n not in excluded] class ScriptTestMeta(MetaTestCaseClass): @@ -201,7 +213,7 @@ def test_execution(script_name, args=None): def test_script(self) -> None: global_args_msg = \ 'For global options use -help:global or run pwb' - global_args = ['-pwb_close_matches:1'] + global_args = (pwb_args or []) + ['-pwb_close_matches:1'] cmd = [*global_args, script_name, *args] data_in = script_input.get(script_name) @@ -290,34 +302,31 @@ def test_script(self) -> None: arguments = dct['_arguments'] - for script_name in script_list: - import_script(script_name) + if custom_loader: + collected_scripts = dct['_script_list'] + else: + collected_scripts = filter_scripts(exclude_failed_dep=False) + for script in collected_scripts: # force login to be the first, alphabetically, so the login # message does not unexpectedly occur during execution of # another script. 
- # unrunnable script tests are disabled by default in load_tests() - - if script_name == 'login': - test_name = 'test__login' - else: - test_name = 'test_' + script_name - - cls.add_method(dct, test_name, - test_execution(script_name, arguments.split()), - f'Test running {script_name} {arguments}.') - - if script_name in dct['_expected_failures']: - dct[test_name] = unittest.expectedFailure(dct[test_name]) - elif script_name in dct['_allowed_failures']: - dct[test_name] = unittest.skip( - f'{script_name} is in _allowed_failures set' - )(dct[test_name]) - elif script_name in failed_dep_script_set \ - and arguments == '-simulate': - dct[test_name] = unittest.skip( - f'{script_name} has dependencies; skipping' - )(dct[test_name]) + test = 'test__login' if script == 'login' else 'test_' + script + + cls.add_method(dct, test, + test_execution(script, arguments.split()), + f'Test running {script} {arguments}.') + + if script in dct['_expected_failures']: + dct[test] = unittest.expectedFailure(dct[test]) + elif script in dct['_allowed_failures']: + dct[test] = unittest.skip( + f'{script} is in _allowed_failures set' + )(dct[test]) + elif script in failed_dep_script_set and arguments == '-simulate': + dct[test] = unittest.skip( + f'{script} has dependencies; skipping' + )(dct[test]) return super().__new__(cls, name, bases, dct) @@ -340,6 +349,7 @@ class TestScriptHelp(PwbTestCase, metaclass=ScriptTestMeta): _results = None _skip_results = {} _timeout = False + _script_list = filter_scripts(exclude_failed_dep=False) class TestScriptSimulate(DefaultSiteTestCase, PwbTestCase, @@ -388,6 +398,7 @@ class TestScriptSimulate(DefaultSiteTestCase, PwbTestCase, _results = no_args_expected_results _skip_results = skip_on_results _timeout = auto_run_script_set + _script_list = filter_scripts(_allowed_failures) class TestScriptGenerator(DefaultSiteTestCase, PwbTestCase, @@ -409,7 +420,6 @@ class TestScriptGenerator(DefaultSiteTestCase, PwbTestCase, 'claimit', 'clean_sandbox', 
'commonscat', - 'create_isbn_edition', 'data_ingestion', 'delinker', 'djvutext', @@ -419,7 +429,6 @@ class TestScriptGenerator(DefaultSiteTestCase, PwbTestCase, 'interwiki', 'listpages', 'login', - 'misspelling', 'movepages', 'pagefromfile', 'parser_function_count', @@ -456,6 +465,7 @@ class TestScriptGenerator(DefaultSiteTestCase, PwbTestCase, _results = ("Working on 'Foobar'", 'Script terminated successfully') _skip_results = {} _timeout = True + _script_list = filter_scripts(_allowed_failures, exclude_auto_run=True) if __name__ == '__main__': diff --git a/tests/site_detect_tests.py b/tests/site_detect_tests.py index 7cbd7f6c85..28de946be0 100755 --- a/tests/site_detect_tests.py +++ b/tests/site_detect_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Test for site detection.""" # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. # @@ -61,10 +61,7 @@ class MediaWikiSiteTestCase(SiteDetectionTestCase): standard_version_sites = ( 'http://www.ck-wissen.de/ckwiki/index.php?title=$1', 'http://en.citizendium.org/wiki/$1', - # Server that hosts www.wikichristian.org is unreliable - it - # occasionally responding with 500 error (see: T151368). 
'http://www.wikichristian.org/index.php?title=$1', - 'http://kb.mozillazine.org/$1' # 1.40.1 ) non_standard_version_sites = ( @@ -106,7 +103,11 @@ class MediaWikiSiteTestCase(SiteDetectionTestCase): def test_standard_version_sites(self) -> None: """Test detection of standard MediaWiki sites.""" for url in self.standard_version_sites: - with self.subTest(url=urlparse(url).netloc): + nl = urlparse(url).netloc + with self.subTest(url=nl): + if os.getenv('GITHUB_ACTIONS') and nl == 'en.citizendium.org': + self.skipTest('Skip test on github due to T404583') + self.assertSite(url) def test_proofreadwiki(self) -> None: @@ -196,7 +197,7 @@ def fetch(self, url, *args, **kwargs): @PatchingTestCase.patched(pywikibot, 'input') def input(self, question, *args, **kwargs): """Patched version of pywikibot.input.""" - self.assertTrue(question.endswith('username?')) + self.assertEndsWith(question, 'username?') return self.USERNAME @PatchingTestCase.patched(pywikibot, 'Site') @@ -227,7 +228,7 @@ def test_T235768_failure(self) -> None: """ site = MWSite(self._weburl) self.assertIsInstance(site, MWSite) - self.assertTrue(hasattr(site, 'lang')) + self.assertHasAttr(site, 'lang') self.assertEqual(site.lang, self.LANG) diff --git a/tests/site_generators_tests.py b/tests/site_generators_tests.py index 52e729f01d..a5387738a2 100755 --- a/tests/site_generators_tests.py +++ b/tests/site_generators_tests.py @@ -275,7 +275,7 @@ def test_allpages(self) -> None: self.assertGreaterEqual(page.title(), 'Py') for page in mysite.allpages(prefix='Pre', total=5): if self.validate_page(page): - self.assertTrue(page.title().startswith('Pre')) + self.assertStartsWith(page.title(), 'Pre') for page in mysite.allpages(namespace=1, total=5): self.validate_page(page, namespace=1) for page in mysite.allpages(filterredir=True, total=5): @@ -307,20 +307,15 @@ def test_allpages_langlinks_disabled(self) -> None: def test_allpages_pagesize(self) -> None: """Test allpages with page maxsize parameter.""" mysite = 
self.get_site() + encoding = mysite.encoding() for page in mysite.allpages(minsize=100, total=5): self.assertIsInstance(page, pywikibot.Page) self.assertTrue(page.exists()) - self.assertGreaterEqual(len(page.text.encode(mysite.encoding())), - 100) + self.assertGreaterEqual(len(page.text.encode(encoding)), 100) for page in mysite.allpages(maxsize=200, total=5): self.assertIsInstance(page, pywikibot.Page) self.assertTrue(page.exists()) - if len(page.text.encode(mysite.encoding())) > 200 \ - and mysite.data_repository() == mysite: # pragma: no cover - unittest_print( - f'{page}.text is > 200 bytes while raw JSON is <= 200') - continue - self.assertLessEqual(len(page.text.encode(mysite.encoding())), 200) + self.assertLessEqual(len(page.text.encode(encoding)), 200) def test_allpages_protection(self) -> None: """Test allpages with protect_type parameter.""" @@ -336,57 +331,6 @@ def test_allpages_protection(self) -> None: self.assertIn('edit', page._protection) self.assertIn('sysop', page._protection['edit']) - def test_all_links(self) -> None: - """Test the site.alllinks() method.""" - mysite = self.get_site() - fwd = list(mysite.alllinks(total=10)) - uniq = list(mysite.alllinks(total=10, unique=True)) - - with self.subTest(msg='Test that unique links are in all links'): - self.assertLessEqual(len(fwd), 10) - self.assertLessEqual(len(uniq), len(fwd)) - for link in fwd: - self.assertIsInstance(link, pywikibot.Page) - self.assertIn(link, uniq) - - with self.subTest(msg='Test with start parameter'): - for page in mysite.alllinks(start='Link', total=5): - self.assertIsInstance(page, pywikibot.Page) - self.assertEqual(page.namespace(), 0) - self.assertGreaterEqual(page.title(), 'Link') - - with self.subTest(msg='Test with prefix parameter'): - for page in mysite.alllinks(prefix='Fix', total=5): - self.assertIsInstance(page, pywikibot.Page) - self.assertEqual(page.namespace(), 0) - self.assertTrue( - page.title().startswith('Fix'), - msg=f"{page.title()} does not start with 
'Fix'" - ) - - # increase timeout due to T359427/T359425 - # ~ 47s are required on wikidata - config_timeout = pywikibot.config.socket_timeout - pywikibot.config.socket_timeout = (config_timeout[0], 60) - with self.subTest(msg='Test namespace parameter'): - for page in mysite.alllinks(namespace=1, total=5): - self.assertIsInstance(page, pywikibot.Page) - self.assertEqual(page.namespace(), 1) - pywikibot.config.socket_timeout = config_timeout - - with self.subTest(msg='Test with fromids parameter'): - for page in mysite.alllinks(start='From', namespace=4, - fromids=True, total=5): - self.assertIsInstance(page, pywikibot.Page) - self.assertGreaterEqual(page.title(with_ns=False), 'From') - self.assertTrue(hasattr(page, '_fromid')) - - with self.subTest( - msg='Test that Error is raised with unique and fromids'): - errgen = mysite.alllinks(unique=True, fromids=True) - with self.assertRaises(Error): - next(errgen) - def test_all_categories(self) -> None: """Test the site.allcategories() method.""" mysite = self.get_site() @@ -400,8 +344,7 @@ def test_all_categories(self) -> None: self.assertGreaterEqual(cat.title(with_ns=False), 'Abc') for cat in mysite.allcategories(total=5, prefix='Def'): self.assertIsInstance(cat, pywikibot.Category) - self.assertTrue(cat.title(with_ns=False).startswith('Def')) - # Bug T17985 - reverse and start combined; fixed in v 1.14 + self.assertStartsWith(cat.title(with_ns=False), 'Def') for cat in mysite.allcategories(total=5, start='Hij', reverse=True): self.assertIsInstance(cat, pywikibot.Category) self.assertLessEqual(cat.title(with_ns=False), 'Hij') @@ -426,7 +369,7 @@ def test_all_images(self) -> None: for impage in mysite.allimages(prefix='Ch', total=5): self.assertIsInstance(impage, pywikibot.FilePage) self.assertTrue(impage.exists()) - self.assertTrue(impage.title(with_ns=False).startswith('Ch')) + self.assertStartsWith(impage.title(with_ns=False), 'Ch') for impage in mysite.allimages(minsize=100, total=5): 
self.assertIsInstance(impage, pywikibot.FilePage) self.assertTrue(impage.exists()) @@ -617,7 +560,7 @@ def test_protectedpages_edit_level(self) -> None: """Test protectedpages protection level.""" site = self.get_site() levels = set() - all_levels = site.protection_levels().difference(['']) + all_levels = site.restrictions['levels'].difference(['']) for level in all_levels: if list(site.protectedpages(protect_type='edit', level=level, total=1)): @@ -693,26 +636,15 @@ def test_unconnected(self) -> None: if not site: self.skipTest('Site is not using a Wikibase repository') - pages = list(self.site.unconnected_pages(total=3)) + pages = list(self.site.unconnected_pages(total=3, strict=True)) self.assertLessEqual(len(pages), 3) pattern = (fr'Page \[\[({site.sitename}:|{site.code}:)-1\]\]' r" doesn't exist\.") - found = [] for page in pages: - with self.subTest(page=page): - try: - page.data_item() - except NoPageError as e: - self.assertRegex(str(e), pattern) - else: - found.append(page) - if found: - unittest_print('connection found for ', - ', '.join(str(p) for p in found)) - - # assume that we have at least one unconnected page - self.assertLess(len(found), 3) + with self.subTest(page=page), self.assertRaisesRegex(NoPageError, + pattern): + page.data_item() class TestSiteGeneratorsUsers(DefaultSiteTestCase): @@ -768,7 +700,7 @@ def test_allusers_with_prefix(self) -> None: for user in mysite.allusers(prefix='C', total=5): self.assertIsInstance(user, dict) self.assertIn('name', user) - self.assertTrue(user['name'].startswith('C')) + self.assertStartsWith(user['name'], 'C') self.assertIn('editcount', user) self.assertIn('registration', user) @@ -778,7 +710,7 @@ def test_allusers_with_group(self) -> None: for user in mysite.allusers(prefix='D', group='bot', total=5): self.assertIsInstance(user, dict) self.assertIn('name', user) - self.assertTrue(user['name'].startswith('D')) + self.assertStartsWith(user['name'], 'D') self.assertIn('editcount', user) 
self.assertIn('registration', user) self.assertIn('groups', user) @@ -1033,28 +965,36 @@ def test_changetype(self) -> None: def test_flags(self) -> None: """Test the site.recentchanges() with boolean flags.""" mysite = self.site - for change in mysite.recentchanges(minor=True, total=5): - self.assertIsInstance(change, dict) - self.assertIn('minor', change) - for change in mysite.recentchanges(minor=False, total=5): - self.assertIsInstance(change, dict) - self.assertNotIn('minor', change) - for change in mysite.recentchanges(bot=True, total=5): - self.assertIsInstance(change, dict) - self.assertIn('bot', change) - for change in mysite.recentchanges(bot=False, total=5): - self.assertIsInstance(change, dict) - self.assertNotIn('bot', change) - for change in mysite.recentchanges(anon=True, total=5): - self.assertIsInstance(change, dict) - for change in mysite.recentchanges(anon=False, total=5): - self.assertIsInstance(change, dict) - for change in mysite.recentchanges(redirect=False, total=5): - self.assertIsInstance(change, dict) - self.assertNotIn('redirect', change) - for change in mysite.recentchanges(redirect=True, total=5): - self.assertIsInstance(change, dict) - self.assertIn('redirect', change) + with self.subTest(minor=True): + for change in mysite.recentchanges(minor=True, total=5): + self.assertIsInstance(change, dict) + self.assertIn('minor', change) + with self.subTest(minor=False): + for change in mysite.recentchanges(minor=False, total=5): + self.assertIsInstance(change, dict) + self.assertNotIn('minor', change) + with self.subTest(bot=True): + for change in mysite.recentchanges(bot=True, total=5): + self.assertIsInstance(change, dict) + self.assertIn('bot', change) + with self.subTest(bot=False): + for change in mysite.recentchanges(bot=False, total=5): + self.assertIsInstance(change, dict) + self.assertNotIn('bot', change) + with self.subTest(anon=True): + for change in mysite.recentchanges(anon=True, total=5): + self.assertIsInstance(change, dict) + 
with self.subTest(anon=False): + for change in mysite.recentchanges(anon=False, total=5): + self.assertIsInstance(change, dict) + with self.subTest(redirect=True): + for change in mysite.recentchanges(redirect=True, total=5): + self.assertIsInstance(change, dict) + self.assertIn('redirect', change) + with self.subTest(redirect=False): + for change in mysite.recentchanges(redirect=False, total=5): + self.assertIsInstance(change, dict) + self.assertNotIn('redirect', change) def test_tag_filter(self) -> None: """Test the site.recentchanges() with tag filter.""" @@ -1109,7 +1049,7 @@ def test_watched_pages_uncached(self) -> None: """Test the site.watched_pages() method uncached.""" gen = self.site.watched_pages(total=5, force=True) self.assertIsInstance(gen.request, api.Request) - self.assertFalse(issubclass(gen.request_class, api.CachedRequest)) + self.assertNotIsSubclass(gen.request_class, api.CachedRequest) for page in gen: self.assertIsInstance(page, pywikibot.Page) @@ -1188,7 +1128,7 @@ def test_namespaces(self) -> None: namespaces=14, total=5): self.assertIsInstance(contrib, dict) self.assertIn('title', contrib) - self.assertTrue(contrib['title'].startswith(mysite.namespace(14))) + self.assertStartsWith(contrib['title'], mysite.namespace(14)) for contrib in mysite.usercontribs(user=mysite.user(), namespaces=[10, 11], total=5): @@ -1224,7 +1164,7 @@ def test_user_prefix(self) -> None: self.assertIsInstance(contrib, dict) for key in ('user', 'title', 'ns', 'pageid', 'revid'): self.assertIn(key, contrib) - self.assertTrue(contrib['user'].startswith('John')) + self.assertStartsWith(contrib['user'], 'John') def test_user_prefix_range(self) -> None: """Test the site.usercontribs() method.""" @@ -1327,7 +1267,7 @@ def test_namespaces(self) -> None: for data in mysite.alldeletedrevisions(namespaces=14, total=5): self.assertIsInstance(data, dict) self.assertIn('title', data) - self.assertTrue(data['title'].startswith(mysite.namespace(14))) + 
self.assertStartsWith(data['title'], mysite.namespace(14)) for data in mysite.alldeletedrevisions(user=mysite.user(), namespaces=[10, 11], @@ -1470,7 +1410,7 @@ def test_prefix(self) -> None: title = data['title'] if data['ns'] > 0: *_, title = title.partition(':') - self.assertTrue(title.startswith('John')) + self.assertStartsWith(title, 'John') self.assertIsInstance(data['revisions'], list) for drev in data['revisions']: self.assertIsInstance(drev, dict) @@ -1577,22 +1517,13 @@ def test_users(self) -> None: self.assertIn('missing', user) elif self.site.family.name == 'wikipedia': self.assertNotIn('missing', user) - self.assertEqual(cnt, len(all_users), 'Some test usernames not found') + self.assertLength(all_users, cnt, 'Some test usernames not found') class SiteRandomTestCase(DefaultSiteTestCase): """Test random methods of a site.""" - @classmethod - def setUpClass(cls) -> None: - """Skip test on beta due to T282602.""" - super().setUpClass() - site = cls.get_site() - if site.family.name in ('wpbeta', 'wsbeta'): - cls.skipTest(cls, - f'Skipping test on {site} due to T282602') - def test_unlimited_small_step(self) -> None: """Test site.randompages() continuation. 
@@ -1691,7 +1622,7 @@ class TestSiteLoadRevisions(TestCase): # Implemented without setUpClass(cls) and global variables as objects # were not completely disposed and recreated but retained 'memory' def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() self.mysite = self.get_site() self.mainpage = pywikibot.Page(pywikibot.Link('Main Page', @@ -1702,7 +1633,7 @@ def test_loadrevisions_basic(self) -> None: # Load revisions without content self.mysite.loadrevisions(self.mainpage, total=15) self.mysite.loadrevisions(self.mainpage) - self.assertFalse(hasattr(self.mainpage, '_text')) + self.assertNotHasAttr(self.mainpage, '_text') self.assertLength(self.mainpage._revisions, 15) self.assertIn(self.mainpage._revid, self.mainpage._revisions) self.assertIsNone(self.mainpage._revisions[self.mainpage._revid].text) @@ -1712,7 +1643,7 @@ def test_loadrevisions_basic(self) -> None: def test_loadrevisions_content(self) -> None: """Test the site.loadrevisions() method with content=True.""" self.mysite.loadrevisions(self.mainpage, content=True, total=5) - self.assertFalse(hasattr(self.mainpage, '_text')) + self.assertNotHasAttr(self.mainpage, '_text') self.assertIn(self.mainpage._revid, self.mainpage._revisions) self.assertIsNotNone( self.mainpage._revisions[self.mainpage._revid].text) @@ -1824,23 +1755,31 @@ class TestBacklinks(TestCase): cached = True def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() self.page = pywikibot.Page(self.site, 'File:BoA – Woman.png') - self.backlinks = list(self.page.backlinks(follow_redirects=False, - filter_redirects=True, - total=5)) - self.references = list(self.page.getReferences(follow_redirects=True, - filter_redirects=True, - total=5)) - self.nofollow = list(self.page.getReferences(follow_redirects=False, - filter_redirects=True, - total=5)) + self.backlinks = list( + self.page.backlinks(follow_redirects=False, + filter_redirects=True, + total=5) + ) + self.references = list( + 
self.page.getReferences(follow_redirects=True, + filter_redirects=True, + with_template_inclusion=False, + total=5) + ) + self.nofollow = list( + self.page.getReferences(follow_redirects=False, + filter_redirects=True, + with_template_inclusion=False, + total=5) + ) def test_backlinks_redirects_length(self) -> None: """Test backlinks redirects length.""" self.assertLength(self.backlinks, 1) - self.assertLength(self.references, 1) + self.assertLength(set(self.references), 1) self.assertLength(self.nofollow, 1) def test_backlinks_redirects_status(self) -> None: @@ -1894,7 +1833,7 @@ def test_filearchive_prefix(self) -> None: gen = self.site.filearchive(prefix='py') self.assertIn('faprefix=py', str(gen.request)) for item in gen: - self.assertTrue(item['name'].startswith('Py')) + self.assertStartsWith(item['name'], 'Py') def test_filearchive_prop(self) -> None: """Test properties.""" @@ -1939,7 +1878,7 @@ class TestLoadPagesFromPageids(DefaultSiteTestCase): cached = True def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() self.site = self.get_site() mainpage = self.get_mainpage() @@ -1956,9 +1895,9 @@ def test_load_from_pageids_iterable_of_str(self) -> None: self.assertIsInstance(page, pywikibot.Page) self.assertIsInstance(page.exists(), bool) self.assertTrue(page.exists()) - self.assertTrue(hasattr(page, '_pageid')) + self.assertHasAttr(page, '_pageid') self.assertIn(page, self.links) - self.assertEqual(count, len(self.links)) + self.assertLength(self.links, count) def test_load_from_pageids_iterable_of_int(self) -> None: """Test basic loading with pageids.""" @@ -1969,9 +1908,9 @@ def test_load_from_pageids_iterable_of_int(self) -> None: self.assertIsInstance(page, pywikibot.Page) self.assertIsInstance(page.exists(), bool) self.assertTrue(page.exists()) - self.assertTrue(hasattr(page, '_pageid')) + self.assertHasAttr(page, '_pageid') self.assertIn(page, self.links) - self.assertEqual(count, len(self.links)) + 
self.assertLength(self.links, count) def test_load_from_pageids_iterable_in_order(self) -> None: """Test loading with pageids is ordered.""" @@ -1982,7 +1921,7 @@ def test_load_from_pageids_iterable_in_order(self) -> None: self.assertIsInstance(page, pywikibot.Page) self.assertIsInstance(page.exists(), bool) self.assertTrue(page.exists()) - self.assertTrue(hasattr(page, '_pageid')) + self.assertHasAttr(page, '_pageid') self.assertEqual(page, link) def test_load_from_pageids_iterable_with_duplicate(self) -> None: @@ -1995,9 +1934,9 @@ def test_load_from_pageids_iterable_with_duplicate(self) -> None: self.assertIsInstance(page, pywikibot.Page) self.assertIsInstance(page.exists(), bool) self.assertTrue(page.exists()) - self.assertTrue(hasattr(page, '_pageid')) + self.assertHasAttr(page, '_pageid') self.assertIn(page, self.links) - self.assertEqual(count, len(self.links)) + self.assertLength(self.links, count) def test_load_from_pageids_comma_separated(self) -> None: """Test loading from comma-separated pageids.""" @@ -2008,9 +1947,9 @@ def test_load_from_pageids_comma_separated(self) -> None: self.assertIsInstance(page, pywikibot.Page) self.assertIsInstance(page.exists(), bool) self.assertTrue(page.exists()) - self.assertTrue(hasattr(page, '_pageid')) + self.assertHasAttr(page, '_pageid') self.assertIn(page, self.links) - self.assertEqual(count, len(self.links)) + self.assertLength(self.links, count) def test_load_from_pageids_pipe_separated(self) -> None: """Test loading from comma-separated pageids.""" @@ -2021,9 +1960,9 @@ def test_load_from_pageids_pipe_separated(self) -> None: self.assertIsInstance(page, pywikibot.Page) self.assertIsInstance(page.exists(), bool) self.assertTrue(page.exists()) - self.assertTrue(hasattr(page, '_pageid')) + self.assertHasAttr(page, '_pageid') self.assertIn(page, self.links) - self.assertEqual(count, len(self.links)) + self.assertLength(self.links, count) class TestPagePreloading(DefaultSiteTestCase): @@ -2058,11 +1997,11 @@ def 
test_pageids(self) -> None: self.assertIsInstance(page, pywikibot.Page) self.assertIsInstance(page.exists(), bool) if page.exists(): - self.assertTrue(hasattr(page, '_revid')) + self.assertHasAttr(page, '_revid') self.assertLength(page._revisions, 1) self.assertIn(page._revid, page._revisions) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) + self.assertNotHasAttr(page, '_pageprops') if count >= 5: break @@ -2079,13 +2018,14 @@ def test_titles(self) -> None: self.assertEqual(page.pageid, page._pageid) del page._pageid + links.restart() # restart generator for count, page in enumerate(mysite.preloadpages(links), start=1): self.assertIsInstance(page, pywikibot.Page) self.assertIsInstance(page.exists(), bool) if page.exists(): self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) + self.assertNotHasAttr(page, '_pageprops') if count >= 5: break @@ -2100,7 +2040,7 @@ def test_preload_continuation(self) -> None: if page.exists(): self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) + self.assertNotHasAttr(page, '_pageprops') if count >= 5: break @@ -2126,7 +2066,7 @@ def test_preload_high_groupsize(self) -> None: if page.exists(): self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) + self.assertNotHasAttr(page, '_pageprops') self.assertEqual(count, link_count) def test_preload_low_groupsize(self) -> None: @@ -2151,7 +2091,7 @@ def test_preload_low_groupsize(self) -> None: if page.exists(): self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) + self.assertNotHasAttr(page, '_pageprops') self.assertEqual(count, link_count) def 
test_preload_unexpected_titles_using_pageids(self) -> None: @@ -2177,7 +2117,7 @@ def test_preload_unexpected_titles_using_pageids(self) -> None: if page.exists(): self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) + self.assertNotHasAttr(page, '_pageprops') if count >= 5: break @@ -2204,7 +2144,7 @@ def test_preload_unexpected_titles_using_titles(self) -> None: if page.exists(): self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) + self.assertNotHasAttr(page, '_pageprops') if count >= 5: break @@ -2243,8 +2183,8 @@ def test_preload_langlinks_normal(self) -> None: if page.exists(): self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) - self.assertTrue(hasattr(page, '_langlinks')) + self.assertNotHasAttr(page, '_pageprops') + self.assertHasAttr(page, '_langlinks') if count >= 5: break @@ -2266,7 +2206,7 @@ def test_preload_langlinks_count(self, output_mock) -> None: if page.exists(): self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) + self.assertNotHasAttr(page, '_pageprops') if pages: self.assertRegex( output_mock.call_args[0][0], r'Retrieving \d pages from ') @@ -2284,8 +2224,8 @@ def test_preload_templates(self) -> None: if page.exists(): self.assertLength(page._revisions, 1) self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) - self.assertTrue(hasattr(page, '_templates')) + self.assertNotHasAttr(page, '_pageprops') + self.assertHasAttr(page, '_templates') if count >= 5: break @@ -2303,9 +2243,9 @@ def test_preload_templates_and_langlinks(self) -> None: if page.exists(): self.assertLength(page._revisions, 1) 
self.assertIsNotNone(page._revisions[page._revid].text) - self.assertFalse(hasattr(page, '_pageprops')) - self.assertTrue(hasattr(page, '_templates')) - self.assertTrue(hasattr(page, '_langlinks')) + self.assertNotHasAttr(page, '_pageprops') + self.assertHasAttr(page, '_templates') + self.assertHasAttr(page, '_langlinks') if count >= 5: break @@ -2316,7 +2256,7 @@ def test_preload_categories(self) -> None: gen = mysite.preloadpages(cats, categories=True) for count, page in enumerate(gen): with self.subTest(page=page.title()): - self.assertTrue(hasattr(page, '_categories')) + self.assertHasAttr(page, '_categories') # content=True will bypass cache self.assertEqual(page._categories, set(page.categories(content=True))) diff --git a/tests/site_tests.py b/tests/site_tests.py index 8b66149a22..87a512a609 100755 --- a/tests/site_tests.py +++ b/tests/site_tests.py @@ -28,8 +28,10 @@ ) from tests.aspects import ( AlteredDefaultSiteTestCase, + DefaultDrySiteTestCase, DefaultSiteTestCase, DeprecationTestCase, + PatchingTestCase, TestCase, WikimediaDefaultSiteTestCase, ) @@ -54,8 +56,7 @@ def test_repr(self) -> None: code = self.site.family.obsolete.get(self.code) or self.code expect = f"Site('{code}', '{self.family}')" reprs = repr(self.site) - self.assertTrue(reprs.endswith(expect), - f'\n{reprs} does not end with {expect}') + self.assertEndsWith(reprs, expect) def test_constructors(self) -> None: """Test cases for site constructors.""" @@ -715,7 +716,7 @@ def test_revdel_file(self) -> None: site.loadimageinfo(fp1, history=True) for v in fp1._file_revisions.values(): if v['timestamp'] == ts1: - self.assertTrue(hasattr(v, 'userhidden')) + self.assertHasAttr(v, 'userhidden') # Multiple revisions site.deleterevs('oldimage', '20210314184415|20210314184430', @@ -726,7 +727,7 @@ def test_revdel_file(self) -> None: site.loadimageinfo(fp2, history=True) for v in fp2._file_revisions.values(): if v['timestamp'] in (ts1, ts2): - self.assertTrue(hasattr(v, 'commenthidden')) + 
self.assertHasAttr(v, 'commenthidden') # Concurrently show and hide site.deleterevs('oldimage', ['20210314184415', '20210314184430'], @@ -738,9 +739,9 @@ def test_revdel_file(self) -> None: site.loadimageinfo(fp3, history=True) for v in fp3._file_revisions.values(): if v['timestamp'] in (ts1, ts2): - self.assertFalse(hasattr(v, 'commenthidden')) - self.assertFalse(hasattr(v, 'userhidden')) - self.assertFalse(hasattr(v, 'filehidden')) + self.assertNotHasAttr(v, 'commenthidden') + self.assertNotHasAttr(v, 'userhidden') + self.assertNotHasAttr(v, 'filehidden') # Cleanup site.deleterevs('oldimage', [20210314184415, 20210314184430], @@ -784,6 +785,83 @@ def test_delete_oldimage(self) -> None: site.undelete(fp, 'pywikibot unit tests', fileids=[fileid]) +class TestRollbackPage(PatchingTestCase): + + """Test rollbackpage site method.""" + + family = 'wikipedia' + code = 'test' + login = True + + @staticmethod + @PatchingTestCase.patched(pywikibot.data.api.Request, '_simulate') + def _simulate(self, action): + """Patch api.Request._simulate. 
Note: self is the Request instance.""" + if action == 'rollback': + result = { + 'title': self._params['title'][0].title(), + 'summary': self._params.get('summary', + ['Rollback simulation'])[0], + 'last_revid': 381070, + } + return {action: result} + + if action and config.simulate and self.write: + result = {'result': 'Success', 'nochange': ''} + return {action: result} + + return None + + @classmethod + def setUpClass(cls): + """Use sandbox page for tests.""" + super().setUpClass() + cls.page = pywikibot.Page(cls.site, 'Sandbox') + + def setUp(self): + """Patch has_right method.""" + super().setUp() + self.patch(self.site, 'has_right', lambda right: True) + + def test_missing_rights(self): + """Test missing rollback right.""" + self.patch(self.site, 'has_right', lambda right: False) + with self.assertRaisesRegex( + Error, + rf'User "{self.site.user()}" does not have required user right' + ' "rollback" on site' + ): + self.site.rollbackpage(self.page, pageid=4711) + + def test_exceptions(self): + """Test rollback exceptions.""" + with self.assertRaisesRegex( + ValueError, + "The parameters 'page' and 'pageid' cannot be used together" + ): + self.site.rollbackpage(self.page, pageid=4711) + + with self.assertRaisesRegex( + ValueError, + r"One of parameters 'page' or 'pageid' is required\." 
+ ): + self.site.rollbackpage() + + with self.assertRaisesRegex( + NoPageError, r"Page -1 \(pageid\) doesn't exist\."): + self.site.rollbackpage(pageid=-1) + + def test_rollback_simulation(self): + """Test rollback in simulate mode.""" + result = self.site.rollbackpage(self.page) + self.assertIsInstance(result, dict) + self.assertEqual(result['title'], self.page.title()) + self.assertEqual(result['last_revid'], 381070) + self.assertEqual(result['summary'], 'Rollback simulation') + result = self.site.rollbackpage(self.page, summary='Rollback test') + self.assertEqual(result['summary'], 'Rollback test') + + class TestUsernameInUsers(DefaultSiteTestCase): """Test that the user account can be found in users list.""" @@ -824,10 +902,9 @@ class TestSiteLoadRevisionsCaching(BasePageLoadRevisionsCachingTestBase, """Test site.loadrevisions() caching.""" - def setUp(self) -> None: - """Setup tests.""" + def setup_page(self) -> None: + """Set up test page.""" self._page = self.get_mainpage(force=True) - super().setUp() def test_page_text(self) -> None: """Test site.loadrevisions() with Page.text.""" @@ -1041,28 +1118,23 @@ def test_linktrails(self) -> None: self.assertEqual(site.linktrail(), linktrail) -class TestSingleCodeFamilySite(AlteredDefaultSiteTestCase): +class TestSingleCodeFamilySite(DefaultDrySiteTestCase): """Test single code family sites.""" - sites = { - 'i18n': { - 'family': 'i18n', - 'code': 'i18n', - }, - } + family = 'i18n' + code = 'i18n' def test_twn(self) -> None: """Test translatewiki.net.""" url = 'translatewiki.net' - site = self.get_site('i18n') - self.assertEqual(site.hostname(), url) + site = self.get_site() self.assertEqual(site.code, 'i18n') self.assertIsInstance(site.namespaces, Mapping) self.assertFalse(site.obsolete) - self.assertEqual(site.family.hostname('en'), url) - self.assertEqual(site.family.hostname('i18n'), url) - self.assertEqual(site.family.hostname('translatewiki'), url) + self.assertEqual(site.hostname(), url) + for code in 'en', 
'i18n', 'translatewiki': + self.assertEqual(site.family.hostname(code), url) class TestSubdomainFamilySite(TestCase): @@ -1131,7 +1203,7 @@ def test_commons(self) -> None: site2 = pywikibot.Site('beta') self.assertEqual(site2.hostname(), - 'commons.wikimedia.beta.wmflabs.org') + 'commons.wikimedia.beta.wmcloud.org') self.assertEqual(site2.code, 'beta') self.assertFalse(site2.obsolete) @@ -1250,8 +1322,6 @@ def test_get_property_names(self, key) -> None: 'unexpectedUnconnectedPage', 'wikibase-badge-Q17437796', 'wikibase-badge-Q17437798', - 'wikibase-badge-Q17506997', - 'wikibase-badge-Q17580674', 'wikibase-badge-Q70894304', 'wikibase_item', ): diff --git a/tests/siteinfo_tests.py b/tests/siteinfo_tests.py index 577b5a670d..4cb7165ce8 100755 --- a/tests/siteinfo_tests.py +++ b/tests/siteinfo_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for the site module.""" # -# (C) Pywikibot team, 2008-2022 +# (C) Pywikibot team, 2008-2025 # # Distributed under the terms of the MIT license. 
# @@ -64,7 +64,7 @@ def test_properties(self) -> None: self.assertIn({'ext': 'png'}, fileextensions) # restrictions self.assertIn('restrictions', self.site.siteinfo) - restrictions = self.site.siteinfo.get('restrictions') + restrictions = self.site.restrictions self.assertIsInstance(restrictions, dict) self.assertIn('cascadinglevels', restrictions) diff --git a/tests/tests_tests.py b/tests/tests_tests.py index 08739ee091..fa6ab1e4db 100755 --- a/tests/tests_tests.py +++ b/tests/tests_tests.py @@ -23,7 +23,7 @@ class HttpServerProblemTestCase(TestCase): } } - def test_502(self) -> None: + def test_502(self) -> None: # pragma: no cover """Test that framework is skipping this test due to HTTP status 502.""" self.fail("The test framework should skip this test but it hasn't.") @@ -51,7 +51,7 @@ def test_assert_is_empty(self) -> None: def test_assert_is_empty_fail(self) -> None: """Test assertIsEmpty method failing.""" self.assertIsEmpty(self.seq1) - self.assertIsEmpty(self.seq2) + self.assertIsEmpty(self.seq2) # pragma: no cover def test_assert_is_not_empty(self) -> None: """Test assertIsNotEmpty method.""" @@ -62,7 +62,7 @@ def test_assert_is_not_empty(self) -> None: def test_assert_is_not_empty_fail(self) -> None: """Test that assertIsNotEmpty method may fail.""" self.assertIsNotEmpty([]) - self.assertIsNotEmpty('') + self.assertIsNotEmpty('') # pragma: no cover def test_assert_length(self) -> None: """Test assertLength method.""" @@ -74,8 +74,10 @@ def test_assert_length(self) -> None: def test_assert_length_fail(self) -> None: """Test that assertLength method is failing.""" self.assertLength([], 1) + # no cover: start self.assertLength(self.seq1, 0) self.assertLength(None, self.seq) + # no cover: stop class TestRequireVersionDry(DefaultSiteTestCase): @@ -106,7 +108,7 @@ def method_with_params(self, key) -> None: def method_failing(self) -> None: """Test method for decorator with invalid parameter.""" - self.assertTrue(False, 'should never happen') + 
self.assertTrue(False, 'should never happen') # pragma: no cover @require_version('>=1.31') def method_succeed(self) -> None: @@ -116,7 +118,7 @@ def method_succeed(self) -> None: @require_version('<1.31') def method_fail(self) -> None: """Test that decorator skips.""" - self.assertTrue(False, 'intentional fail for test') + self.assertTrue(False, 'intentional fail for test') # pragma: no cover def test_unsupported_methods(self) -> None: """Test require_version with unsupported methods.""" diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index 9ab1ff1973..0bb7daa23b 100755 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -43,7 +43,7 @@ def setUpClass(cls) -> None: cls.content = file.read_text(encoding='utf-8') def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" self.catresult1 = '[[Category:Cat1]]\n[[Category:Cat2]]\n' super().setUp() @@ -622,7 +622,7 @@ def test_nested_template_regex_match(self) -> None: self.assertIsNone(m['params']) self.assertIsNone(m[2]) self.assertIsNotNone(m['unhandled_depth']) - self.assertTrue(m[0].endswith('foo {{bar}}')) + self.assertEndsWith(m[0], 'foo {{bar}}') class TestDisabledParts(DefaultDrySiteTestCase): @@ -990,7 +990,7 @@ class TestDigitsConversion(TestCase): net = False def test_to_local(self) -> None: - """Test converting Latin digits to local digits.""" + """Test converting ASCII digits to local digits.""" self.assertEqual(textlib.to_local_digits(299792458, 'en'), '299792458') self.assertEqual( textlib.to_local_digits(299792458, 'fa'), '۲۹۹۷۹۲۴۵۸') @@ -1001,20 +1001,20 @@ def test_to_local(self) -> None: textlib.to_local_digits('299792458', 'km'), '២៩៩៧៩២៤៥៨') def test_to_latin(self) -> None: - """Test converting local digits to Latin digits.""" - self.assertEqual(textlib.to_latin_digits('299792458'), '299792458') + """Test converting local digits to ASCII digits.""" + self.assertEqual(textlib.to_ascii_digits('299792458'), '299792458') self.assertEqual( - 
textlib.to_latin_digits('۲۹۹۷۹۲۴۵۸', 'fa'), '299792458') + textlib.to_ascii_digits('۲۹۹۷۹۲۴۵۸', 'fa'), '299792458') self.assertEqual( - textlib.to_latin_digits('۲۹۹۷۹۲۴۵۸ flash'), '299792458 flash') + textlib.to_ascii_digits('۲۹۹۷۹۲۴۵۸ flash'), '299792458 flash') self.assertEqual( - textlib.to_latin_digits('២៩៩៧៩២៤៥៨', 'km'), '299792458') + textlib.to_ascii_digits('២៩៩៧៩២៤៥៨', 'km'), '299792458') self.assertEqual( - textlib.to_latin_digits('២៩៩៧៩២៤៥៨'), '299792458') + textlib.to_ascii_digits('២៩៩៧៩២៤៥៨'), '299792458') self.assertEqual( - textlib.to_latin_digits('២៩៩៧៩២៤៥៨', ['km', 'en']), '299792458') + textlib.to_ascii_digits('២៩៩៧៩២៤៥៨', ['km', 'en']), '299792458') self.assertEqual( - textlib.to_latin_digits('២៩៩៧៩២៤៥៨', ['en']), '២៩៩៧៩២៤៥៨') + textlib.to_ascii_digits('២៩៩៧៩២៤៥៨', ['en']), '២៩៩៧៩២៤៥៨') class TestReplaceExcept(DefaultDrySiteTestCase): @@ -1309,11 +1309,12 @@ def test_replace_tag_file(self) -> None: 'x', 'y', ['file'], site=self.site), '[[NonFile:y]]') + # No File if filename is missing self.assertEqual( textlib.replaceExcept( '[[File:]]', 'File:', 'NonFile:', ['file'], site=self.site), - '[[File:]]') + '[[NonFile:]]') self.assertEqual( textlib.replaceExcept( @@ -1542,11 +1543,16 @@ def _extract_sections_tests(self, result, header, sections, footer='', self.assertEqual(result.footer, footer) self.assertEqual(result.title, title) self.assertEqual(result, (header, sections, footer)) - for section in result.sections: + for i, section in enumerate(result.sections): self.assertIsInstance(section, tuple) self.assertLength(section, 2) self.assertIsInstance(section.level, int) self.assertEqual(section.title.count('=') // 2, section.level) + self.assertIn(section.heading, result.sections) + count = result.sections.count(section.heading) + self.assertGreaterEqual(count, 1) + if count == 1: + self.assertEqual(result.sections.index(section.heading), i) def test_no_sections_no_footer(self) -> None: """Test for text having no sections or footer.""" @@ -1566,6 
+1572,7 @@ def test_with_section_no_footer(self) -> None: '==title==\n' 'content') result = extract_sections(text, self.site) + self.assertEqual(result.sections.index('title'), 0) self._extract_sections_tests( result, 'text\n\n', [('==title==', '\ncontent')]) @@ -1601,6 +1608,11 @@ def test_with_h4_and_h2_sections(self) -> None: '==title 2==\n' 'content') result = extract_sections(text, self.site) + self.assertEqual(result.sections.index('title'), 0) + self.assertEqual(result.sections.index(('title', 4)), 0) + with self.assertRaisesRegex(ValueError, + r"\('title', 2\) not found in Section"): + result.sections.index(('title', 2)) self._extract_sections_tests( result, 'text\n\n', @@ -1676,6 +1688,38 @@ def test_title(self) -> None: title='Pywikibot' ) + def test_index(self) -> None: + """Test index behaviour of SectionList.""" + text = """ += Intro = +== History == +== Usage == +=== Details === += References = +""" + result = extract_sections(text, self.site) + self._extract_sections_tests(result, '\n', [ + ('= Intro =', '\n'), + ('== History ==', '\n'), + ('== Usage ==', '\n'), + ('=== Details ===', '\n'), + ('= References =', '\n'), + ]) + sections = result.sections + self.assertIsInstance(sections, textlib.SectionList) + self.assertEqual(sections.index('Details'), 3) + self.assertEqual(sections.index('Details', 3), 3) + self.assertEqual(sections.index(sections[2]), 2) + self.assertEqual(sections.index('Intro', -10, 3), 0) + header = 'Details', 2 + pattern = re.escape(f'{header!r} not found in Section headings/levels') + with self.assertRaisesRegex(ValueError, pattern): + sections.index(header) + header = 'Unknown' + with self.assertRaisesRegex( + ValueError, f'{header!r} not found in Section heading'): + sections.index(header) + if __name__ == '__main__': with suppress(SystemExit): diff --git a/tests/titletranslate_tests.py b/tests/titletranslate_tests.py index 1eb8be5e89..eefc989eac 100755 --- a/tests/titletranslate_tests.py +++ b/tests/titletranslate_tests.py @@ 
-1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for titletranslate module.""" # -# (C) Pywikibot team, 2022 +# (C) Pywikibot team, 2022-2025 # # Distributed under the terms of the MIT license. # @@ -38,6 +38,14 @@ def test_translate(self, key) -> None: result = translate(page=self.get_mainpage(site), auto=False, hints=['5:', 'nl,en,zh'], site=site) self.assertLength(result, 6) + result = translate(page=self.get_mainpage(site)) + self.assertIsEmpty(result) + result = translate(page=self.get_mainpage(site), hints=['nl']) + self.assertLength(result, 1) + with self.assertRaisesRegex(RuntimeError, + 'Either page or site parameter must be ' + r'given with translate\(\)'): + translate() if __name__ == '__main__': diff --git a/tests/tools_deprecate_tests.py b/tests/tools_deprecate_tests.py index 728bb6f7c2..776a51d393 100755 --- a/tests/tools_deprecate_tests.py +++ b/tests/tools_deprecate_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for deprecation tools.""" # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. # @@ -15,9 +15,9 @@ PYTHON_VERSION, add_full_name, deprecate_arg, - deprecate_positionals, deprecated, deprecated_args, + deprecated_signature, remove_last_args, ) from tests.aspects import DeprecationTestCase @@ -76,7 +76,6 @@ def deprecated_func(foo=None): @deprecated() def deprecated_func_docstring(foo=None): """DEPRECATED. Deprecated function.""" - return foo @deprecated @@ -88,7 +87,6 @@ def deprecated_func2(foo=None): @deprecated def deprecated_func2_docstring(foo=None): """DEPRECATED, don't use this. Deprecated function.""" - return foo @deprecated(instead='baz') @@ -118,7 +116,6 @@ def deprecated_func_arg(foo=None): @deprecated def deprecated_func_docstring_arg(foo=None): """:param foo: Foo. DEPRECATED.""" - return foo @deprecated @@ -127,7 +124,6 @@ def deprecated_func_docstring_arg2(foo=None): :param foo: Foo. DEPRECATED. 
""" - return foo @deprecated_args(bah='foo') @@ -154,9 +150,9 @@ def deprecated_all2(foo): return foo -@deprecate_positionals() -def positionals_test_function(foo: str, *, - bar: int, baz: str = '') -> tuple[int, str]: +@deprecated_signature() +def positionals_test_function(foo: str, /, *, + bar: int, baz: str = '') -> tuple[str, int]: """Deprecating positional parameters.""" return foo + baz, bar ** 2 @@ -240,9 +236,9 @@ def deprecated_all2(self, foo): """Deprecating last positional parameter.""" return foo - @deprecate_positionals() - def test_method(self, foo: str, *, - bar: int = 5, baz: str = '') -> tuple[int, str]: + @deprecated_signature() + def test_method(self, foo: str, /, *, + bar: int = 5, baz: str = '') -> tuple[str, int]: """Deprecating positional parameters.""" return foo + baz, bar ** 2 @@ -613,81 +609,94 @@ def test_method_remove_last_args(self) -> None: " The value(s) provided for 'bar' have been dropped." ) - def test_deprecate_positionals(self) -> None: - """Test deprecation of positional parameters.""" - msg = ('Passing {param} as positional argument(s) to {func}() is ' - 'deprecated; use keyword arguments like {instead} instead.') + def test_deprecated_signature(self) -> None: + """Test deprecation of parameters signature.""" + msg1 = ('Passing {param} as positional argument(s) to {func}() is ' + 'deprecated; use keyword arguments like {instead} instead.') + msg2 = ( + 'Passing positional-only arguments as keywords to {qual}(): ' + 'foo is deprecated; ' + "use positional arguments like {func}('Pywiki') instead." 
+ ) f = DeprecatedMethodClass().test_method - func = 'DeprecatedMethodClass.test_method' + qual = f.__qualname__ + func = f.__name__ with self.subTest(test=1): rv1, rv2 = f('Pywiki', 1, 'bot') self.assertEqual(rv1, 'Pywikibot') self.assertEqual(rv2, 1) - self.assertOneDeprecation(msg.format(param="'bar', 'baz'", - func=func, - instead="bar=1, baz='bot'")) + self.assertOneDeprecation(msg1.format(param="'bar', 'baz'", + func=qual, + instead="bar=1, baz='bot'")) with self.subTest(test=2): rv1, rv2 = f('Pywiki', 2) self.assertEqual(rv1, 'Pywiki') self.assertEqual(rv2, 4) - self.assertOneDeprecation(msg.format(param="'bar'", - func=func, - instead='bar=2')) + self.assertOneDeprecation(msg1.format(param="'bar'", + func=qual, + instead='bar=2')) with self.subTest(test=3): rv1, rv2 = f('Pywiki', 3, baz='bot') self.assertEqual(rv1, 'Pywikibot') self.assertEqual(rv2, 9) - self.assertOneDeprecation(msg.format(param="'bar'", - func=func, - instead='bar=3')) + self.assertOneDeprecation(msg1.format(param="'bar'", + func=qual, + instead='bar=3')) with self.subTest(test=4): - rv1, rv2 = f('Pywiki', bar=4) + rv1, rv2 = f(foo='Pywiki') self.assertEqual(rv1, 'Pywiki') - self.assertEqual(rv2, 16) - self.assertNoDeprecation() + self.assertEqual(rv2, 25) + self.assertOneDeprecation(msg2.format(qual=qual, func=func)) with self.subTest(test=5): - rv1, rv2 = f(foo='Pywiki') + rv1, rv2 = f('Pywiki', bar=5) self.assertEqual(rv1, 'Pywiki') self.assertEqual(rv2, 25) self.assertNoDeprecation() f = positionals_test_function - func = 'positionals_test_function' + func = f.__name__ + qual = f.__qualname__ - with self.subTest(test=6): + with self.subTest(test=1): rv1, rv2 = f('Pywiki', 6, 'bot') self.assertEqual(rv1, 'Pywikibot') self.assertEqual(rv2, 36) - self.assertOneDeprecation(msg.format(param="'bar', 'baz'", - func=func, - instead="bar=6, baz='bot'")) + self.assertOneDeprecation(msg1.format(param="'bar', 'baz'", + func=qual, + instead="bar=6, baz='bot'")) with self.subTest(test=7): rv1, rv2 
= f('Pywiki', 7) self.assertEqual(rv1, 'Pywiki') self.assertEqual(rv2, 49) - self.assertOneDeprecation(msg.format(param="'bar'", - func=func, - instead='bar=7')) + self.assertOneDeprecation(msg1.format(param="'bar'", + func=qual, + instead='bar=7')) with self.subTest(test=8): rv1, rv2 = f('Pywiki', 8, baz='bot') self.assertEqual(rv1, 'Pywikibot') self.assertEqual(rv2, 64) - self.assertOneDeprecation(msg.format(param="'bar'", - func=func, - instead='bar=8')) + self.assertOneDeprecation(msg1.format(param="'bar'", + func=qual, + instead='bar=8')) with self.subTest(test=9): - rv1, rv2 = f('Pywiki', bar=9) + rv1, rv2 = f(foo='Pywiki', bar=9) self.assertEqual(rv1, 'Pywiki') self.assertEqual(rv2, 81) + self.assertOneDeprecation(msg2.format(qual=qual, func=func)) + + with self.subTest(test=10): + rv1, rv2 = f('Pywiki', bar=10) + self.assertEqual(rv1, 'Pywiki') + self.assertEqual(rv2, 100) self.assertNoDeprecation() def test_remove_last_args_invalid(self) -> None: diff --git a/tests/tools_tests.py b/tests/tools_tests.py index b3f578a53a..533ee25f8d 100755 --- a/tests/tools_tests.py +++ b/tests/tools_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Test tools package alone which don't fit into other tests.""" # -# (C) Pywikibot team, 2015-2024 +# (C) Pywikibot team, 2015-2025 # # Distributed under the terms of the MIT license. 
from __future__ import annotations @@ -1016,7 +1016,7 @@ class TestTinyCache(TestCase): net = False def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" self.foo = DecoratedMethods() super().setUp() @@ -1024,11 +1024,11 @@ def test_cached(self) -> None: """Test for cached decorator.""" self.assertEqual(self.foo.foo(), 'foo') # check computed value self.assertEqual(self.foo.read, 1) - self.assertTrue(hasattr(self.foo, '_foo')) + self.assertHasAttr(self.foo, '_foo') self.assertEqual(self.foo.foo(), 'foo') # check cached value self.assertEqual(self.foo.read, 1) # bar() was called only once del self.foo._foo - self.assertFalse(hasattr(self.foo, '_foo')) + self.assertNotHasAttr(self.foo, '_foo') self.assertEqual(self.foo.foo(), 'foo') # check computed value self.assertEqual(self.foo.__doc__, 'Test class to verify cached decorator.') @@ -1038,7 +1038,7 @@ def test_cached_property(self) -> None: """Test for cached property decorator.""" self.assertEqual(self.foo.bar, 'bar') self.assertEqual(self.foo.read, 1) - self.assertTrue(hasattr(self.foo, '_bar')) + self.assertHasAttr(self.foo, '_bar') self.assertEqual(self.foo.bar, 'bar') self.assertEqual(self.foo.read, 1) @@ -1054,7 +1054,7 @@ def test_cached_with_force(self) -> None: """Test for cached decorator with force enabled.""" self.assertEqual(self.foo.quux(), 'quux') self.assertEqual(self.foo.read, 1) - self.assertTrue(hasattr(self.foo, '_quux')) + self.assertHasAttr(self.foo, '_quux') self.assertEqual(self.foo.quux(force=True), 'quux') self.assertEqual(self.foo.read, 2) @@ -1063,7 +1063,7 @@ def test_cached_with_argse(self) -> None: self.assertEqual(self.foo.method_with_args(force=False), 'method_with_args') self.assertEqual(self.foo.read, 1) - self.assertTrue(hasattr(self.foo, '_method_with_args')) + self.assertHasAttr(self.foo, '_method_with_args') with self.assertRaises(TypeError): self.foo.method_with_args(True) with self.assertRaises(TypeError): diff --git a/tests/ui_tests.py b/tests/ui_tests.py index 
a7c1b4ef0c..fabe47149b 100755 --- a/tests/ui_tests.py +++ b/tests/ui_tests.py @@ -10,9 +10,12 @@ import io import logging import os +import platform import unittest -from contextlib import redirect_stdout, suppress +from contextlib import nullcontext, redirect_stdout, suppress +from functools import partial from typing import NoReturn +from unicodedata import normalize from unittest.mock import patch import pywikibot @@ -26,12 +29,17 @@ VERBOSE, WARNING, ) +from pywikibot.tools import suppress_warnings from pywikibot.userinterfaces import ( terminal_interface_base, terminal_interface_unix, terminal_interface_win32, ) -from pywikibot.userinterfaces.transliteration import NON_LATIN_DIGITS, _trans +from pywikibot.userinterfaces.transliteration import ( + NON_ASCII_DIGITS, + Transliterator, + _trans, +) from tests.aspects import TestCase, TestCaseBase @@ -49,7 +57,7 @@ class UITestCase(TestCaseBase): net = False def setUp(self) -> None: - """Setup test. + """Set up test. Here we patch standard input, output, and errors, essentially redirecting to `StringIO` streams. 
@@ -185,10 +193,7 @@ def test_exception_tb(self) -> None: self.assertEqual(stderrlines[1], 'Traceback (most recent call last):') self.assertEqual(stderrlines[3], " raise ExceptionTestError('Testing Exception')") - - end_str = ': Testing Exception' - self.assertTrue(stderrlines[-1].endswith(end_str), - f'\n{stderrlines[-1]!r} does not end with {end_str!r}') + self.assertEndsWith(stderrlines[-1], ': Testing Exception') class TestTerminalInput(UITestCase): @@ -208,13 +213,21 @@ def testInput(self) -> None: self.assertEqual(returned, 'input to read') def test_input_yn(self) -> None: - self.strin.write('\n') - self.strin.seek(0) - returned = pywikibot.input_yn('question', False, automatic_quit=False) - - self.assertEqual(self.strout.getvalue(), '') - self.assertEqual(self.strerr.getvalue(), 'question ([y]es, [N]o): ') - self.assertFalse(returned) + if platform.python_implementation() == 'PyPy': + context = suppress_warnings(r'subprocess \d+ is still running', + ResourceWarning) + else: + context = nullcontext() + with context: + self.strin.write('\n') + self.strin.seek(0) + returned = pywikibot.input_yn('question', False, + automatic_quit=False) + + self.assertEqual(self.strout.getvalue(), '') + self.assertEqual(self.strerr.getvalue(), + 'question ([y]es, [N]o): ') + self.assertFalse(returned) def _call_input_choice(self): rv = pywikibot.input_choice( @@ -245,12 +258,18 @@ def testInputChoiceCapital(self) -> None: self.assertEqual(returned, 'n') def testInputChoiceNonCapital(self) -> None: - self.strin.write('n\n') - self.strin.seek(0) - returned = self._call_input_choice() - - self.assertEqual(self.strerr.getvalue(), self.input_choice_output) - self.assertEqual(returned, 'n') + if platform.python_implementation() == 'PyPy': + context = suppress_warnings(r'subprocess \d+ is still running', + ResourceWarning) + else: + context = nullcontext() + with context: + self.strin.write('n\n') + self.strin.seek(0) + returned = self._call_input_choice() + + 
self.assertEqual(self.strerr.getvalue(), self.input_choice_output) + self.assertEqual(returned, 'n') def testInputChoiceIncorrectAnswer(self) -> None: self.strin.write('X\nN\n') @@ -353,19 +372,28 @@ def testOutputTransliteratedUnicodeText(self) -> None: '\x1b[93mu\x1b[0m\x1b[93me\x1b[0m\x1b[93mo\x1b[0m\n') -class TestTransliterationTable(TestCase): +class TestTransliteration(TestCase): """Test transliteration table.""" net = False - def test_latin_digits(self) -> None: - """Test that non latin digits are in transliteration table.""" - for lang, digits in NON_LATIN_DIGITS.items(): + @classmethod + def setUpClass(cls) -> None: + """Set up Transliterator function.""" + trans = Transliterator('ascii') + cls.t = staticmethod(partial(trans.transliterate, prev='P')) + + def test_ascii_digits(self) -> None: + """Test that non ascii digits are in transliteration table.""" + for lang, digits in NON_ASCII_DIGITS.items(): with self.subTest(lang=lang): - for char in digits: + for i, char in enumerate(digits): + self.assertTrue(char.isdigit()) + self.assertFalse(char.isascii()) self.assertIn(char, _trans, f'{char!r} not in transliteration table') + self.assertEqual(self.t(char), str(i)) def test_transliteration_table(self) -> None: """Test transliteration table consistency.""" @@ -373,6 +401,16 @@ def test_transliteration_table(self) -> None: with self.subTest(): self.assertNotEqual(k, v) + def test_transliterator(self) -> None: + """Test Transliterator.""" + for char in 'äöü': + self.assertEqual(self.t(char), normalize('NFD', char)[0] + 'e') + self.assertEqual(self.t('1'), '?') + self.assertEqual(self.t('◌'), 'P') + self.assertEqual(self.t('ッ'), '?') + self.assertEqual(self.t('仝'), 'P') + self.assertEqual(self.t('ຫ'), 'h') + # TODO: add tests for background colors. 
class FakeUITest(TestCase): diff --git a/tests/upload_tests.py b/tests/upload_tests.py index 03c85a476a..96e8043ca7 100755 --- a/tests/upload_tests.py +++ b/tests/upload_tests.py @@ -4,7 +4,7 @@ These tests write to the wiki. """ # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. # @@ -64,13 +64,13 @@ def warn_callback(warnings) -> None: # First upload the warning with warnings enabled page = pywikibot.FilePage(self.site, 'MP_sounds-pwb.png') - self.assertFalse(hasattr(self, '_file_key')) + self.assertNotHasAttr(self, '_file_key') self.site.upload(page, source_filename=self.sounds_png, comment='pywikibot test', chunk_size=chunk_size, ignore_warnings=warn_callback) # Check that the warning happened and it's cached - self.assertTrue(hasattr(self, '_file_key')) + self.assertHasAttr(self, '_file_key') self.assertIs(self._offset, True) self.assertRegex(self._file_key, r'[0-9a-z]+.[0-9a-z]+.\d+.png') self._verify_stash() @@ -100,9 +100,7 @@ def _test_continue_filekey(self, chunk_size) -> None: # Check if it's still cached with self.assertAPIError('siiinvalidsessiondata') as cm: self.site.stash_info(self._file_key) - self.assertTrue(cm.exception.info.startswith('File not found'), - f'info ({cm.exception.info}) did not start with ' - '"File not found"') + self.assertStartsWith(cm.exception.info, 'File not found') @unittest.expectedFailure # T367314 def test_continue_filekey_once(self) -> None: diff --git a/tests/utils.py b/tests/utils.py index 5bf389eda4..bba040aa44 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -9,9 +9,11 @@ import inspect import os import sys +import tempfile import unittest import warnings from contextlib import contextmanager, suppress +from pathlib import Path from subprocess import PIPE, Popen, TimeoutExpired from typing import Any, NoReturn @@ -23,6 +25,7 @@ from pywikibot.exceptions import APIError from pywikibot.login import LoginStatus from pywikibot.site import Namespace 
+from pywikibot.tools import PYTHON_VERSION from pywikibot.tools.collections import EMPTY_DEFAULT from tests import _pwb_py @@ -259,16 +262,21 @@ def __setitem__(self, key, value) -> None: self._cache[key] = (value, False) def get(self, key, get_default=True, cache=True, expiry=False): - """Return dry data.""" + """Return dry cached data or default value.""" # Default values are always expired, so only expiry=False doesn't force # a reload force = expiry is not False - if not force and key in self._cache: - loaded = self._cache[key] - if not loaded[1] and not get_default: + if not force and (key in self._cache or 'general' in self._cache): + try: + value, is_default = self._cache[key] + except KeyError: + value, is_default = self._cache['general'] + value = value[key] + + if not is_default and not get_default: raise KeyError(key) - return loaded[0] + return value if get_default: default = EMPTY_DEFAULT @@ -341,7 +349,7 @@ def __init__(self, code, fam, user) -> None: self._siteinfo._cache['case'] = ( 'case-sensitive' if self.family.name == 'wiktionary' else 'first-letter', True) - self._siteinfo._cache['mainpage'] = 'Main Page' + self._siteinfo._cache['mainpage'] = ('Main Page', True) extensions = [] if self.family.name == 'wikisource': extensions.append({'name': 'ProofreadPage'}) @@ -387,7 +395,7 @@ def image_repository(self): def data_repository(self): """Return Site object for data repository e.g. 
Wikidata.""" - if self.hostname().endswith('.beta.wmflabs.org'): + if self.hostname().endswith('.beta.wmcloud.org'): # TODO: Use definition for beta cluster's wikidata code, fam = None, None fam_name = self.hostname().split('.')[-4] @@ -467,6 +475,10 @@ def execute(command: list[str], *, data_in=None, timeout=None): :param command: executable to run and arguments to use """ + if PYTHON_VERSION < (3, 9): + command.insert(1, '-W ignore::FutureWarning:pwb:46') + command.insert(1, '-W ignore::FutureWarning:__main__:46') + env = os.environ.copy() # Prevent output by test package; e.g. 'max_retries reduced from x to y' @@ -515,22 +527,52 @@ def execute_pwb(args: list[str], *, the *error* parameter was removed. .. versionchanged:: 9.1 parameters except *args* are keyword only. + .. versionchanged:: 10.4 + coverage is used if running github actions and a temporary file + is used for overrides. :param args: list of arguments for pwb.py :param overrides: mapping of pywikibot symbols to test replacements """ + tmp_path: Path | None = None command = [sys.executable] + use_coverage = os.environ.get('GITHUB_ACTIONS') + + if use_coverage: + # Test running and coverage is installed, + # enable coverage with subprocess + with suppress(ModuleNotFoundError): + import coverage # noqa: F401 + command.extend(['-m', 'coverage', 'run', '--parallel-mode']) if overrides: - command.append('-c') - overrides = '; '.join( - f'{key} = {value}' for key, value in overrides.items()) - command.append( - f'import pwb; import pywikibot; {overrides}; pwb.main()') + override_code = 'import pwb, pywikibot\n' + override_code += '\n'.join(f'{k} = {v}' for k, v in overrides.items()) + override_code += '\npwb.main()' + + if use_coverage: + # Write overrides in temporary file + with tempfile.NamedTemporaryFile( + 'w', suffix='.py', delete=False) as f: + f.write(override_code) + tmp_path = Path(f.name) + command.append(f.name) + else: + command.extend(['-c', override_code]) + else: command.append(_pwb_py) - 
return execute(command=command + args, data_in=data_in, timeout=timeout) + try: + # Run subprocess + result = execute( + command=command + args, data_in=data_in, timeout=timeout) + finally: + # delete temporary file if created + if tmp_path and tmp_path.exists(): + tmp_path.unlink() + + return result @contextmanager diff --git a/tests/wbtypes_tests.py b/tests/wbtypes_tests.py index a321f9211f..e1e0a47f7d 100755 --- a/tests/wbtypes_tests.py +++ b/tests/wbtypes_tests.py @@ -335,12 +335,21 @@ def test_WbTime_normalization(self) -> None: self.assertNotEqual(t11, t12) self.assertEqual(t11_normalized, t12_normalized) self.assertEqual(t13.normalize().timezone, -300) + # test _normalize handler functions + self.assertEqual(pywikibot.WbTime._normalize_millennium(1301), 2000) + self.assertEqual(pywikibot.WbTime._normalize_millennium(-1301), -2000) + self.assertEqual(pywikibot.WbTime._normalize_century(1301), 1400) + self.assertEqual(pywikibot.WbTime._normalize_century(-1301), -1400) + self.assertEqual(pywikibot.WbTime._normalize_decade(1301), 1300) + self.assertEqual(pywikibot.WbTime._normalize_decade(-1301), -1300) + self.assertEqual( + pywikibot.WbTime._normalize_power_of_ten(123456, 7), 123500) + self.assertEqual( + pywikibot.WbTime._normalize_power_of_ten(-987654, 3), -1000000) def test_WbTime_normalization_very_low_precision(self) -> None: """Test WbTime normalization with very low precision.""" repo = self.get_repo() - # flake8 is being annoying, so to reduce line length, I'll make - # some aliases here year_10000 = pywikibot.WbTime.PRECISION['10000'] year_100000 = pywikibot.WbTime.PRECISION['100000'] year_1000000 = pywikibot.WbTime.PRECISION['1000000'] @@ -423,15 +432,18 @@ def test_WbTime_timestamp(self) -> None: def test_WbTime_errors(self) -> None: """Test WbTime precision errors.""" repo = self.get_repo() - regex = r'^no year given$' - with self.assertRaisesRegex(ValueError, regex): + regex = '^year must be an int, not NoneType$' + with 
self.assertRaisesRegex(TypeError, regex): + pywikibot.WbTime(None, site=repo, precision=15) + regex = "missing 1 required positional argument: 'year'" + with self.assertRaisesRegex(TypeError, regex): pywikibot.WbTime(site=repo, precision=15) - with self.assertRaisesRegex(ValueError, regex): + with self.assertRaisesRegex(TypeError, regex): pywikibot.WbTime(site=repo, precision='invalid_precision') - regex = r'^Invalid precision: "15"$' + regex = '^Invalid precision: "15"$' with self.assertRaisesRegex(ValueError, regex): pywikibot.WbTime(site=repo, year=2020, precision=15) - regex = r'^Invalid precision: "invalid_precision"$' + regex = '^Invalid precision: "invalid_precision"$' with self.assertRaisesRegex(ValueError, regex): pywikibot.WbTime(site=repo, year=2020, precision='invalid_precision') @@ -460,12 +472,14 @@ def test_comparison(self) -> None: self.assertEqual(t2.second, 0) self.assertEqual(t1.toTimestr(), '+00000002010-01-01T12:43:00Z') self.assertEqual(t2.toTimestr(), '-00000002005-01-01T16:45:00Z') - self.assertRaises(ValueError, pywikibot.WbTime, site=repo, - precision=15) - self.assertRaises(ValueError, pywikibot.WbTime, site=repo, - precision='invalid_precision') + with self.assertRaisesRegex(ValueError, 'Invalid precision: "15"'): + pywikibot.WbTime(0, site=repo, precision=15) + with self.assertRaisesRegex(ValueError, + 'Invalid precision: "invalid_precision"'): + pywikibot.WbTime(0, site=repo, precision='invalid_precision') self.assertIsInstance(t1.toTimestamp(), pywikibot.Timestamp) - self.assertRaises(ValueError, t2.toTimestamp) + with self.assertRaisesRegex(ValueError, 'BC dates.*Timestamp'): + t2.toTimestamp() def test_comparison_types(self) -> None: """Test WbTime comparison with different types.""" @@ -473,10 +487,14 @@ def test_comparison_types(self) -> None: t1 = pywikibot.WbTime(site=repo, year=2010, hour=12, minute=43) t2 = pywikibot.WbTime(site=repo, year=-2005, hour=16, minute=45) self.assertGreater(t1, t2) - self.assertRaises(TypeError, 
operator.lt, t1, 5) - self.assertRaises(TypeError, operator.gt, t1, 5) - self.assertRaises(TypeError, operator.le, t1, 5) - self.assertRaises(TypeError, operator.ge, t1, 5) + with self.assertRaisesRegex(TypeError, 'not supported'): + operator.lt(t1, 5) + with self.assertRaisesRegex(TypeError, 'not supported'): + operator.gt(t1, 5) + with self.assertRaisesRegex(TypeError, 'not supported'): + operator.le(t1, 5) + with self.assertRaisesRegex(TypeError, 'not supported'): + operator.ge(t1, 5) def test_comparison_timezones(self) -> None: """Test comparisons with timezones.""" @@ -621,18 +639,20 @@ def test_WbQuantity_string(self) -> None: def test_WbQuantity_formatting_bound(self) -> None: """Test WbQuantity formatting with bounds.""" repo = self.get_repo() - q = pywikibot.WbQuantity(amount='0.044405586', error='0', site=repo) + amount = '0.044405586' + repr_amount = repr(Decimal(amount)) + q = pywikibot.WbQuantity(amount=amount, error='0', site=repo) self.assertEqual(str(q), - '{{\n' - ' "amount": "+{val}",\n' - ' "lowerBound": "+{val}",\n' - ' "unit": "1",\n' - ' "upperBound": "+{val}"\n' - '}}'.format(val='0.044405586')) + f'{{\n' + f' "amount": "+{amount}",\n' + f' "lowerBound": "+{amount}",\n' + f' "unit": "1",\n' + f' "upperBound": "+{amount}"\n' + f'}}') self.assertEqual(repr(q), - 'WbQuantity(amount={val}, ' - 'upperBound={val}, lowerBound={val}, ' - 'unit=1)'.format(val='0.044405586')) + f'WbQuantity(amount={repr_amount}, ' + f'upperBound={repr_amount}, ' + f"lowerBound={repr_amount}, unit='1')") def test_WbQuantity_self_equality(self) -> None: """Test WbQuantity equality.""" @@ -704,18 +724,19 @@ def test_WbQuantity_unbound(self) -> None: def test_WbQuantity_formatting_unbound(self) -> None: """Test WbQuantity formatting without bounds.""" - q = pywikibot.WbQuantity(amount='0.044405586', site=self.repo) + amount = '0.044405586' + q = pywikibot.WbQuantity(amount=amount, site=self.repo) self.assertEqual(str(q), - '{{\n' - ' "amount": "+{val}",\n' - ' 
"lowerBound": null,\n' - ' "unit": "1",\n' - ' "upperBound": null\n' - '}}'.format(val='0.044405586')) + f'{{\n' + f' "amount": "+{amount}",\n' + f' "lowerBound": null,\n' + f' "unit": "1",\n' + f' "upperBound": null\n' + f'}}') self.assertEqual(repr(q), - 'WbQuantity(amount={val}, ' - 'upperBound=None, lowerBound=None, ' - 'unit=1)'.format(val='0.044405586')) + f'WbQuantity(amount={Decimal(amount)!r}, ' + f'upperBound=None, lowerBound=None, ' + f"unit='1')") def test_WbQuantity_fromWikibase_unbound(self) -> None: """Test WbQuantity.fromWikibase() instantiating without bounds.""" @@ -837,7 +858,7 @@ class TestWbGeoShapeNonDry(WbRepresentationTestCase): """ def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" self.commons = pywikibot.Site('commons') self.page = Page(self.commons, 'Data:Lyngby Hovedgade.map') super().setUp() @@ -912,7 +933,7 @@ class TestWbTabularDataNonDry(WbRepresentationTestCase): """ def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" self.commons = pywikibot.Site('commons') self.page = Page(self.commons, 'Data:Bea.gov/GDP by state.tab') super().setUp() diff --git a/tests/weblinkchecker_tests.py b/tests/weblinkchecker_tests.py new file mode 100755 index 0000000000..5a1b9a5146 --- /dev/null +++ b/tests/weblinkchecker_tests.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +"""Tests for the weblinkchecker script.""" +# +# (C) Pywikibot team, 2025 +# +# Distributed under the terms of the MIT license. 
+# +from __future__ import annotations + +from contextlib import suppress + +import pywikibot +from scripts.weblinkchecker import WeblinkCheckerRobot +from tests.aspects import TestCase, unittest + + +class TestWeblinkchecker(TestCase): + + """Test cases for weblinkchecker.""" + + family = 'wikipedia' + code = 'test' + + def test_different_uri_schemes(self) -> None: + """Test different uri schemes on test page.""" + site = self.get_site('wikipedia:test') + page = pywikibot.Page(site, 'User:DerIch27/weblink test') + generator = [page] + bot = WeblinkCheckerRobot(site=site, generator=generator) + bot.run() + self.assertEqual(1, bot.counter['read']) + + +if __name__ == '__main__': + with suppress(SystemExit): + unittest.main() diff --git a/tests/wikibase_edit_tests.py b/tests/wikibase_edit_tests.py index 89a98fce8e..2e62130b7b 100755 --- a/tests/wikibase_edit_tests.py +++ b/tests/wikibase_edit_tests.py @@ -5,7 +5,7 @@ class in edit_failure_tests.py """ # -# (C) Pywikibot team, 2014-2024 +# (C) Pywikibot team, 2014-2025 # # Distributed under the terms of the MIT license. 
# @@ -540,7 +540,7 @@ class TestWikibaseDataSiteWbsetActions(WikibaseTestCase): write = True def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" self.testsite = self.get_repo() self.item = pywikibot.ItemPage(self.testsite, 'Q68') badge = pywikibot.ItemPage(self.testsite, 'Q608') diff --git a/tests/wikibase_tests.py b/tests/wikibase_tests.py index 1a28147ad5..583123f12c 100755 --- a/tests/wikibase_tests.py +++ b/tests/wikibase_tests.py @@ -50,10 +50,9 @@ class TestLoadRevisionsCaching(BasePageLoadRevisionsCachingTestBase, """Test site.loadrevisions() caching.""" - def setUp(self) -> None: - """Setup test.""" + def setup_page(self) -> None: + """Set up test page.""" self._page = ItemPage(self.get_repo(), 'Q15169668') - super().setUp() def test_page_text(self) -> None: """Test site.loadrevisions() with Page.text.""" @@ -67,7 +66,7 @@ class TestGeneral(WikidataTestCase): @classmethod def setUpClass(cls) -> None: - """Setup test class.""" + """Set up test class.""" super().setUpClass() enwiki = pywikibot.Site('en', 'wikipedia') cls.mainpage = pywikibot.Page(pywikibot.page.Link('Main Page', enwiki)) @@ -91,7 +90,7 @@ def testWikibase(self) -> None: item2 = ItemPage(repo, 'q5296') self.assertEqual(item2.getID(), 'Q5296') item2.get() - self.assertTrue(item2.labels['en'].lower().endswith('main page')) + self.assertEndsWith(item2.labels['en'].lower(), 'main page') prop = PropertyPage(repo, 'Property:P21') self.assertEqual(prop.type, 'wikibase-item') self.assertEqual(prop.namespace(), 120) @@ -164,7 +163,7 @@ class TestLoadUnknownType(WikidataTestCase): dry = True def setUp(self) -> None: - """Setup test.""" + """Set up test.""" super().setUp() wikidata = self.get_repo() self.wdp = ItemPage(wikidata, 'Q60') @@ -231,12 +230,12 @@ class TestItemLoad(WikidataTestCase): @classmethod def setUpClass(cls) -> None: - """Setup test class.""" + """Set up test class.""" super().setUpClass() cls.site = cls.get_site('enwiki') def setUp(self) -> None: - """Setup test.""" + 
"""Set up test.""" super().setUp() self.nyc = pywikibot.Page(pywikibot.page.Link('New York City', self.site)) @@ -248,14 +247,14 @@ def test_item_normal(self) -> None: self.assertEqual(item._link._title, 'Q60') self.assertEqual(item._defined_by(), {'ids': 'Q60'}) self.assertEqual(item.id, 'Q60') - self.assertFalse(hasattr(item, '_title')) - self.assertFalse(hasattr(item, '_site')) + self.assertNotHasAttr(item, '_title') + self.assertNotHasAttr(item, '_site') self.assertEqual(item.title(), 'Q60') self.assertEqual(item.getID(), 'Q60') self.assertEqual(item.getID(numeric=True), 60) - self.assertFalse(hasattr(item, '_content')) + self.assertNotHasAttr(item, '_content') item.get() - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, '_content') def test_item_lazy_initialization(self) -> None: """Test that Wikibase items are properly initialized lazily.""" @@ -279,14 +278,14 @@ def test_load_item_set_id(self) -> None: item = ItemPage(wikidata, '-1') self.assertEqual(item._link._title, '-1') item.id = 'Q60' - self.assertFalse(hasattr(item, '_content')) + self.assertNotHasAttr(item, '_content') self.assertEqual(item.getID(), 'Q60') - self.assertFalse(hasattr(item, '_content')) + self.assertNotHasAttr(item, '_content') item.get() - self.assertTrue(hasattr(item, '_content')) - self.assertIn('en', item.labels) + self.assertHasAttr(item, '_content') + self.assertIn('mul', item.labels) # label could change - self.assertIn(item.labels['en'], ['New York', 'New York City']) + self.assertIn(item.labels['mul'], ['New York', 'New York City']) self.assertEqual(item.title(), 'Q60') def test_reuse_item_set_id(self) -> None: @@ -301,7 +300,7 @@ def test_reuse_item_set_id(self) -> None: wikidata = self.get_repo() item = ItemPage(wikidata, 'Q60') item.get() - self.assertIn(item.labels['en'], label) + self.assertIn(item.labels['mul'], label) # When the id attribute is modified, the ItemPage goes into # an inconsistent state. 
@@ -313,7 +312,7 @@ def test_reuse_item_set_id(self) -> None: # it doesn't help to clear this piece of saved state. del item._content # The labels are not updated; assertion showing undesirable behaviour: - self.assertIn(item.labels['en'], label) + self.assertIn(item.labels['mul'], label) def test_empty_item(self) -> None: """Test empty wikibase item. @@ -325,8 +324,12 @@ def test_empty_item(self) -> None: item = ItemPage(wikidata) self.assertEqual(item._link._title, '-1') self.assertLength(item.labels, 0) + self.assertEqual(str(item.labels), 'LanguageDict({})') + self.assertEqual(repr(item.labels), 'LanguageDict({})') self.assertLength(item.descriptions, 0) self.assertLength(item.aliases, 0) + self.assertEqual(str(item.aliases), 'AliasesDict({})') + self.assertEqual(repr(item.aliases), 'AliasesDict({})') self.assertLength(item.claims, 0) self.assertLength(item.sitelinks, 0) @@ -363,24 +366,24 @@ def test_item_missing(self) -> None: item = ItemPage(wikidata, 'Q7') self.assertEqual(item._link._title, 'Q7') self.assertEqual(item.title(), 'Q7') - self.assertFalse(hasattr(item, '_content')) + self.assertNotHasAttr(item, '_content') self.assertEqual(item.id, 'Q7') self.assertEqual(item.getID(), 'Q7') numeric_id = item.getID(numeric=True) self.assertIsInstance(numeric_id, int) self.assertEqual(numeric_id, 7) - self.assertFalse(hasattr(item, '_content')) + self.assertNotHasAttr(item, '_content') regex = r"^Page .+ doesn't exist\.$" with self.assertRaisesRegex(NoPageError, regex): item.get() - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, '_content') self.assertEqual(item.id, 'Q7') self.assertEqual(item.getID(), 'Q7') self.assertEqual(item._link._title, 'Q7') self.assertEqual(item.title(), 'Q7') with self.assertRaisesRegex(NoPageError, regex): item.get() - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, '_content') self.assertEqual(item._link._title, 'Q7') self.assertEqual(item.getID(), 'Q7') self.assertEqual(item.title(), 
'Q7') @@ -401,10 +404,10 @@ def test_fromPage_noprops(self) -> None: page = self.nyc item = ItemPage.fromPage(page) self.assertEqual(item._link._title, '-1') - self.assertTrue(hasattr(item, 'id')) - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, 'id') + self.assertHasAttr(item, '_content') self.assertEqual(item.title(), 'Q60') - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, '_content') self.assertEqual(item.id, 'Q60') self.assertEqual(item.getID(), 'Q60') self.assertEqual(item.getID(numeric=True), 60) @@ -416,10 +419,10 @@ def test_fromPage_noprops_with_section(self) -> None: page = pywikibot.Page(self.nyc.site, self.nyc.title() + '#foo') item = ItemPage.fromPage(page) self.assertEqual(item._link._title, '-1') - self.assertTrue(hasattr(item, 'id')) - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, 'id') + self.assertHasAttr(item, '_content') self.assertEqual(item.title(), 'Q60') - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, '_content') self.assertEqual(item.id, 'Q60') self.assertEqual(item.getID(), 'Q60') self.assertEqual(item.getID(numeric=True), 60) @@ -434,18 +437,18 @@ def test_fromPage_props(self) -> None: item = ItemPage.fromPage(page) self.assertEqual(item._link._title, 'Q60') self.assertEqual(item.id, 'Q60') - self.assertFalse(hasattr(item, '_content')) + self.assertNotHasAttr(item, '_content') self.assertEqual(item.title(), 'Q60') - self.assertFalse(hasattr(item, '_content')) + self.assertNotHasAttr(item, '_content') self.assertEqual(item.id, 'Q60') self.assertEqual(item.getID(), 'Q60') self.assertEqual(item.getID(numeric=True), 60) - self.assertFalse(hasattr(item, '_content')) + self.assertNotHasAttr(item, '_content') item.get() - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, '_content') self.assertTrue(item.exists()) item2 = ItemPage.fromPage(page) - self.assertTrue(item is item2) + self.assertIs(item, item2) def test_fromPage_lazy(self) 
-> None: """Test item from page with lazy_load.""" @@ -454,10 +457,10 @@ def test_fromPage_lazy(self) -> None: self.assertEqual(item._defined_by(), {'sites': 'enwiki', 'titles': 'New York City'}) self.assertEqual(item._link._title, '-1') - self.assertFalse(hasattr(item, 'id')) - self.assertFalse(hasattr(item, '_content')) + self.assertNotHasAttr(item, 'id') + self.assertNotHasAttr(item, '_content') self.assertEqual(item.title(), 'Q60') - self.assertTrue(hasattr(item, '_content')) + self.assertHasAttr(item, '_content') self.assertEqual(item.id, 'Q60') self.assertEqual(item.getID(), 'Q60') self.assertEqual(item.getID(numeric=True), 60) @@ -482,10 +485,10 @@ def _test_fromPage_noitem(self, link) -> None: item = ItemPage.fromPage(page, lazy_load=True) - self.assertFalse(hasattr(item, 'id')) - self.assertTrue(hasattr(item, '_title')) - self.assertTrue(hasattr(item, '_site')) - self.assertFalse(hasattr(item, '_content')) + self.assertNotHasAttr(item, 'id') + self.assertHasAttr(item, '_title') + self.assertHasAttr(item, '_site') + self.assertNotHasAttr(item, '_content') self.assertEqual(item._link._title, '-1') # the method 'exists' does not raise an exception @@ -1013,10 +1016,9 @@ class TestItemBasePageMethods(WikidataTestCase, BasePageMethodsTestBase): """Test behavior of ItemPage methods inherited from BasePage.""" - def setUp(self) -> None: - """Setup tests.""" + def setup_page(self) -> None: + """Set up test page.""" self._page = ItemPage(self.get_repo(), 'Q60') - super().setUp() def test_basepage_methods(self) -> None: """Test ItemPage methods inherited from superclass BasePage.""" @@ -1033,10 +1035,9 @@ class TestPageMethodsWithItemTitle(WikidataTestCase, BasePageMethodsTestBase): """Test behavior of Page methods for wikibase item.""" - def setUp(self) -> None: - """Setup tests.""" + def setup_page(self) -> None: + """Set up tests.""" self._page = pywikibot.Page(self.site, 'Q60') - super().setUp() def test_basepage_methods(self) -> None: """Test Page methods 
inherited from superclass BasePage with Q60.""" @@ -1065,7 +1066,7 @@ class TestLinks(WikidataTestCase): } def setUp(self) -> None: - """Setup Tests.""" + """Set up tests.""" super().setUp() self.wdp = ItemPage(self.get_repo(), 'Q60') self.wdp.id = 'Q60' @@ -1099,7 +1100,7 @@ class TestWriteNormalizeData(TestCase): net = False def setUp(self) -> None: - """Setup tests.""" + """Set up tests.""" super().setUp() self.data_out = { 'labels': {'en': {'language': 'en', 'value': 'Foo'}}, @@ -1130,6 +1131,14 @@ def test_normalized_data(self) -> None: copy.deepcopy(self.data_out)) self.assertEqual(response, self.data_out) + def test_normalized_invalid_data(self) -> None: + """Test _normalizeData() method for invalid data.""" + data = copy.deepcopy(self.data_out) + data['aliases']['en'] = tuple(data['aliases']['en']) + with self.assertRaisesRegex(TypeError, + "Unsupported value type 'tuple'"): + ItemPage._normalizeData(data) + class TestPreloadingEntityGenerator(TestCase): @@ -1290,7 +1299,7 @@ class TestAlternateNamespaces(WikidataTestCase): @classmethod def setUpClass(cls) -> None: - """Setup test class.""" + """Set up test class.""" super().setUpClass() cls.get_repo()._namespaces = NamespacesDict({ @@ -1427,7 +1436,7 @@ class TestJSON(WikidataTestCase): """Test cases to test toJSON() functions.""" def setUp(self) -> None: - """Setup test.""" + """Set up test.""" super().setUp() wikidata = self.get_repo() self.wdp = ItemPage(wikidata, 'Q60') @@ -1440,6 +1449,31 @@ def setUp(self) -> None: del self.wdp._content['lastrevid'] del self.wdp._content['pageid'] + def test_base_data(self) -> None: + """Test labels and aliases collections.""" + item = self.wdp + self.assertIn('en', item.labels) + self.assertEqual(item.labels['en'], 'New York City') + self.assertIn('en', item.aliases) + self.assertIn('NYC', item.aliases['en']) + + def test_str_repr(self) -> None: + """Test str and repr of labels and aliases.""" + self.assertEqual( + str(self.wdp.labels), + "LanguageDict({'af': 'New 
York Stad', 'als': 'New York City', " + "'am': 'ኒው ዮርክ ከተማ', 'an': 'Nueva York', ...})" + ) + self.assertEqual( + str(self.wdp.aliases), + "AliasesDict({'be': ['Горад Нью-Ёрк'], 'be-tarask': ['Нью Ёрк'], " + "'ca': ['Ciutat de Nova York', 'New York City'," + " 'New York City (New York)', 'NYC', 'N. Y.', 'N Y'], " + "'da': ['New York City'], ...})" + ) + self.assertEqual(str(self.wdp.labels), repr(self.wdp.labels)) + self.assertEqual(str(self.wdp.aliases), repr(self.wdp.aliases)) + def test_itempage_json(self) -> None: """Test itempage json.""" old = json.dumps(self.wdp._content, indent=2, sort_keys=True) @@ -1500,6 +1534,47 @@ def test_json_diff(self) -> None: self.assertEqual(diff, expected) +class TestHighLevelApi(WikidataTestCase): + + """Test high-level API for Wikidata.""" + + def test_get_best_claim(self) -> None: + """Test getting the best claim for a property.""" + wikidata = self.get_repo() + item = pywikibot.ItemPage(wikidata, 'Q90') + item.get() + self.assertEqual(item.get_best_claim('P17').getTarget(), + pywikibot.ItemPage(wikidata, 'Q142')) + + def test_get_value_at_timestamp(self) -> None: + """Test getting the value of a claim at a specific timestamp.""" + wikidata = self.get_repo() + item = pywikibot.ItemPage(wikidata, 'Q90') + item.get() + wbtime = pywikibot.WbTime(year=2021, month=1, day=1, site=wikidata) + claim = item.get_value_at_timestamp('P17', wbtime) + self.assertEqual(claim, pywikibot.ItemPage(wikidata, 'Q142')) + + def test_with_monolingual_good_language(self) -> None: + """Test getting a monolingual text claim with a good language.""" + wikidata = self.get_repo() + item = pywikibot.ItemPage(wikidata, 'Q183') + item.get() + wbtime = pywikibot.WbTime(year=2021, month=1, day=1, site=wikidata) + claim = item.get_value_at_timestamp('P1448', wbtime, 'ru') + self.assertIsInstance(claim, pywikibot.WbMonolingualText) + self.assertEqual(claim.language, 'ru') + + def test_with_monolingual_wrong_language(self) -> None: + """Test getting a 
monolingual text claim with a wrong language.""" + wikidata = self.get_repo() + item = pywikibot.ItemPage(wikidata, 'Q183') + item.get() + wbtime = pywikibot.WbTime(year=2021, month=1, day=1, site=wikidata) + claim = item.get_value_at_timestamp('P1448', wbtime, 'en') + self.assertIsNone(claim, None) + + if __name__ == '__main__': with suppress(SystemExit): unittest.main() diff --git a/tests/xmlreader_tests.py b/tests/xmlreader_tests.py index e9fa31914a..c53afdc968 100755 --- a/tests/xmlreader_tests.py +++ b/tests/xmlreader_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for xmlreader module.""" # -# (C) Pywikibot team, 2009-2024 +# (C) Pywikibot team, 2009-2025 # # Distributed under the terms of the MIT license. # @@ -37,7 +37,7 @@ def test_XmlDumpAllRevs(self) -> None: self.assertEqual('24278', pages[0].id) self.assertEqual('185185', pages[0].revisionid) self.assertEqual('188924', pages[3].revisionid) - self.assertTrue(pages[0].text.startswith('Pears are [[tree]]s of')) + self.assertStartsWith(pages[0].text, 'Pears are [[tree]]s of') self.assertEqual('Quercusrobur', pages[1].username) self.assertEqual('Pear', pages[0].title) diff --git a/tox.ini b/tox.ini index 0c859e37ef..4fe7dadff0 100644 --- a/tox.ini +++ b/tox.ini @@ -1,16 +1,17 @@ [tox] -# minversion = 1.7.2 needed for skip_missing_interpreters -minversion = 1.7.2 +minversion = 3.21 skipsdist = True skip_missing_interpreters = True envlist = commit-message - lint-py{39,312} + lint-py{39,313} [params] # Note: tox 4 does not support multiple lines when doing parameters # substitution. 
generate_user_files = -W error::UserWarning -m pwb generate_user_files -family:wikipedia -lang:test -v +# ignores: gui.py (needs tkinter), memento.py (has too many timeouts) +DOCTEST_IGNORES = --ignore-glob=*gui.py --ignore-glob=*memento.py [testenv] basepython = @@ -19,6 +20,7 @@ basepython = py310: python3.10 py311: python3.11 py312: python3.12 + py313: python3.13 pypy: pypy3 setenv = VIRTUAL_ENV={envdir} @@ -37,7 +39,7 @@ commands = deeptest: python {[params]generate_user_files} deeptest-py38: python -m unittest discover -vv -p "*_tests.py" - deeptest-py312: pytest + deeptest-py313: pytest fasttest: python {[params]generate_user_files} fasttest: pytest --version @@ -56,30 +58,53 @@ deps = deeptest: .[html] deeptest: .[scripts] - deeptest-py312: .[wikitextparser] - deeptest-py312: pytest >= 7.0.1 - deeptest-py312: pytest-subtests != 0.14.0 + deeptest-py313: .[wikitextparser] + deeptest-py313: pytest >= 7.0.1 + deeptest-py313: pytest-subtests != 0.14.0 [testenv:typing] -basepython = python3.8 -deps = pytest-mypy +basepython = python3.9 +deps = + pytest-mypy + types-PyMySQL + types-requests commands = mypy --version pytest --mypy -m mypy pywikibot [testenv:commit-message] -basepython = python3.8 +basepython = python3.9 deps = commit-message-validator commands = commit-message-validator [testenv:doctest] +basepython = python3 +skip_install = True +allowlist_externals = tox +deps = +commands = + tox -e doctest-py38 + tox -e doctest-py313 + +[testenv:doctest-py38] basepython = python3.8 commands = + python -m pytest --version python {[params]generate_user_files} - pytest --version -# gui.py needs tkinter -# memento.py has too many timeout - pytest pywikibot --doctest-modules --ignore-glob="*gui.py" --ignore-glob="*memento.py" + python -m pytest pywikibot --doctest-modules {[params]DOCTEST_IGNORES} + +deps = + pytest >= 7.0.1 + wikitextparser + .[eventstreams] + .[mysql] + +[testenv:doctest-py313] +basepython = python3.13 +commands = + python -m pytest --version + # 
user files already exists from doctest-py38 run + python -m pytest pywikibot --doctest-modules {[params]DOCTEST_IGNORES} deps = pytest >= 7.0.1 @@ -91,7 +116,7 @@ deps = commands = {posargs} [testenv:doc] -basepython = python3.12 +basepython = python3.13 commands = sphinx-build -M html ./docs ./docs/_build -j auto deps = @@ -99,7 +124,7 @@ deps = -rdocs/requirements.txt [testenv:rstcheck] -basepython = python3.12 +basepython = python3.13 commands = rstcheck --version rstcheck --report-level WARNING -r . @@ -108,7 +133,7 @@ deps = -rdocs/requirements.txt [testenv:sphinx] -basepython = python3.12 +basepython = python3.13 commands = sphinx-build -M html ./docs ./docs/_build -j auto -D html_theme=nature deps = @@ -124,7 +149,7 @@ deps = # R100: raise in except handler without from # W503: line break before binary operator; against current PEP 8 recommendation -ignore = B007,B028,E704,F824,R100,W503 +ignore = B007,B028,B042,E704,F824,R100,W503 enable-extensions = N818 count = True @@ -222,9 +247,11 @@ ignore_regex=:keyword # pep8-naming classmethod-decorators = classmethod,classproperty + [pycodestyle] exclude = .tox,.git,./*.egg,build,./scripts/i18n/* + [pytest] minversion = 7.0.1 testpaths = tests