diff --git a/.env b/.env new file mode 100644 index 00000000..84c43fa4 --- /dev/null +++ b/.env @@ -0,0 +1,2 @@ +# TODO: do not commit +INVENIO_THEME_FRONTPAGE=False diff --git a/.tx/config b/.tx/config index 80930907..4b0a93db 100644 --- a/.tx/config +++ b/.tx/config @@ -25,8 +25,8 @@ [main] host = https://app.transifex.com -[o:inveniosoftware:p:invenio:r:invenio-github-messages] -file_filter = invenio_github/translations//LC_MESSAGES/messages.po -source_file = invenio_github/translations/messages.pot +[o:inveniosoftware:p:invenio:r:invenio-vcs-messages] +file_filter = invenio_vcs/translations//LC_MESSAGES/messages.po +source_file = invenio_vcs/translations/messages.pot source_lang = en type = PO diff --git a/CHANGES.rst b/CHANGES.rst index 18c8819e..79e16784 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -26,6 +26,21 @@ Changes ======= +Version v4.0.0 (released 2025-XX-XX) + +- global!: renamed package to `invenio-vcs` to reflect the extended functionality. +- models!: renamed tables to `vcs_` instead of `github_`, and added columns to identify the provider (e.g. `github`) + - Created an Alembic migration to automate this upgrade. Please note that on large instances (i.e. more than ~50k repos) this migration is likely to lead to severe stability issues and a full lock on a number of tables for several minutes. Instead of the automated migration, please refer to the upgrade guide in this module's documentation. +- models!: moved the user's full list of repositories from the `extra_data` column of `oauthclient_remoteaccount` to `vcs_repositories`, even for non-activated repos. Activated repos are instead signified by non-null `hook` and `enabled_by_id` values. User access to repos is stored in the new `vcs_repository_users` table with the access rights propagated during the sync. + - The Alembic migration also moves the JSON data into the table automatically, but this is a slow operation that scales linearly with the size of the `oauthclient_remoteaccount` table. 
If it is too slow, please refer to the upgrade guide for a faster manual method. +- refactor!: support for multiple VCS providers through a refactored module architecture. Contrib providers can now be created by implementing the abstract classes `RepositoryServiceProviderFactory` and `RepositoryServiceProvider`. Other than the contrib provider implementations, this module is now fully provider-agnostic. + - New provider implementations may be created either in this module or anywhere else in the codebase by implementing the relevant abstract classes. +- feat: support for GitHub and GitLab via provider implementations. +- config!: moved provider-specific config options into the constructor arguments of the relevant class. Providers can now be declared as a list of class instances. + - Please see the documentation for details on the new configuration format. + +BREAKING CHANGE: various major updates will require changes to the database tables and the config. Please see the upgrade guide for more details. 
+ Version v3.0.1 (released 2025-07-30) - api: fix set alternate zipball URL when tag and branch having same name diff --git a/MANIFEST.in b/MANIFEST.in index d699aee6..b7bdb3d3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -47,12 +47,12 @@ recursive-include docs *.py recursive-include docs *.rst recursive-include docs *.txt recursive-include docs Makefile -recursive-include invenio_github *.html -recursive-include invenio_github *.js -recursive-include invenio_github *.json -recursive-include invenio_github *.less -recursive-include invenio_github *.mo -recursive-include invenio_github *.po -recursive-include invenio_github *.pot -recursive-include invenio_github *.py +recursive-include invenio_vcs *.html +recursive-include invenio_vcs *.js +recursive-include invenio_vcs *.json +recursive-include invenio_vcs *.less +recursive-include invenio_vcs *.mo +recursive-include invenio_vcs *.po +recursive-include invenio_vcs *.pot +recursive-include invenio_vcs *.py include .git-blame-ignore-revs diff --git a/README.rst b/README.rst index f993df5a..99eb1862 100644 --- a/README.rst +++ b/README.rst @@ -22,7 +22,7 @@ as an Intergovernmental Organization or submit itself to any jurisdiction. ================ - Invenio-GitHub + Invenio-VCS ================ .. image:: https://img.shields.io/travis/inveniosoftware/invenio-github.svg diff --git a/RELEASE-NOTES.rst b/RELEASE-NOTES.rst deleted file mode 100644 index 614423b9..00000000 --- a/RELEASE-NOTES.rst +++ /dev/null @@ -1,36 +0,0 @@ -========================== - Invenio-GitHub v1.0.0a28 -========================== - -Invenio-GitHub v1.0.0a28 was released on October 24, 2022. - -About ------ - -Invenio module that adds GitHub integration to the platform. - -*This is an experimental developer preview release.* - -What's new ----------- - -- Initial public release. 
- -Installation ------------- - - $ pip install invenio-github==v1.0.0a28 - -Documentation -------------- - - http://invenio-github.readthedocs.io/ - -Happy hacking and thanks for flying Invenio-GitHub. - -| Invenio Development Team -| Email: info@inveniosoftware.org -| IRC: #invenio on irc.freenode.net -| Twitter: http://twitter.com/inveniosoftware -| GitHub: https://github.com/inveniosoftware/invenio-github -| URL: http://inveniosoftware.org diff --git a/docs/Makefile b/docs/Makefile index 7bec3421..3e435a89 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -87,9 +87,9 @@ qthelp: @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Invenio-GitHub.qhcp" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Invenio-VCS.qhcp" @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Invenio-GitHub.qhc" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Invenio-VCS.qhc" applehelp: $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp @@ -104,8 +104,8 @@ devhelp: @echo @echo "Build finished." @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/Invenio-GitHub" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Invenio-GitHub" + @echo "# mkdir -p $$HOME/.local/share/devhelp/Invenio-VCS" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Invenio-VCS" @echo "# devhelp" epub: diff --git a/docs/api.rst b/docs/api.rst index 8bdd0d1c..8b48daaf 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,25 +1,9 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. + Copyright (C) 2025 CERN. - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. API Docs diff --git a/docs/authors.rst b/docs/authors.rst index 2fe04ec2..87232286 100644 --- a/docs/authors.rst +++ b/docs/authors.rst @@ -1,25 +1,8 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. - - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Copyright (C) 2025 CERN. 
+ Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. .. include:: ../AUTHORS.rst diff --git a/docs/changes.rst b/docs/changes.rst index ff5c15b1..e6c39d2c 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -1,25 +1,8 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. - - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Copyright (C) 2025 CERN. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. .. include:: ../CHANGES.rst diff --git a/docs/conf.py b/docs/conf.py index 75c64ae5..4ab11b51 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -25,7 +25,7 @@ """Sphinx configuration.""" -from invenio_github import __version__ +from invenio_vcs import __version__ # -- General configuration ------------------------------------------------ @@ -61,8 +61,8 @@ master_doc = "index" # General information about the project. 
-project = "Invenio-GitHub" -copyright = "2016, CERN" +project = "Invenio-VCS" +copyright = "2025, CERN" author = "CERN" # The version info for the project you're documenting, acts as replacement for @@ -123,15 +123,15 @@ html_theme = "alabaster" html_theme_options = { - "description": "Invenio module that adds GitHub integration to the platform.", + "description": "Invenio module that adds VCS integration to the platform.", "github_user": "inveniosoftware", - "github_repo": "invenio-github", + "github_repo": "invenio-vcs", "github_button": False, "github_banner": True, "show_powered_by": False, "extra_nav_links": { - "invenio-github@GitHub": "https://github.com/inveniosoftware/invenio-github", - "invenio-github@PyPI": "https://pypi.python.org/pypi/invenio-github/", + "invenio-vcs@GitHub": "https://github.com/inveniosoftware/invenio-github", + "invenio-vcs@PyPI": "https://pypi.python.org/pypi/invenio-github/", }, } @@ -236,7 +236,7 @@ # html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = "invenio-github_namedoc" +htmlhelp_basename = "invenio-vcs_namedoc" # -- Options for LaTeX output --------------------------------------------- @@ -257,8 +257,8 @@ latex_documents = [ ( master_doc, - "invenio-github.tex", - "invenio-github Documentation", + "invenio-vcs.tex", + "invenio-vcs Documentation", "CERN", "manual", ), @@ -289,9 +289,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, "invenio-github", "invenio-github Documentation", [author], 1) -] +man_pages = [(master_doc, "invenio-vcs", "invenio-vcs Documentation", [author], 1)] # If true, show URL addresses after external links. 
# man_show_urls = False @@ -305,10 +303,10 @@ texinfo_documents = [ ( master_doc, - "invenio-github", - "Invenio-GitHub Documentation", + "invenio-vcs", + "Invenio-VCS Documentation", author, - "invenio-github", + "invenio-vcs", "Invenio module that adds GitHub integration to the platform.", "Miscellaneous", ), diff --git a/docs/contributing.rst b/docs/contributing.rst index a28a49d0..8ad3dcf9 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -1,25 +1,8 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. - - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Copyright (C) 2025 CERN. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. .. include:: ../CONTRIBUTING.rst diff --git a/docs/index.rst b/docs/index.rst index 2cdfd71e..b1938f2a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,25 +1,9 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. + Copyright (C) 2025 CERN. 
- Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. .. include:: ../README.rst @@ -28,13 +12,14 @@ User's Guide ------------ This part of the documentation will show you how to get started in using -Invenio-GitHub. +Invenio-VCS. .. toctree:: :maxdepth: 2 installation usage + upgrading API Reference diff --git a/docs/installation.rst b/docs/installation.rst index cc3059e3..dc0fd0bd 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -1,25 +1,8 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. - - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Copyright (C) 2025 CERN. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. .. include:: ../INSTALL.rst diff --git a/docs/license.rst b/docs/license.rst index 2fbc8ee6..9c7323a6 100644 --- a/docs/license.rst +++ b/docs/license.rst @@ -1,19 +1,16 @@ +.. + This file is part of Invenio. + Copyright (C) 2025 CERN. + + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + License ======= -Invenio is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. - -Invenio is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. -You should have received a copy of the GNU General Public License -along with Invenio; if not, write to the Free Software Foundation, Inc., -59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +.. include:: ../LICENSE -In applying this license, CERN does not waive the privileges and immunities -granted to it by virtue of its status as an Intergovernmental Organization or -submit itself to any jurisdiction. +.. 
note:: + In applying this license, CERN does not waive the privileges and immunities + granted to it by virtue of its status as an Intergovernmental Organization or + submit itself to any jurisdiction. diff --git a/docs/make.bat b/docs/make.bat index 2cd0325a..5fed4a51 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -127,9 +127,9 @@ if "%1" == "qthelp" ( echo. echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: - echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Invenio-GitHub.qhcp + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Invenio-VCS.qhcp echo.To view the help file: - echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Invenio-GitHub.ghc + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Invenio-VCS.ghc goto end ) diff --git a/docs/upgrading.rst b/docs/upgrading.rst new file mode 100644 index 00000000..7882faac --- /dev/null +++ b/docs/upgrading.rst @@ -0,0 +1,129 @@ +.. + This file is part of Invenio. + Copyright (C) 2025 CERN. + + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + +Upgrading +========= + +====== +v4.0.0 +====== + +This version consists of a major refactor of the module and a full rename to ``invenio-vcs`` (from ``invenio-github``). +The new version has now been made generic and can support any VCS provider by implementing the relevant abstract classes. + +Contrib implementations are provided for GitHub and GitLab. +GitHub is supported with the exact same set of features as before, meaning this module can continue to be used for the original +purpose of ``invenio-github`` with just some migrations and configuration changes required. + +Please follow this guide if: + +- you are **not** using InvenioRDM; or +- you would like to try out ``invenio-vcs`` before InvenioRDM v14 is released. + + - This is not officially supported but should work for the most part. 
+ +RDM-specific instructions can instead be found in the `InvenioRDM upgrade guide `_. + +-------------------------- +1. Update the dependencies +-------------------------- + +In your ``Pipfile`` (or any similar file you are using to manage dependencies), replace the ``invenio-github`` package with ``invenio-vcs`` and update its version. +Additionally, you will need to ensure some other dependencies are up to date for compatibility with the new changes. + +.. code-block:: toml + + [packages] + # ... + invenio-vcs = ">=4.0.0,<5.0.0" + invenio-rdm-records = "TODO" + invenio-app-rdm = "TODO" + invenio-oauthclient = "TODO" + +.. note:: + + ``invenio-vcs`` is no longer packaged by default with InvenioRDM, as was the case with ``invenio-github``. + You must declare it as an explicit dependency at the instance level. + +Next, run the install operation and make sure the old module is no longer installed. +Having both installed simultaneously will lead to numerous conflicts, especially with Alembic migrations. + +.. code-block:: bash + + invenio-cli install + pip uninstall invenio-github + +---------------------------------- +2. Perform the database migrations +---------------------------------- + +Depending on the size of your instance, the migrations can be performed either automatically by running an Alembic script, or manually by +carefully following the instructions in this guide. + +If your instance meets one of these criteria, please use the manual method to avoid database stability issues: + +- An ``oauthclient_remoteaccount`` table with more than 50k rows +- A ``github_repositories`` table with more than 100k rows + +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +2a. Automated Alembic script +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Run the upgrade command: + +.. code-block:: bash + + pipenv run invenio alembic upgrade + +^^^^^^^^^^^^^^^^^ +2b. Manual method +^^^^^^^^^^^^^^^^^ + +.. 
code-block:: sql + + BEGIN; + ALTER TABLE github_repositories RENAME TO vcs_repositories; + ALTER TABLE vcs_repositories ALTER COLUMN github_id TYPE VARCHAR(255); + ALTER TABLE vcs_repositories ALTER COLUMN github_id SET NOT NULL; + ALTER TABLE vcs_repositories RENAME github_id TO provider_id; + ALTER TABLE vcs_repositories ALTER COLUMN hook TYPE VARCHAR(255); + ALTER TABLE vcs_repositories ALTER COLUMN hook DROP NOT NULL; + ALTER TABLE vcs_repositories ADD COLUMN provider VARCHAR(255) DEFAULT 'github' NOT NULL; + ALTER TABLE vcs_repositories ADD COLUMN default_branch VARCHAR(255) DEFAULT 'master' NOT NULL; + ALTER TABLE vcs_repositories ADD COLUMN description VARCHAR(10000); + ALTER TABLE vcs_repositories ADD COLUMN html_url VARCHAR(10000); + ALTER TABLE vcs_repositories ADD COLUMN license_spdx VARCHAR(255); + ALTER TABLE vcs_repositories RENAME user_id TO enabled_by_id; + DROP INDEX ix_github_repositories_name; + DROP INDEX ix_github_repositories_github_id; + ALTER TABLE vcs_repositories ADD CONSTRAINT uq_vcs_repositories_provider_provider_id UNIQUE (provider, provider_id); + ALTER TABLE vcs_repositories ADD CONSTRAINT uq_vcs_repositories_provider_name UNIQUE (provider, name); + COMMIT; + +Before running the next block, populate the new ``html_url`` column for every existing row of ``vcs_repositories``; the block below sets that column to ``NOT NULL`` and fails while any row is still empty. (TODO: document the exact data-migration steps.) + +.. 
code-block:: sql + + BEGIN; + ALTER TABLE vcs_repositories ALTER COLUMN html_url SET NOT NULL; + ALTER TABLE github_releases RENAME TO vcs_releases; + ALTER TABLE vcs_releases ALTER COLUMN release_id TYPE VARCHAR(255); + ALTER TABLE vcs_releases ALTER COLUMN release_id SET NOT NULL; + ALTER TABLE vcs_releases RENAME release_id TO provider_id; + ALTER TABLE vcs_releases ADD COLUMN provider VARCHAR(255) DEFAULT 'github' NOT NULL; + ALTER TABLE vcs_releases ALTER COLUMN errors TYPE JSONB USING errors::text::jsonb; + ALTER TABLE vcs_releases DROP CONSTRAINT uq_github_releases_release_id; + ALTER TABLE vcs_releases ADD CONSTRAINT uq_vcs_releases_provider_id_provider UNIQUE (provider_id, provider); + ALTER TABLE vcs_releases ADD CONSTRAINT uq_vcs_releases_provider_id_provider_tag UNIQUE (provider_id, provider, tag); + CREATE TABLE vcs_repository_users ( + repository_id UUID NOT NULL, + user_id INTEGER NOT NULL, + PRIMARY KEY (repository_id, user_id), + CONSTRAINT fk_vcs_repository_users_repository_id_vcs_repositories FOREIGN KEY(repository_id) REFERENCES vcs_repositories (id), + CONSTRAINT fk_vcs_repository_users_user_id_accounts_user FOREIGN KEY(user_id) REFERENCES accounts_user (id) + ); + COMMIT; diff --git a/docs/usage.rst b/docs/usage.rst index e8ebe1ac..d54954cf 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -1,28 +1,11 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. - - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Copyright (C) 2025 CERN. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. Usage ===== -.. automodule:: invenio_github +.. automodule:: invenio_vcs diff --git a/invenio_github/api.py b/invenio_github/api.py deleted file mode 100644 index 374b5d10..00000000 --- a/invenio_github/api.py +++ /dev/null @@ -1,728 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Invenio. -# Copyright (C) 2023-2025 CERN. -# Copyright (C) 2024 KTH Royal Institute of Technology. -# -# Invenio is free software; you can redistribute it -# and/or modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 2 of the -# License, or (at your option) any later version. -# -# Invenio is distributed in the hope that it will be -# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Invenio; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, -# MA 02111-1307, USA. -# -# In applying this license, CERN does not -# waive the privileges and immunities granted to it by virtue of its status -# as an Intergovernmental Organization or submit itself to any jurisdiction. 
- -"""Invenio module that adds GitHub integration to the platform.""" - -import json -from abc import abstractmethod -from contextlib import contextmanager -from copy import deepcopy -from urllib.parse import urlparse - -import github3 -import requests -from flask import current_app -from invenio_access.permissions import authenticated_user -from invenio_access.utils import get_identity -from invenio_db import db -from invenio_i18n import gettext as _ -from invenio_oauth2server.models import Token as ProviderToken -from invenio_oauthclient.handlers import token_getter -from invenio_oauthclient.models import RemoteAccount, RemoteToken -from invenio_oauthclient.proxies import current_oauthclient -from sqlalchemy.orm.exc import NoResultFound -from werkzeug.local import LocalProxy -from werkzeug.utils import cached_property - -from invenio_github.models import Release, ReleaseStatus, Repository -from invenio_github.proxies import current_github -from invenio_github.tasks import sync_hooks as sync_hooks_task -from invenio_github.utils import iso_utcnow, parse_timestamp, utcnow - -from .errors import ( - ReleaseZipballFetchError, - RemoteAccountDataNotSet, - RemoteAccountNotFound, - RepositoryAccessError, - RepositoryNotFoundError, - UnexpectedGithubResponse, -) - - -class GitHubAPI(object): - """Wrapper for GitHub API.""" - - def __init__(self, user_id=None): - """Create a GitHub API object.""" - self.user_id = user_id - - @cached_property - def api(self): - """Return an authenticated GitHub API.""" - return github3.login(token=self.access_token) - - @cached_property - def access_token(self): - """Return OAuth access token's value.""" - token = RemoteToken.get(self.user_id, self.remote.consumer_key) - if not token: - # The token is not yet in DB, it is retrieved from the request session. 
- return self.remote.get_request_token()[0] - return token.access_token - - @property - def session_token(self): - """Return OAuth session token.""" - session_token = None - if self.user_id is not None: - session_token = token_getter(self.remote) - if session_token: - token = RemoteToken.get( - self.user_id, self.remote.consumer_key, access_token=session_token[0] - ) - return token - return None - - remote = LocalProxy( - lambda: current_oauthclient.oauth.remote_apps[ - current_app.config["GITHUB_WEBHOOK_RECEIVER_ID"] - ] - ) - """Return OAuth remote application.""" - - def check_repo_access_permissions(self, repo): - """Checks permissions from user on repo. - - Repo has access if any of the following is True: - - - user is the owner of the repo - - user has access to the repo in GitHub (stored in RemoteAccount.extra_data.repos) - """ - if self.user_id and repo and repo.user_id: - user_is_owner = repo.user_id == int(self.user_id) - if user_is_owner: - return True - - if self.account and self.account.extra_data: - user_has_remote_access = self.user_available_repositories.get( - str(repo.github_id) - ) - if user_has_remote_access: - return True - - raise RepositoryAccessError( - user=self.user_id, repo=repo.name, repo_id=repo.github_id - ) - - @cached_property - def account(self): - """Return remote account.""" - return RemoteAccount.get(self.user_id, self.remote.consumer_key) - - @cached_property - def webhook_url(self): - """Return the url to be used by a GitHub webhook.""" - if not self.account.extra_data.get("tokens", {}).get("webhook"): - raise RemoteAccountDataNotSet( - self.user_id, _("Webhook data not found for user tokens (remote data).") - ) - - webhook_token = ProviderToken.query.filter_by( - id=self.account.extra_data["tokens"]["webhook"] - ).first() - if webhook_token: - wh_url = current_app.config.get("GITHUB_WEBHOOK_RECEIVER_URL") - if wh_url: - return wh_url.format(token=webhook_token.access_token) - else: - raise RuntimeError(_("You must set 
GITHUB_WEBHOOK_RECEIVER_URL.")) - - def init_account(self): - """Setup a new GitHub account.""" - if not self.account: - raise RemoteAccountNotFound( - self.user_id, _("Remote account was not found for user.") - ) - - ghuser = self.api.me() - # Setup local access tokens to be used by the webhooks - hook_token = ProviderToken.create_personal( - "github-webhook", - self.user_id, - scopes=["webhooks:event"], - is_internal=True, - ) - # Initial structure of extra data - self.account.extra_data = dict( - id=ghuser.id, - login=ghuser.login, - name=ghuser.name, - tokens=dict( - webhook=hook_token.id, - ), - repos=dict(), - last_sync=iso_utcnow(), - ) - db.session.add(self.account) - - def sync(self, hooks=True, async_hooks=True): - """Synchronize user repositories. - - :param bool hooks: True for syncing hooks. - :param bool async_hooks: True for sending of an asynchronous task to - sync hooks. - - .. note:: - - Syncing happens from GitHub's direction only. This means that we - consider the information on GitHub as valid, and we overwrite our - own state based on this information. - """ - github_repos = {} - for repo in self.api.repositories(): - if repo.permissions["admin"]: - github_repos[repo.id] = { - "id": repo.id, - "full_name": repo.full_name, - "description": repo.description, - "default_branch": repo.default_branch, - } - - if hooks: - self._sync_hooks(list(github_repos.keys()), asynchronous=async_hooks) - - # Update changed names for repositories stored in DB - db_repos = Repository.query.filter( - Repository.user_id == self.user_id, - ) - - for repo in db_repos: - gh_repo = github_repos.get(repo.github_id) - if gh_repo and repo.name != gh_repo["full_name"]: - repo.name = gh_repo["full_name"] - db.session.add(repo) - - # Remove ownership from repositories that the user has no longer - # 'admin' permissions, or have been deleted. 
- Repository.query.filter( - Repository.user_id == self.user_id, - ~Repository.github_id.in_(github_repos.keys()), - ).update({"user_id": None, "hook": None}, synchronize_session=False) - - # Update repos and last sync - self.account.extra_data.update( - dict( - repos=github_repos, - last_sync=iso_utcnow(), - ) - ) - self.account.extra_data.changed() - db.session.add(self.account) - - def _sync_hooks(self, repos, asynchronous=True): - """Check if a hooks sync task needs to be started.""" - if not asynchronous: - for repo_id in repos: - try: - self.sync_repo_hook(repo_id) - except RepositoryAccessError: - current_app.logger.warning( - str(RepositoryAccessError), exc_info=True - ) - except NoResultFound: - pass # Repository not in DB yet - else: - # If hooks will run asynchronously, we need to commit any changes done so far - db.session.commit() - sync_hooks_task.delay(self.user_id, repos) - - def _valid_webhook(self, url): - """Check if webhook url is valid. - - The webhook url is valid if it has the same host as the configured webhook url. - - :param str url: The webhook url to be checked. - :returns: True if the webhook url is valid, False otherwise. - """ - if not url: - return False - configured_host = urlparse(self.webhook_url).netloc - url_host = urlparse(url).netloc - if not (configured_host and url_host): - return False - return configured_host == url_host - - def sync_repo_hook(self, repo_id): - """Sync a GitHub repo's hook with the locally stored repo.""" - # Get the hook that we may have set in the past - gh_repo = self.api.repository_with_id(repo_id) - hooks = ( - hook - for hook in gh_repo.hooks() - if self._valid_webhook(hook.config.get("url", "")) - ) - hook = next(hooks, None) - - # If hook on GitHub exists, get or create corresponding db object and - # enable the hook. Otherwise remove the old hook information. 
- repo = Repository.get(repo_id, gh_repo.full_name) - - if hook: - if not repo: - repo = Repository.create(self.user_id, repo_id, gh_repo.full_name) - if not repo.enabled: - self.enable_repo(repo, hook.id) - else: - if repo: - self.disable_repo(repo) - - def check_sync(self): - """Check if sync is required based on last sync date.""" - # If refresh interval is not specified, we should refresh every time. - expiration = utcnow() - refresh_td = current_app.config.get("GITHUB_REFRESH_TIMEDELTA") - if refresh_td: - expiration -= refresh_td - last_sync = parse_timestamp(self.account.extra_data["last_sync"]) - return last_sync < expiration - - def create_hook(self, repo_id, repo_name): - """Create repository hook.""" - # Create hook - hook_config = dict( - url=self.webhook_url, - content_type="json", - secret=current_app.config["GITHUB_SHARED_SECRET"], - insecure_ssl="1" if current_app.config["GITHUB_INSECURE_SSL"] else "0", - ) - - ghrepo = self.api.repository_with_id(repo_id) - if ghrepo: - hooks = ( - h - for h in ghrepo.hooks() - if h.config.get("url", "") == hook_config["url"] - ) - hook = next(hooks, None) - - # If hook does not exist, create one. 
- if not hook: - hook = ghrepo.create_hook( - "web", # GitHub identifier for webhook service - hook_config, - events=["release"], - ) - else: - hook.edit(config=hook_config, events=["release"]) - - if hook: - # Get or create the repo - repo = Repository.get(github_id=repo_id, name=repo_name) - if not repo: - repo = Repository.create(self.user_id, repo_id, repo_name) - - self.enable_repo(repo, hook.id) - return True - - return False - - def remove_hook(self, repo_id, name): - """Remove repository hook.""" - repo = Repository.get(github_id=repo_id, name=name) - - if not repo: - raise RepositoryNotFoundError(repo_id) - - ghrepo = self.api.repository_with_id(repo_id) - if ghrepo: - hooks = ( - h - for h in ghrepo.hooks() - if self._valid_webhook(h.config.get("url", "")) - ) - hook = next(hooks, None) - if not hook or hook.delete(): - self.disable_repo(repo) - return True - return False - - def repo_last_published_release(self, repo): - """Retrieves the repository last release.""" - release_instance = None - release_object = repo.latest_release(ReleaseStatus.PUBLISHED) - if release_object: - release_instance = current_github.release_api_class(release_object) - return release_instance - - def get_repository_releases(self, repo): - """Retrieve repository releases. 
Returns API release objects.""" - self.check_repo_access_permissions(repo) - - # Retrieve releases and sort them by creation date - release_instances = [] - for release_object in repo.releases.order_by(Release.created): - release_instance = current_github.release_api_class(release_object) - release_instances.append(release_instance) - - return release_instances - - def get_user_repositories(self): - """Retrieves user repositories, containing db repositories plus remote repositories.""" - repos = deepcopy(self.user_available_repositories) - if repos: - # 'Enhance' our repos dict, from our database model - db_repos = Repository.query.filter( - Repository.github_id.in_( - [int(k) for k in self.user_available_repositories.keys()] - ) - ) - for repo in db_repos: - if str(repo.github_id) in repos: - release_instance = current_github.release_api_class( - repo.latest_release() - ) - repos[str(repo.github_id)]["instance"] = repo - repos[str(repo.github_id)]["latest"] = release_instance - return repos - - @property - def user_enabled_repositories(self): - """Retrieve user repositories from the model.""" - return Repository.query.filter(Repository.user_id == self.user_id) - - @property - def user_available_repositories(self): - """Retrieve user repositories from user's remote data.""" - return self.account.extra_data.get("repos", {}) - - def disable_repo(self, repo): - """Disables an user repository if the user has permission to do so.""" - self.check_repo_access_permissions(repo) - - repo.hook = None - repo.user_id = None - - def enable_repo(self, repo, hook): - """Enables an user repository if the user has permission to do so.""" - self.check_repo_access_permissions(repo) - - repo.hook = hook - repo.user_id = self.user_id - - def get_last_sync_time(self): - """Retrieves the last sync delta time from github's client extra data. - - Time is computed as the delta between now and the last sync time. 
- """ - if not self.account.extra_data.get("last_sync"): - raise RemoteAccountDataNotSet( - self.user_id, _("Last sync data is not set for user (remote data).") - ) - - extra_data = self.account.extra_data - return extra_data["last_sync"] - - def get_repository(self, repo_name=None, repo_github_id=None): - """Retrieves one repository. - - Checks for access permission. - """ - repo = Repository.get(name=repo_name, github_id=repo_github_id) - if not repo: - raise RepositoryNotFoundError(repo_name) - - # Might raise a RepositoryAccessError - self.check_repo_access_permissions(repo) - - return repo - - @classmethod - def _dev_api(cls): - """Get a developer instance for GitHub API access.""" - gh = github3.GitHub() - gh.set_client_id(cls.remote.consumer_key, cls.remote.consumer_secret) - return gh - - @classmethod - def check_token(cls, token): - """Check if an access token is authorized.""" - gh_api = cls._dev_api() - client_id, client_secret = gh_api.session.retrieve_client_credentials() - url = gh_api._build_url("applications", str(client_id), "token") - with gh_api.session.temporary_basic_auth(client_id, client_secret): - response = gh_api._post(url, data={"access_token": token}) - return response.status_code == 200 - - @classmethod - def revoke_token(cls, token): - """Revoke an access token.""" - gh_api = cls._dev_api() - client_id, client_secret = gh_api.session.retrieve_client_credentials() - url = gh_api._build_url("applications", str(client_id), "token") - with gh_api.session.temporary_basic_auth(client_id, client_secret): - response = gh_api._delete(url, data=json.dumps({"access_token": token})) - return response - - -class GitHubRelease(object): - """A GitHub release.""" - - def __init__(self, release): - """Constructor.""" - self.release_object = release - self._resolved_zipball_url = None - - @cached_property - def record(self): - """Release record.""" - return self.resolve_record() - - @cached_property - def gh(self): - """Return GitHubAPI object.""" - 
return GitHubAPI(user_id=self.event.user_id) - - @cached_property - def event(self): - """Get release event.""" - return self.release_object.event - - @cached_property - def payload(self): - """Return event payload.""" - return self.event.payload - - @cached_property - def release_payload(self): - """Return release metadata.""" - return self.payload["release"] - - @cached_property - def repository_payload(self): - """Return repository metadata.""" - return self.payload["repository"] - - @cached_property - def repository_object(self): - """Return repository model from database.""" - if self.release_object.repository_id: - repository = self.release_object.repository - else: - repository = Repository.query.filter_by( - user_id=self.event.user_id, - ).one() - return repository - - @cached_property - def release_file_name(self): - """Returns release zipball file name.""" - tag_name = self.release_payload["tag_name"] - repo_name = self.repository_payload["full_name"] - filename = f"{repo_name}-{tag_name}.zip" - return filename - - @cached_property - def release_zipball_url(self): - """Returns the release zipball URL.""" - return self.release_payload["zipball_url"] - - @cached_property - def user_identity(self): - """Generates release owner's user identity.""" - identity = get_identity(self.repository_object.user) - identity.provides.add(authenticated_user) - identity.user = self.repository_object.user - return identity - - @cached_property - def contributors(self): - """Get list of contributors to a repository. - - The list of contributors is fetched from Github API, filtered for type "User" and sorted by contributions. - - :returns: a generator of objects that contains contributors information. - :raises UnexpectedGithubResponse: when Github API returns a status code other than 200. 
- """ - max_contributors = current_app.config.get("GITHUB_MAX_CONTRIBUTORS_NUMBER", 30) - contributors_iter = self.gh.api.repository_with_id( - self.repository_object.github_id - ).contributors(number=max_contributors) - - # Consume the iterator to materialize the request and have a `last_status``. - contributors = list(contributors_iter) - status = contributors_iter.last_status - if status == 200: - # Sort by contributions and filter only users. - sorted_contributors = sorted( - (c for c in contributors if c.type == "User"), - key=lambda x: x.contributions, - reverse=True, - ) - - # Expand contributors using `Contributor.refresh()` - contributors = [x.refresh().as_dict() for x in sorted_contributors] - return contributors - else: - # Contributors fetch failed - raise UnexpectedGithubResponse( - _("Github returned unexpected code: %(status)s for release %(repo_id)s") - % {"status": status, "repo_id": self.repository_object.github_id} - ) - - @cached_property - def owner(self): - """Get owner of repository as a creator.""" - try: - owner = self.gh.api.repository_with_id( - self.repository_object.github_id - ).owner - return owner - except Exception: - return None - - # Helper functions - - def is_first_release(self): - """Checks whether the current release is the first release of the repository.""" - latest_release = self.repository_object.latest_release(ReleaseStatus.PUBLISHED) - return True if not latest_release else False - - def test_zipball(self): - """Test if the zipball URL is accessible and return the resolved URL.""" - return self.resolve_zipball_url() - - def resolve_zipball_url(self, cache=True): - """Resolve the zipball URL. - - This method will try to resolve the zipball URL by making a HEAD request, - handling the following edge cases: - - - In the case of a 300 Multiple Choices response, which can happen when a tag - and branch have the same name, it will try to fetch an "alternate" link. 
- - If the access token does not have the required scopes/permissions to access - public links, it will fallback to a non-authenticated request. - """ - if self._resolved_zipball_url and cache: - return self._resolved_zipball_url - - url = self.release_zipball_url - - # Execute a HEAD request to the zipball url to test if it is accessible. - response = self.gh.api.session.head(url, allow_redirects=True) - - # In case where there is a tag and branch with the same name, we might get back - # a "300 Multiple Choices" response, which requires fetching an "alternate" - # link. - if response.status_code == 300: - alternate_url = response.links.get("alternate", {}).get("url") - if alternate_url: - url = alternate_url # Use the alternate URL - response = self.gh.api.session.head(url, allow_redirects=True) - - # Another edge-case, is when the access token we have does not have the - # scopes/permissions to access public links. In that rare case we fallback to a - # non-authenticated request. - if response.status_code == 404: - current_app.logger.warning( - "GitHub zipball URL {url} not found, trying unauthenticated request.", - extra={"url": response.url}, - ) - response = requests.head(url, allow_redirects=True) - # If this response is successful we want to use the finally resolved URL to - # fetch the ZIP from. 
- if response.status_code == 200: - return response.url - - if response.status_code != 200: - raise ReleaseZipballFetchError() - - if cache: - self._resolved_zipball_url = response.url - - return response.url - - # High level API - - def release_failed(self): - """Set release status to FAILED.""" - self.release_object.status = ReleaseStatus.FAILED - - def release_processing(self): - """Set release status to PROCESSING.""" - self.release_object.status = ReleaseStatus.PROCESSING - - def release_published(self): - """Set release status to PUBLISHED.""" - self.release_object.status = ReleaseStatus.PUBLISHED - - def retrieve_remote_file(self, file_name): - """Retrieves a file from the repository, for the current release, using the github client. - - :param file_name: the name of the file to be retrieved from the repository. - :returns: the file contents or None, if the file if not fetched. - """ - gh_repo_owner = self.repository_payload["owner"]["login"] - gh_repo_name = self.repository_payload["name"] - gh_tag_name = self.release_payload["tag_name"] - try: - content = self.gh.api.repository(gh_repo_owner, gh_repo_name).file_contents( - path=file_name, ref=gh_tag_name - ) - except github3.exceptions.NotFoundError: - # github3 raises a github3.exceptions.NotFoundError if the file is not found - return None - return content - - @contextmanager - def fetch_zipball_file(self): - """Fetch release zipball file using the current github session.""" - session = self.gh.api.session - timeout = current_app.config.get("GITHUB_ZIPBALL_TIMEOUT", 300) - zipball_url = self.resolve_zipball_url() - with session.get(zipball_url, stream=True, timeout=timeout) as resp: - yield resp.raw - - def publish(self): - """Publish a GitHub release.""" - raise NotImplementedError - - def process_release(self): - """Processes a github release.""" - raise NotImplementedError - - def resolve_record(self): - """Resolves a record from the release. 
To be implemented by the API class implementation.""" - raise NotImplementedError - - def serialize_record(self): - """Serializes the release record.""" - raise NotImplementedError - - @property - @abstractmethod - def badge_title(self): - """Stores a string to render in the record badge title (e.g. 'DOI').""" - return None - - @property - @abstractmethod - def badge_value(self): - """Stores a string to render in the record badge value (e.g. '10.1234/invenio.1234').""" - raise NotImplementedError - - @property - def record_url(self): - """Release self url (e.g. github HTML url).""" - raise NotImplementedError diff --git a/invenio_github/oauth/handlers.py b/invenio_github/oauth/handlers.py deleted file mode 100644 index ad2cc15a..00000000 --- a/invenio_github/oauth/handlers.py +++ /dev/null @@ -1,87 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Invenio. -# Copyright (C) 2023 CERN. -# -# Invenio is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Invenio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Invenio. If not, see . -# -# In applying this licence, CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization -# or submit itself to any jurisdiction. 
- -"""Implement OAuth client handler.""" - -from flask import current_app, redirect, url_for -from flask_login import current_user -from invenio_db import db -from invenio_oauth2server.models import Token as ProviderToken -from invenio_oauthclient import oauth_unlink_external_id - -from invenio_github.api import GitHubAPI -from invenio_github.tasks import disconnect_github - - -def account_setup_handler(remote, token, resp): - """Perform post initialization.""" - try: - gh = GitHubAPI(user_id=token.remote_account.user_id) - gh.init_account() - gh.sync() - db.session.commit() - except Exception as e: - current_app.logger.warning(str(e), exc_info=True) - - -def disconnect_handler(remote): - """Disconnect callback handler for GitHub.""" - # User must be authenticated - if not current_user.is_authenticated: - return current_app.login_manager.unauthorized() - - external_method = "github" - external_ids = [ - i.id for i in current_user.external_identifiers if i.method == external_method - ] - if external_ids: - oauth_unlink_external_id(dict(id=external_ids[0], method=external_method)) - - github = GitHubAPI(user_id=int(current_user.id)) - token = github.session_token - - if token: - extra_data = token.remote_account.extra_data - - # Delete the token that we issued for GitHub to deliver webhooks - webhook_token_id = extra_data.get("tokens", {}).get("webhook") - ProviderToken.query.filter_by(id=webhook_token_id).delete() - - # Disable every GitHub webhooks from our side - repos = github.user_enabled_repositories.all() - repos_with_hooks = [] - for repo in repos: - if repo.hook: - repos_with_hooks.append((repo.github_id, repo.hook)) - github.disable_repo(repo) - - # Commit any changes before running the ascynhronous task - db.session.commit() - - # Send Celery task for webhooks removal and token revocation - disconnect_github.delay(token.access_token, repos_with_hooks) - - # Delete the RemoteAccount (along with the associated RemoteToken) - token.remote_account.delete() - 
db.session.commit() - - return redirect(url_for("invenio_oauthclient_settings.index")) diff --git a/invenio_github/oauth/remote_app.py b/invenio_github/oauth/remote_app.py deleted file mode 100644 index 89525b90..00000000 --- a/invenio_github/oauth/remote_app.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2023 CERN. -# -# Invenio-Github is free software; you can redistribute it and/or modify -# it under the terms of the MIT License; see LICENSE file for more details. -"""Github oauth app implementation for github integration.""" - - -from invenio_oauthclient.contrib.github import GitHubOAuthSettingsHelper - -from invenio_github.oauth.handlers import account_setup_handler, disconnect_handler - -request_token_params = {"scope": "read:user,user:email,admin:repo_hook,read:org"} - -helper = GitHubOAuthSettingsHelper() -github_app = helper.remote_app -github_app["disconnect_handler"] = disconnect_handler -github_app["signup_handler"]["setup"] = account_setup_handler -github_app["params"]["request_token_params"] = request_token_params diff --git a/invenio_github/__init__.py b/invenio_vcs/__init__.py similarity index 92% rename from invenio_github/__init__.py rename to invenio_vcs/__init__.py index 0157344c..aa52df5b 100644 --- a/invenio_github/__init__.py +++ b/invenio_vcs/__init__.py @@ -25,8 +25,8 @@ """Invenio module that adds GitHub integration to the platform.""" -from .ext import InvenioGitHub +from .ext import InvenioVCS -__version__ = "3.0.1" +__version__ = "4.0.0" -__all__ = ("__version__", "InvenioGitHub") +__all__ = ("__version__", "InvenioVCS") diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py new file mode 100644 index 00000000..a7ad75d2 --- /dev/null +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -0,0 +1,334 @@ +# +# This file is part of Invenio. +# Copyright (C) 2025 CERN. 
+# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Switch to generic git services""" + +import uuid +from datetime import datetime, timezone + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.ext.mutable import MutableDict +from sqlalchemy_utils import JSONType, UUIDType + +# revision identifiers, used by Alembic. +revision = "1754318294" +down_revision = "b0eaee37b545" +# You cannot rename an Alembic branch. So we will have to keep +# the branch label `invenio-github` despite changing the module +# to `invenio-vcs`. +branch_labels = () +depends_on = None + + +def upgrade(): + """Upgrade database.""" + op.rename_table("github_repositories", "vcs_repositories") + op.alter_column( + "vcs_repositories", + "github_id", + new_column_name="provider_id", + type_=sa.String(length=255), + nullable=False, + existing_type=sa.Integer(), + existing_nullable=True, + ) + op.alter_column( + "vcs_repositories", + "hook", + type_=sa.String(length=255), + nullable=True, + existing_type=sa.Integer(), + existing_nullable=True, + ) + op.add_column( + "vcs_repositories", + # We use the provider name "github" by default as this is what we're already using across the codebase + sa.Column("provider", sa.String(255), nullable=False, server_default="github"), + ) + op.add_column( + "vcs_repositories", + sa.Column( + "default_branch", sa.String(255), nullable=False, server_default="master" + ), + ) + op.add_column( + "vcs_repositories", sa.Column("description", sa.String(10000), nullable=True) + ) + op.add_column( + # Nullable for now (see below) + "vcs_repositories", + sa.Column("html_url", sa.String(10000), nullable=True), + ) + op.add_column( + "vcs_repositories", sa.Column("license_spdx", sa.String(255), nullable=True) + ) + op.alter_column("vcs_repositories", "user_id", new_column_name="enabled_by_id") + op.drop_index("ix_github_repositories_name") + 
op.drop_index("ix_github_repositories_github_id") + + # Because they rely on the `provider` column, these are automatically + # deleted when downgrading so we don't need a separate drop command + # for them. + op.create_unique_constraint( + constraint_name=op.f("uq_vcs_repositories_provider_provider_id"), + table_name="vcs_repositories", + columns=["provider", "provider_id"], + ) + op.create_unique_constraint( + constraint_name=op.f("uq_vcs_repositories_provider_name"), + table_name="vcs_repositories", + columns=["provider", "name"], + ) + + """ + # Migrate data from the OAuth remote `extra_data` field to the repositories table + # where we will now store everything directly. + # + # We need to recreate the SQLAlchemy models for `RemoteAccount` and `Repository` here but + # in a much more lightweight way. We cannot simply import the models because (a) they depend + # on the full Invenio app being initialised and all extensions available and (b) we need + # to work with the models as they stand precisely at this point in the migration chain + # rather than the model file itself which may be at a later commit. + # + # We only include here the columns, constraints, and relations that we actually need to + # perform the migration, therefore keeping these models as lightweight as possible. 
+ remote_account_table = sa.table( + "oauthclient_remoteaccount", + sa.Column("id", sa.Integer, primary_key=True), + sa.Column("user_id", sa.Integer, sa.ForeignKey("account_user.id")), + sa.Column("client_id", sa.String(255)), + sa.Column("extra_data", MutableDict.as_mutable(JSONType)), + ) + vcs_repositories_table = sa.table( + "vcs_repositories", + sa.Column("id", UUIDType, primary_key=True), + sa.Column("provider_id", sa.String(255), nullable=True), + sa.Column("provider", sa.String(255), nullable=True), + sa.Column("description", sa.String(10000), nullable=True), + sa.Column("html_url", sa.String(10000), nullable=False), + sa.Column("license_spdx", sa.String(255), nullable=True), + sa.Column("default_branch", sa.String(255), nullable=False), + sa.Column("name", sa.String(255), nullable=False), + sa.Column("hook", sa.String(255), nullable=True), + sa.Column( + "enabled_by_id", sa.Integer, sa.ForeignKey("account_user.id"), nullable=True + ), + sa.Column("created", sa.DateTime, nullable=False), + sa.Column("updated", sa.DateTime, nullable=False), + ) + + # This is the recommended way to run SQLAlchemy operations in a migration, see https://alembic.sqlalchemy.org/en/latest/ops.html#alembic.operations.Operations.execute + session = op.get_bind() + + # We don't know the client ID as this is a config variable. + # So to find the RemoteAccounts that correspond to GitHub, we need to check for the existence + # of the `repos` key in the `extra_data` JSON. We cannot make this very efficient sadly, because + # (a) in Postgres we are using JSON not JSONB so there is no efficient JSON querying and (b) the + # instance might be using MySQL/SQLite where we store it as `TEXT`. 
+ + remote_accounts = session.execute(sa.select(remote_account_table)) + for remote_account in remote_accounts.mappings(): + if "repos" not in remote_account["extra_data"]: + continue + + repos = remote_account["extra_data"]["repos"] + + for id, github_repo in repos.items(): + # `id` (the dict key) is a string because JSON keys must be strings + + matching_db_repo_id = session.scalar( + sa.select(vcs_repositories_table).filter_by(provider_id=id) + ) + + if matching_db_repo_id is None: + # We are now storing _all_ repositories (even non-enabled ones) in the DB. + # The repo-user association will be created on the first sync after this migration, we need to download + # the list of users with access to the repo from the GitHub API. + session.execute( + vcs_repositories_table.insert().values( + id=uuid.uuid4(), + provider_id=id, + provider="github", + description=github_repo["description"], + name=github_repo["full_name"], + default_branch=github_repo["default_branch"], + # So far we have only supported github.com so we can safely assume the URL + html_url=f'https://github.com/{github_repo["full_name"]}', + # We have never stored this, it is queried at runtime right now. When the first + # sync happens after this migration, we will download all the license IDs from the VCS. 
+ license_spdx=None, + # This repo wasn't enabled + hook=None, + enabled_by_id=None, + created=datetime.now(tz=timezone.utc), + updated=datetime.now(tz=timezone.utc), + ) + ) + else: + session.execute( + vcs_repositories_table.update() + .filter_by(id=matching_db_repo_id) + .values( + description=github_repo["description"], + name=github_repo["full_name"], + default_branch=github_repo["default_branch"], + html_url=f'https://github.com/{github_repo["full_name"]}', + updated=datetime.now(tz=timezone.utc), + ) + ) + + # Remove `repos` from the existing `extra_data`, leaving only the last sync timestamp + session.execute( + remote_account_table.update() + .filter_by(id=remote_account["id"]) + .values(extra_data={"last_sync": remote_account["extra_data"]["last_sync"]}) + ) + + """ + + # We initially set this to nullable=True so we can create the column without an error + # (it would be null for existing records) but after the SQLAlchemy operations above we + # have populated it so we can mark it non-nullable. + op.alter_column( + "vcs_repositories", "html_url", nullable=False, existing_nullable=True + ) + + op.rename_table("github_releases", "vcs_releases") + op.alter_column( + "vcs_releases", + "release_id", + new_column_name="provider_id", + type_=sa.String(length=255), + nullable=False, + existing_type=sa.Integer(), + existing_nullable=True, + ) + op.add_column( + "vcs_releases", + sa.Column("provider", sa.String(255), nullable=False, server_default="github"), + ) + if op.get_context().dialect.name == "postgresql": + op.alter_column( + "vcs_releases", + "errors", + type_=sa.dialects.postgresql.JSONB, + postgresql_using="errors::text::jsonb", + ) + + op.drop_constraint( + op.f("uq_github_releases_release_id"), table_name="vcs_releases", type_="unique" + ) + # A given provider cannot have duplicate release IDs. 
+ # These constraints are also inherently deleted when the `provider` column is dropped + op.create_unique_constraint( + constraint_name=op.f("uq_vcs_releases_provider_id_provider"), + table_name="vcs_releases", + columns=["provider_id", "provider"], + ) + # A specific repository from a given provider cannot have multiple releases of the same tag + op.create_unique_constraint( + constraint_name=op.f("uq_vcs_releases_provider_id_provider_tag"), + table_name="vcs_releases", + columns=["provider_id", "provider", "tag"], + ) + + op.create_table( + "vcs_repository_users", + sa.Column("repository_id", UUIDType(), primary_key=True), + sa.Column("user_id", sa.Integer(), primary_key=True), + sa.ForeignKeyConstraint( + ["repository_id"], + ["vcs_repositories.id"], + name=op.f("fk_vcs_repository_users_repository_id_vcs_repositories"), + ), + sa.ForeignKeyConstraint( + ["user_id"], + ["accounts_user.id"], + name=op.f("fk_vcs_repository_users_user_id_accounts_user"), + ), + ) + # ### end Alembic commands ### + + +def downgrade(): + """Downgrade database.""" + + # Currently, the downgrade can only be performed **without data**. The tables are transformed but + # data will not be successfully migrated. The upgrade migration has a large amount of custom logic + # for migrating the data into the new format, and this is not replicated/reversed for downgrading. 
+ + op.alter_column( + "vcs_repositories", + "enabled_by_id", + new_column_name="user_id", + ) + op.drop_table("vcs_repository_users") + + op.rename_table("vcs_repositories", "github_repositories") + op.alter_column( + "github_repositories", + "provider_id", + new_column_name="github_id", + type_=sa.Integer(), + nullable=True, + existing_type=sa.String(length=255), + existing_nullable=False, + postgresql_using="provider_id::integer", + ) + op.alter_column( + "github_repositories", + "hook", + type_=sa.Integer(), + nullable=True, + existing_type=sa.String(length=255), + existing_nullable=True, + postgresql_using="hook::integer", + ) + op.drop_column("github_repositories", "provider") + op.drop_column("github_repositories", "description") + op.drop_column("github_repositories", "html_url") + op.drop_column("github_repositories", "license_spdx") + op.drop_column("github_repositories", "default_branch") + op.create_index( + op.f("ix_github_repositories_github_id"), + "github_repositories", + ["github_id"], + unique=True, + ) + op.create_index( + op.f("ix_github_repositories_name"), + "github_repositories", + ["name"], + unique=True, + ) + + op.rename_table("vcs_releases", "github_releases") + op.alter_column( + "github_releases", + "provider_id", + new_column_name="release_id", + type_=sa.Integer(), + nullable=True, + existing_type=sa.String(length=255), + existing_nullable=False, + postgresql_using="provider_id::integer", + ) + op.drop_column("github_releases", "provider") + if op.get_context().dialect.name == "postgresql": + op.alter_column( + "github_releases", + "errors", + type_=sa.dialects.postgresql.JSON, + postgresql_using="errors::text::json", + ) + op.create_unique_constraint( + op.f("uq_github_releases_release_id"), + table_name="github_releases", + columns=["release_id"], + ) + # ### end Alembic commands ### diff --git a/invenio_github/alembic/5a5428312b2b_create_github_branch.py b/invenio_vcs/alembic/5a5428312b2b_create_github_branch.py similarity index 
100% rename from invenio_github/alembic/5a5428312b2b_create_github_branch.py rename to invenio_vcs/alembic/5a5428312b2b_create_github_branch.py diff --git a/invenio_github/alembic/b0eaee37b545_create_github_tables.py b/invenio_vcs/alembic/b0eaee37b545_create_github_tables.py similarity index 100% rename from invenio_github/alembic/b0eaee37b545_create_github_tables.py rename to invenio_vcs/alembic/b0eaee37b545_create_github_tables.py diff --git a/invenio_github/assets/semantic-ui/js/invenio_github/index.js b/invenio_vcs/assets/semantic-ui/js/invenio_vcs/index.js similarity index 93% rename from invenio_github/assets/semantic-ui/js/invenio_github/index.js rename to invenio_vcs/assets/semantic-ui/js/invenio_vcs/index.js index 4573c878..804c63ff 100644 --- a/invenio_github/assets/semantic-ui/js/invenio_github/index.js +++ b/invenio_vcs/assets/semantic-ui/js/invenio_vcs/index.js @@ -42,8 +42,9 @@ if (sync_button) { const buttonTextElem = document.getElementById("sync_repos_btn_text"); const buttonText = buttonTextElem.innerHTML; const loadingText = sync_button.dataset.loadingText; + const provider = sync_button.dataset.provider; - const url = "/api/user/github/repositories/sync"; + const url = `/api/user/vcs/${provider}/repositories/sync`; const request = new Request(url, { method: "POST", headers: REQUEST_HEADERS, @@ -129,18 +130,16 @@ if (repositories) { } function sendEnableDisableRequest(checked, repo) { - const repo_id = repo - .querySelector("input[data-repo-id]") - .getAttribute("data-repo-id"); + const input = repo.querySelector("input[data-repo-id]"); + const repo_id= input.getAttribute("data-repo-id"); + const provider = input.getAttribute("data-provider"); const switchMessage = repo.querySelector(".repo-switch-message"); let url; if (checked === true) { - url = "/api/user/github/repositories/" + repo_id + "/enable"; - } else { - if (checked === false) { - url = "/api/user/github/repositories/" + repo_id + "/disable"; - } + url = 
`/api/user/vcs/${provider}/repositories/${repo_id}/enable`; + } else if (checked === false) { + url = `/api/user/vcs/${provider}/repositories/${repo_id}/disable`; } const request = new Request(url, { diff --git a/invenio_github/config.py b/invenio_vcs/config.py similarity index 70% rename from invenio_github/config.py rename to invenio_vcs/config.py index 18814cfc..b5ef9493 100644 --- a/invenio_github/config.py +++ b/invenio_vcs/config.py @@ -23,11 +23,16 @@ """Configuration for GitHub module.""" from datetime import timedelta +from typing import TYPE_CHECKING -GITHUB_WEBHOOK_RECEIVER_ID = "github" -"""Local name of webhook receiver.""" +from flask import current_app -GITHUB_WEBHOOK_RECEIVER_URL = None +if TYPE_CHECKING: + from invenio_vcs.providers import RepositoryServiceProviderFactory + +VCS_PROVIDERS = [] + +# GITHUB_WEBHOOK_RECEIVER_URL = None """URL format to be used when creating a webhook on GitHub. This configuration variable must be set explicitly. Example:: @@ -41,7 +46,7 @@ context, doesn't work as expected. """ -GITHUB_SHARED_SECRET = "CHANGEME" +# GITHUB_SHARED_SECRET = "CHANGEME" """Shared secret between you and GitHub. Used to make GitHub sign webhook requests with HMAC. @@ -49,7 +54,7 @@ See http://developer.github.com/v3/repos/hooks/#example """ -GITHUB_INSECURE_SSL = False +# GITHUB_INSECURE_SSL = False """Determine if the GitHub webhook request will check the SSL certificate. 
Never set to True in a production environment, but can be useful for @@ -59,29 +64,41 @@ GITHUB_REFRESH_TIMEDELTA = timedelta(days=1) """Time period after which a GitHub account sync should be initiated.""" -GITHUB_RELEASE_CLASS = "invenio_github.api:GitHubRelease" +VCS_RELEASE_CLASS = "invenio_vcs.service:VCSRelease" """GitHubRelease class to be used for release handling.""" -GITHUB_TEMPLATE_INDEX = "invenio_github/settings/index.html" +VCS_TEMPLATE_INDEX = "invenio_vcs/settings/index.html" """Repositories list template.""" -GITHUB_TEMPLATE_VIEW = "invenio_github/settings/view.html" +VCS_TEMPLATE_VIEW = "invenio_vcs/settings/view.html" """Repository detail view template.""" GITHUB_ERROR_HANDLERS = None """Definition of the way specific exceptions are handled.""" -GITHUB_MAX_CONTRIBUTORS_NUMBER = 30 +VCS_MAX_CONTRIBUTORS_NUMBER = 30 """Max number of contributors of a release to be retrieved from Github.""" -GITHUB_INTEGRATION_ENABLED = False +VCS_INTEGRATION_ENABLED = False """Enables the github integration.""" -GITHUB_CITATION_FILE = None +VCS_CITATION_FILE = None """Citation file name.""" -GITHUB_CITATION_METADATA_SCHEMA = None +VCS_CITATION_METADATA_SCHEMA = None """Citation metadata schema.""" -GITHUB_ZIPBALL_TIMEOUT = 300 +VCS_ZIPBALL_TIMEOUT = 300 """Timeout for the zipball download, in seconds.""" + + +def get_provider_list(app=current_app) -> list["RepositoryServiceProviderFactory"]: + return app.config["VCS_PROVIDERS"] + + +def get_provider_by_id(id: str) -> "RepositoryServiceProviderFactory": + providers = get_provider_list() + for provider in providers: + if id == provider.id: + return provider + raise Exception(f"VCS provider with ID {id} not registered") diff --git a/invenio_vcs/contrib/github.py b/invenio_vcs/contrib/github.py new file mode 100644 index 00000000..ba45e86a --- /dev/null +++ b/invenio_vcs/contrib/github.py @@ -0,0 +1,414 @@ +# -*- coding: utf-8 -*- +# This file is part of Invenio. +# Copyright (C) 2025 CERN. 
+# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +import json + +import dateutil +import github3 +import requests +from flask import current_app +from github3.repos import ShortRepository +from invenio_i18n import gettext as _ +from invenio_oauthclient.contrib.github import GitHubOAuthSettingsHelper +from werkzeug.utils import cached_property + +from invenio_vcs.errors import ReleaseZipballFetchError, UnexpectedProviderResponse +from invenio_vcs.generic_models import ( + GenericContributor, + GenericOwner, + GenericOwnerType, + GenericRelease, + GenericRepository, + GenericUser, + GenericWebhook, +) +from invenio_vcs.providers import ( + RepositoryServiceProvider, + RepositoryServiceProviderFactory, +) + + +class GitHubProviderFactory(RepositoryServiceProviderFactory): + def __init__( + self, + base_url, + webhook_receiver_url, + id="github", + name="GitHub", + description="Automatically archive your repositories", + credentials_key="GITHUB_APP_CREDENTIALS", + config={}, + ): + super().__init__( + GitHubProvider, + base_url=base_url, + webhook_receiver_url=webhook_receiver_url, + id=id, + name=name, + description=description, + credentials_key=credentials_key, + icon="github", + repository_name="repository", + repository_name_plural="repositories", + ) + + self._config = dict() + self._config.update( + shared_secret="", + insecure_ssl=False, + ) + self._config.update(config) + + @property + def remote_config(self): + request_token_params = { + # General `repo` scope is required for reading collaborators + # https://docs.github.com/en/apps/oauth-apps/building-oauth-apps/scopes-for-oauth-apps + "scope": "read:user,user:email,admin:repo_hook,read:org,repo" + } + + helper = GitHubOAuthSettingsHelper( + title=self.name, + icon="fa fa-{}".format(self.icon), + description=self.description, + base_url=self.base_url, + app_key=self.credentials_key, + ) + github_app = 
helper.remote_app + github_app["disconnect_handler"] = self.oauth_handlers.disconnect_handler + github_app["signup_handler"][ + "setup" + ] = self.oauth_handlers.account_setup_handler + github_app["params"]["request_token_params"] = request_token_params + + return github_app + + @property + def config(self): + return self._config + + def webhook_is_create_release_event(self, event_payload): + action = event_payload.get("action") + is_draft_release = event_payload.get("release", {}).get("draft") + + # Draft releases do not create releases on invenio + is_create_release_event = ( + action in ("published", "released", "created") and not is_draft_release + ) + return is_create_release_event + + @staticmethod + def _extract_license(gh_repo_dict): + # The GitHub API returns the `license` as a simple key of the ShortRepository. + # But for some reason github3py does not include a mapping for this. + # So the only way to access it without making an additional request is to convert + # the repo to a dict. 
@cached_property
def _gh(self):
    """Return the authenticated github3 client for this provider.

    ``github3.login`` is used when the factory's base URL is exactly
    ``https://github.com``; any other base URL goes through
    ``github3.enterprise_login``. The result is cached on the instance.
    """
    base_url = self.factory.base_url
    if base_url != "https://github.com":
        client = github3.enterprise_login(url=base_url, token=self.access_token)
    else:
        client = github3.login(token=self.access_token)

    assert client is not None
    return client
secret=self.factory.config["shared_secret"], + insecure_ssl="1" if self.factory.config["insecure_ssl"] else "0", + ) + + repo = self._gh.repository_with_id(int(repository_id)) + if repo is None: + return None + + hooks = (h for h in repo.hooks() if h.config.get("url", "") == self.webhook_url) + hook = next(hooks, None) + + if not hook: + hook = repo.create_hook("web", hook_config, events=["release"]) + else: + hook.edit(config=hook_config, events=["release"]) + + return str(hook.id) + + def delete_webhook(self, repository_id, hook_id=None): + assert repository_id.isdigit() + + repo = self._gh.repository_with_id(int(repository_id)) + if repo is None: + return False + + if hook_id is not None: + hook = repo.hook(hook_id) + else: + hooks = ( + h + for h in repo.hooks() + if self.is_valid_webhook(h.config.get("url", "")) + ) + hook = next(hooks, None) + + if not hook or hook.delete(): + return True + return False + + def get_own_user(self): + user = self._gh.me() + if user is not None: + return GenericUser(user.id, user.login, user.name) + + return None + + def list_repository_contributors(self, repository_id, max): + assert repository_id.isdigit() + + repo = self._gh.repository_with_id(repository_id) + if repo is None: + return None + + contributors_iter = repo.contributors(number=max) + # Consume the iterator to materialize the request and have a `last_status``. + contributors = list(contributors_iter) + status = contributors_iter.last_status + if status == 200: + # Sort by contributions and filter only users. 
+ sorted_contributors = sorted( + (c for c in contributors if c.type == "User"), + key=lambda x: x.contributions_count, + reverse=True, + ) + + contributors = [] + for c in sorted_contributors: + contributions_count = c.contributions_count + c = c.refresh() + contributors.append( + GenericContributor( + id=c.id, + username=c.login, + display_name=c.name, + contributions_count=contributions_count, + company=c.company, + ) + ) + + return contributors + else: + raise UnexpectedProviderResponse( + _( + "Provider returned unexpected code: %(status)s for release in repo %(repo_id)s" + ) + % {"status": status, "repo_id": repository_id} + ) + + def get_repository_owner(self, repository_id): + assert repository_id.isdigit() + + repo = self._gh.repository_with_id(repository_id) + if repo is None: + return None + + owner_type = ( + GenericOwnerType.Person + if repo.owner.type == "User" + else GenericOwnerType.Organization + ) + + return GenericOwner( + id=repo.owner.id, + path_name=repo.owner.login, + display_name=repo.owner.full_name, + type=owner_type, + ) + + def resolve_release_zipball_url(self, release_zipball_url): + url = release_zipball_url + + # Execute a HEAD request to the zipball url to test if it is accessible. + response = self._gh.session.head(url, allow_redirects=True) + + # In case where there is a tag and branch with the same name, we might get back + # a "300 Multiple Choices" response, which requires fetching an "alternate" + # link. + if response.status_code == 300: + alternate_url = response.links.get("alternate", {}).get("url") + if alternate_url: + url = alternate_url # Use the alternate URL + response = self._gh.session.head(url, allow_redirects=True) + + # Another edge-case, is when the access token we have does not have the + # scopes/permissions to access public links. In that rare case we fallback to a + # non-authenticated request. 
+ if response.status_code == 404: + current_app.logger.warning( + "GitHub zipball URL {url} not found, trying unauthenticated request.", + extra={"url": response.url}, + ) + response = requests.head(url, allow_redirects=True) + # If this response is successful we want to use the finally resolved URL to + # fetch the ZIP from. + if response.status_code == 200: + return response.url + + if response.status_code != 200: + raise ReleaseZipballFetchError() + + return response.url + + def fetch_release_zipball(self, release_zipball_url, timeout): + with self._gh.session.get( + release_zipball_url, stream=True, timeout=timeout + ) as resp: + yield resp.raw + + def retrieve_remote_file(self, repository_id, tag_name, file_name): + assert repository_id.isdigit() + + try: + resp = self._gh.repository_with_id(repository_id).file_contents( + path=file_name, ref=tag_name + ) + return resp.decoded + except github3.exceptions.NotFoundError: + return None + + def revoke_token(self, access_token): + client_id, client_secret = self._gh.session.retrieve_client_credentials() + url = self._gh._build_url("applications", str(client_id), "token") + with self._gh.session.temporary_basic_auth(client_id, client_secret): + response = self._gh._delete( + url, data=json.dumps({"access_token": access_token}) + ) + return response diff --git a/invenio_vcs/contrib/gitlab.py b/invenio_vcs/contrib/gitlab.py new file mode 100644 index 00000000..9eb278c4 --- /dev/null +++ b/invenio_vcs/contrib/gitlab.py @@ -0,0 +1,400 @@ +# -*- coding: utf-8 -*- +# This file is part of Invenio. +# Copyright (C) 2025 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. 
+ +from __future__ import annotations + +from typing import Any + +import dateutil +import gitlab +import gitlab.const +import requests +from flask import current_app +from invenio_oauthclient import current_oauthclient +from werkzeug.utils import cached_property + +from invenio_vcs.generic_models import ( + GenericContributor, + GenericOwner, + GenericOwnerType, + GenericRelease, + GenericRepository, + GenericUser, + GenericWebhook, +) +from invenio_vcs.providers import ( + RepositoryServiceProvider, + RepositoryServiceProviderFactory, +) + + +def _gl_response_error_handler(f): + def inner_function(*args, **kwargs): + try: + return f(*args, **kwargs) + except gitlab.GitlabGetError as e: + if e.response_code == 404: + return None + else: + raise e + except gitlab.GitlabCreateError as e: + if e.response_code == 404: + return None + else: + raise e + + return inner_function + + +class GitLabProviderFactory(RepositoryServiceProviderFactory): + def __init__( + self, + base_url: str, + webhook_receiver_url: str, + id="gitlab", + name="GitLab", + description="Automatically archive your repositories", + credentials_key="GITLAB_APP_CREDENTIALS", + config={}, + ): + super().__init__( + GitLabProvider, + base_url=base_url, + webhook_receiver_url=webhook_receiver_url, + id=id, + name=name, + description=description, + credentials_key=credentials_key, + icon="gitlab", + repository_name="project", + repository_name_plural="projects", + ) + self._config = dict() + self._config.update(shared_validation_token="") + self._config.update(config) + + def _account_info_handler(self, remote, resp: dict): + gl = gitlab.Gitlab( + self.base_url, + oauth_token=resp["access_token"], + ) + gl.auth() + user_attrs = gl.user.attributes + handlers = current_oauthclient.signup_handlers[remote.name] + return handlers["info_serializer"](resp, user_attrs) + + def _account_info_serializer(self, remote, resp, user_info, **kwargs): + return dict( + user=dict( + email=user_info["email"], + profile=dict( 
+ username=user_info["username"], + full_name=user_info["name"], + ), + ), + external_id=str(user_info["id"]), + external_method="gitlab", + ) + + @property + def remote_config(self): + return dict( + title=self.name, + description=self.description, + icon="fa fa-{}".format(self.icon), + authorized_handler="invenio_oauthclient.handlers:authorized_signup_handler", + disconnect_handler=self.oauth_handlers.disconnect_handler, + signup_handler=dict( + info=self._account_info_handler, + info_serializer=self._account_info_serializer, + setup=self.oauth_handlers.account_setup_handler, + view="invenio_oauthclient.handlers:signup_handler", + ), + params=dict( + base_url="{}/api/v4/".format(self.base_url), + request_token_url=None, + access_token_url="{}/oauth/token".format(self.base_url), + access_token_method="POST", + authorize_url="{}/oauth/authorize".format(self.base_url), + app_key=self.credentials_key, + ), + ) + + @property + def config(self): + return self._config + + def url_for_tag(self, repository_name, tag_name) -> str: + return "{}/{}/-/tags/{}".format(self.base_url, repository_name, tag_name) + + def url_for_new_file(self, repository_name, branch_name, file_name) -> str: + return "{}/{}/-/new/{}/?file_name={}".format( + self.base_url, repository_name, branch_name, file_name + ) + + def url_for_new_release(self, repository_name) -> str: + return "{}/{}/-/releases/new".format(self.base_url, repository_name) + + def webhook_is_create_release_event(self, event_payload: dict[str, Any]): + # https://archives.docs.gitlab.com/17.11/user/project/integrations/webhook_events/#release-events + + # GitLab does not have unpublished/draft releases the way GitHub does. However, it does have + # "upcoming releases" (https://archives.docs.gitlab.com/17.11/api/releases/#upcoming-releases) + # meaning ones with a release date in the future. + # TODO: do we want to return False for upcoming releases? 
+ + object_kind = event_payload.get("object_kind") + action = event_payload.get("action") + + # existing `invenio-gitlab` instead uses the `tag_push` event which is more general than the `release` + # event (https://codebase.helmholtz.cloud/rodare/invenio-gitlab/-/blob/d66181697b8a34383b333306b559d13cd6fa829a/invenio_gitlab/receivers.py#L41). + # TODO: I recommend using the `release` event as this is a more 'formal' manual action and better corresponds to the release event in GitHub. Is this okay? + return object_kind == "release" and action == "create" + + def webhook_event_to_generic( + self, event_payload: dict[str, Any] + ) -> tuple[GenericRelease, GenericRepository]: + # https://archives.docs.gitlab.com/18.0/user/project/integrations/webhook_events/#release-events + # https://archives.docs.gitlab.com/17.11/user/project/integrations/webhook_events/#release-events + # https://archives.docs.gitlab.com/16.11/ee/user/project/integrations/webhook_events.html#release-events + + zipball_url: str | None = None + tarball_url: str | None = None + + for source in event_payload["assets"]["sources"]: + format = source["format"] + url = source["url"] + if format == "zip": + zipball_url = url + elif format == "tar": + tarball_url = url + + release = GenericRelease( + # GitLab does not expose the in-database ID of releases through the webhook payload or the REST API + # It does exist internally but it's never sent to us + id=event_payload["tag"], + tag_name=event_payload["tag"], + html_url=event_payload["url"], + name=event_payload["name"], + body=event_payload["description"], + zipball_url=zipball_url, + tarball_url=tarball_url, + created_at=dateutil.parser.parse(event_payload["created_at"]), + published_at=dateutil.parser.parse(event_payload["released_at"]), + ) + + repo = GitLabProviderFactory._proj_to_generic(event_payload["project"]) + return (release, repo) + + @staticmethod + def _extract_license(proj_attrs: dict[str, Any]): + license_obj = proj_attrs.get("license") + 
if license_obj is not None: + return license_obj["key"].upper() + return None + + @staticmethod + def _proj_to_generic(proj_attrs: dict[str, Any]): + return GenericRepository( + id=str(proj_attrs["id"]), + full_name=proj_attrs["path_with_namespace"], + default_branch=proj_attrs["default_branch"], + html_url=proj_attrs["web_url"], + description=proj_attrs["description"], + license_spdx=GitLabProviderFactory._extract_license(proj_attrs), + ) + + +class GitLabProvider(RepositoryServiceProvider): + @cached_property + def _gl(self): + gl = gitlab.Gitlab(self.factory.base_url, oauth_token=self.access_token) + gl.auth() + return gl + + @_gl_response_error_handler + def list_repositories(self) -> dict[str, GenericRepository] | None: + repos: dict[str, GenericRepository] = {} + for project in self._gl.projects.list( + iterator=True, + simple=False, + min_access_level=gitlab.const.MAINTAINER_ACCESS, + ): + repos[str(project.id)] = GenericRepository( + id=str(project.id), + full_name=project.path_with_namespace, + default_branch=project.default_branch, + html_url=project.web_url, + description=project.description, + # TODO: license is not returned in the projects list (only when querying an individual project). + # This would be super slow. Do we really need license here? 
+ license_spdx=None, + ) + return repos + + @_gl_response_error_handler + def get_repository(self, repository_id: str) -> GenericRepository | None: + assert repository_id.isdigit() + proj = self._gl.projects.get(int(repository_id)) + return GitLabProviderFactory._proj_to_generic(proj.asdict()) + + @_gl_response_error_handler + def list_repository_contributors( + self, repository_id: str, max: int + ) -> list[GenericContributor] | None: + assert repository_id.isdigit() + proj = self._gl.projects.get(int(repository_id), lazy=True) + + contribs: list[GenericContributor] = [] + for index, contrib in enumerate( + proj.repository_contributors(iterator=True, order_by="commits", sort="desc") + ): + email = contrib["email"] + contrib_count = contrib["commits"] + + # repository_contributors returns a very small amount of data (not even the username) + # See here https://archives.docs.gitlab.com/17.11/api/repositories/#contributors + # So we try to enrich the data by searching for the user with the matching email. + # We will fail to find it if a) the user doesn't exist (e.g. repos imported/forked from somewhere else) + # or b) if the user has not made their email address public. + # By default, email addresses on GitLab are private, so this is unlikely to succeed. 
+ matching_users = self._gl.users.list(search=email) + if len(matching_users) == 0: + contribs.append( + GenericContributor( + id=email, + username=email, + display_name=contrib["name"], + contributions_count=contrib_count, + ) + ) + else: + matching_user = matching_users[0] + contribs.append( + GenericContributor( + id=str(matching_user.id), + username=matching_user.username, + display_name=matching_user.name, + contributions_count=contrib_count, + ) + ) + + if index + 1 == max: + break + + return contribs + + @_gl_response_error_handler + def get_repository_owner(self, repository_id: str): + assert repository_id.isdigit() + proj = self._gl.projects.get(int(repository_id)) + return GenericOwner( + id=str(proj.namespace.id), + path_name=proj.namespace.path, + display_name=proj.namespace.name, + type=( + GenericOwnerType.Person + if proj.namespace.kind == "user" + else GenericOwnerType.Organization + ), + ) + + @_gl_response_error_handler + def list_repository_webhooks( + self, repository_id: str + ) -> list[GenericWebhook] | None: + assert repository_id.isdigit() + proj = self._gl.projects.get(int(repository_id), lazy=True) + hooks: list[GenericWebhook] = [] + for hook in proj.hooks.list(iterator=True): + hooks.append( + GenericWebhook( + id=str(hook.id), + repository_id=str(hook.project_id), + url=hook.url, + ) + ) + return hooks + + def list_repository_user_ids(self, repository_id: str) -> list[str] | None: + # https://docs.gitlab.com/api/members/#list-all-members-of-a-group-or-project-including-inherited-and-invited-members + user_ids: list[str] = [] + for member in self._gl.projects.get(repository_id, lazy=True).members_all.list( + iterator=True + ): + user_ids.append(str(member.id)) + return user_ids + + @_gl_response_error_handler + def create_webhook(self, repository_id: str) -> str | None: + assert repository_id.isdigit() + proj = self._gl.projects.get(int(repository_id), lazy=True) + + hook_data = { + "url": self.webhook_url, + "token": 
self.factory.config.get("shared_validation_token"), + "releases_events": True, + "description": "Managed by {}".format( + current_app.config.get("THEME_SITENAME", "Invenio") + ), + } + + resp = proj.hooks.create(hook_data) + return str(resp.id) + + @_gl_response_error_handler + def delete_webhook(self, repository_id: str, hook_id=None) -> bool: + assert repository_id.isdigit() + if hook_id is not None: + assert hook_id.isdigit() + + proj = self._gl.projects.get(int(repository_id), lazy=True) + if hook_id is None: + first_valid = self.get_first_valid_webhook(repository_id) + if first_valid is None: + return True + + proj.hooks.delete(int(first_valid.id)) + else: + proj.hooks.delete(int(hook_id)) + + return True + + @_gl_response_error_handler + def get_own_user(self) -> GenericUser | None: + user = self._gl.user + if user is None: + return None + return GenericUser( + id=str(user.id), + username=user.username, + display_name=user.name, + ) + + def resolve_release_zipball_url(self, release_zipball_url: str) -> str | None: + # No further resolution needs to be done for GitLab, so this is a no-op + return release_zipball_url + + @_gl_response_error_handler + def fetch_release_zipball(self, release_zipball_url: str, timeout: int): + resp = self._gl.http_get( + release_zipball_url, raw=True, streamed=True, timeout=timeout + ) + assert isinstance(resp, requests.Response) + with resp: + yield resp.raw + + @_gl_response_error_handler + def retrieve_remote_file(self, repository_id: str, tag_name: str, file_name: str): + assert repository_id.isdigit() + proj = self._gl.projects.get(repository_id, lazy=True) + file = proj.files.get(file_path=file_name, ref=tag_name) + return file.decode() + + def revoke_token(self, access_token: str): + # TODO: GitLab implements RFC7009 for OAuth Token Revocation. We might need to do this via OAuth instead of the GitLab API. 
+ pass diff --git a/invenio_github/errors.py b/invenio_vcs/errors.py similarity index 84% rename from invenio_github/errors.py rename to invenio_vcs/errors.py index d60b12c0..e746f9c0 100644 --- a/invenio_github/errors.py +++ b/invenio_vcs/errors.py @@ -28,11 +28,11 @@ from invenio_i18n import gettext as _ -class GitHubError(Exception): +class VCSError(Exception): """General GitHub error.""" -class RepositoryAccessError(GitHubError): +class RepositoryAccessError(VCSError): """Repository access permissions error.""" message = _("The user cannot access the github repository") @@ -46,7 +46,7 @@ def __init__(self, user=None, repo=None, repo_id=None, message=None): self.repo_id = repo_id -class RepositoryDisabledError(GitHubError): +class RepositoryDisabledError(VCSError): """Repository access permissions error.""" message = _("This repository is not enabled for webhooks.") @@ -57,7 +57,7 @@ def __init__(self, repo=None, message=None): self.repo = repo -class RepositoryNotFoundError(GitHubError): +class RepositoryNotFoundError(VCSError): """Repository not found error.""" message = _("The repository does not exist.") @@ -68,7 +68,7 @@ def __init__(self, repo=None, message=None): self.repo = repo -class InvalidSenderError(GitHubError): +class InvalidSenderError(VCSError): """Invalid release sender error.""" message = _("Invalid sender for event") @@ -80,7 +80,7 @@ def __init__(self, event=None, user=None, message=None): self.user = user -class ReleaseAlreadyReceivedError(GitHubError): +class ReleaseAlreadyReceivedError(VCSError): """Invalid release sender error.""" message = _("The release has already been received.") @@ -91,7 +91,7 @@ def __init__(self, release=None, message=None): self.release = release -class CustomGitHubMetadataError(GitHubError): +class CustomGitHubMetadataError(VCSError): """Invalid Custom GitHub Metadata file.""" message = _("The metadata file is not valid JSON.") @@ -102,7 +102,7 @@ def __init__(self, file=None, message=None): self.file = file 
-class GithubTokenNotFound(GitHubError): +class GithubTokenNotFound(VCSError): """Oauth session token was not found.""" message = _("The oauth session token was not found.") @@ -113,7 +113,7 @@ def __init__(self, user=None, message=None): self.user = user -class RemoteAccountNotFound(GitHubError): +class RemoteAccountNotFound(VCSError): """Remote account for the user is not setup.""" message = _("RemoteAccount not found for user") @@ -124,7 +124,7 @@ def __init__(self, user=None, message=None): self.user = user -class RemoteAccountDataNotSet(GitHubError): +class RemoteAccountDataNotSet(VCSError): """Remote account extra data for the user is not set.""" message = _("RemoteAccount extra data not set for user.") @@ -135,7 +135,7 @@ def __init__(self, user=None, message=None): self.user = user -class ReleaseNotFound(GitHubError): +class ReleaseNotFound(VCSError): """Release does not exist.""" message = _("Release does not exist.") @@ -145,17 +145,17 @@ def __init__(self, message=None): super().__init__(message or self.message) -class UnexpectedGithubResponse(GitHubError): +class UnexpectedProviderResponse(VCSError): """Request to Github API returned an unexpected error.""" - message = _("Github API returned an unexpected error.") + message = _("Provider API returned an unexpected error.") def __init__(self, message=None): """Constructor.""" super().__init__(message or self.message) -class ReleaseZipballFetchError(GitHubError): +class ReleaseZipballFetchError(VCSError): """Error fetching release zipball file.""" message = _("Error fetching release zipball file.") @@ -163,3 +163,10 @@ class ReleaseZipballFetchError(GitHubError): def __init__(self, message=None): """Constructor.""" super().__init__(message or self.message) + + +class UserInfoNoneError(VCSError): + message = _("Provider did not return user profile information.") + + def __init__(self, message=None) -> None: + super().__init__(message or self.message) diff --git a/invenio_github/ext.py b/invenio_vcs/ext.py 
similarity index 57% rename from invenio_github/ext.py rename to invenio_vcs/ext.py index 516f0b3b..24cb4124 100644 --- a/invenio_github/ext.py +++ b/invenio_vcs/ext.py @@ -33,13 +33,15 @@ from six import string_types from werkzeug.utils import cached_property, import_string -from invenio_github.api import GitHubRelease -from invenio_github.utils import obj_or_import_string +from invenio_vcs.config import get_provider_list +from invenio_vcs.receivers import VCSReceiver +from invenio_vcs.service import VCSRelease +from invenio_vcs.utils import obj_or_import_string from . import config -class InvenioGitHub(object): +class InvenioVCS(object): """Invenio-GitHub extension.""" def __init__(self, app=None): @@ -50,10 +52,10 @@ def __init__(self, app=None): @cached_property def release_api_class(self): """Github Release API class.""" - cls = current_app.config["GITHUB_RELEASE_CLASS"] + cls = current_app.config["VCS_RELEASE_CLASS"] if isinstance(cls, string_types): cls = import_string(cls) - assert issubclass(cls, GitHubRelease) + assert issubclass(cls, VCSRelease) return cls @cached_property @@ -68,36 +70,63 @@ def release_error_handlers(self): def init_app(self, app): """Flask application initialization.""" self.init_config(app) - app.extensions["invenio-github"] = self + app.extensions["invenio-vcs"] = self def init_config(self, app): """Initialize configuration.""" app.config.setdefault( - "GITHUB_SETTINGS_TEMPLATE", - app.config.get("SETTINGS_TEMPLATE", "invenio_github/settings/base.html"), + "VCS_SETTINGS_TEMPLATE", + app.config.get("SETTINGS_TEMPLATE", "invenio_vcs/settings/base.html"), ) for k in dir(config): - if k.startswith("GITHUB_"): + if k.startswith("VCS_"): app.config.setdefault(k, getattr(config, k)) -def finalize_app(app): +def finalize_app_ui(app): """Finalize app.""" - init_menu(app) + if app.config.get("VCS_INTEGRATION_ENABLED", False): + init_menu(app) + init_webhooks(app) + + +def finalize_app_api(app): + """Finalize app.""" + if 
app.config.get("VCS_INTEGRATION_ENABLED", False): + init_webhooks(app) def init_menu(app): """Init menu.""" - if app.config.get("GITHUB_INTEGRATION_ENABLED", False): - current_menu.submenu("settings.github").register( - endpoint="invenio_github.get_repositories", + for provider in get_provider_list(app): + + def is_active(current_node): + return ( + request.endpoint.startswith("invenio_vcs.") + and request.view_args.get("provider", "") == current_node.name + ) + + current_menu.submenu(f"settings.{provider.id}").register( + endpoint="invenio_vcs.get_repositories", + endpoint_arguments_constructor=lambda id=provider.id: {"provider": id}, text=_( - "%(icon)s GitHub", + "%(icon)s %(provider)s", icon=LazyString( - lambda: f'' + lambda: f'' ), + provider=provider.name, ), order=10, - active_when=lambda: request.endpoint.startswith("invenio_github."), + active_when=is_active, ) + + +def init_webhooks(app): + state = app.extensions.get("invenio-webhooks") + if state is not None: + for provider in get_provider_list(app): + # Procedurally register the webhook receivers instead of including them as an entry point, since + # they are defined in the VCS provider config list rather than in the instance's setup.cfg file. + if provider.id not in state.receivers: + state.register(provider.id, VCSReceiver) diff --git a/invenio_vcs/generic_models.py b/invenio_vcs/generic_models.py new file mode 100644 index 00000000..f7975a85 --- /dev/null +++ b/invenio_vcs/generic_models.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +# This file is part of Invenio. +# Copyright (C) 2025 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +""" +Generic dataclass models to represent the bare minimum necessary data +from VCS providers. These are essentially the "lowest common factor" of +the otherwise large, complex, and heterogenous responses returned by APIs. 
+ +These are used by higher-level calls to have a common set of data to +operate on. Provider implementations are responsible for converting API +responses into these generic classes. +""" + +from __future__ import annotations + +from dataclasses import asdict, dataclass +from datetime import datetime +from enum import Enum + +from invenio_vcs.models import Repository + + +@dataclass +class GenericWebhook: + id: str + repository_id: str + url: str + + +@dataclass +class GenericRepository: + id: str + full_name: str + default_branch: str + html_url: str + description: str | None = None + license_spdx: str | None = None + + @staticmethod + def from_model(model: Repository): + return GenericRepository( + id=model.provider_id, + full_name=model.full_name, + default_branch=model.default_branch, + html_url=model.html_url, + description=model.description, + license_spdx=model.license_spdx, + ) + + def to_model(self, model: Repository): + changed = False + for key, value in asdict(self).items(): + if key in ["id"]: + continue + + db_value = getattr(model, key) + if db_value != value: + changed = True + setattr(model, key, value) + + return changed + + +@dataclass +class GenericRelease: + id: str + tag_name: str + created_at: datetime + html_url: str + name: str | None = None + body: str | None = None + tarball_url: str | None = None + zipball_url: str | None = None + published_at: datetime | None = None + + +@dataclass +class GenericUser: + id: str + username: str + display_name: str | None = None + + +class GenericOwnerType(Enum): + Person = 1 + Organization = 2 + + +@dataclass +class GenericOwner: + id: str + path_name: str + type: GenericOwnerType + display_name: str | None = None + + +@dataclass +class GenericContributor: + id: str + username: str + company: str | None = None + contributions_count: int | None = None + display_name: str | None = None diff --git a/invenio_github/models.py b/invenio_vcs/models.py similarity index 65% rename from invenio_github/models.py 
rename to invenio_vcs/models.py index 78ed0f96..c45f1c89 100644 --- a/invenio_github/models.py +++ b/invenio_vcs/models.py @@ -31,6 +31,7 @@ from invenio_db import db from invenio_i18n import lazy_gettext as _ from invenio_webhooks.models import Event +from sqlalchemy import UniqueConstraint from sqlalchemy.dialects import postgresql from sqlalchemy_utils.models import Timestamp from sqlalchemy_utils.types import ChoiceType, JSONType, UUIDType @@ -107,10 +108,39 @@ def color(self): return RELEASE_STATUS_COLOR[self.name] +repository_user_association = db.Table( + "vcs_repository_users", + db.Model.metadata, + db.Column( + "repository_id", + UUIDType, + db.ForeignKey("vcs_repositories.id"), + primary_key=True, + ), + db.Column( + "user_id", db.Integer, db.ForeignKey("accounts_user.id"), primary_key=True + ), +) + + class Repository(db.Model, Timestamp): """Information about a GitHub repository.""" - __tablename__ = "github_repositories" + __tablename__ = "vcs_repositories" + + __table_args__ = ( + UniqueConstraint( + "provider", + "name", + name="uq_vcs_repositories_provider_name", + ), + UniqueConstraint( + "provider", + "provider_id", + name="uq_vcs_repositories_provider_provider_id", + ), + # Index("ix_vcs_repositories_provider_provider_id", "provider", "provider_id"), + ) id = db.Column( UUIDType, @@ -119,11 +149,9 @@ class Repository(db.Model, Timestamp): ) """Repository identifier.""" - github_id = db.Column( - db.Integer, - unique=True, - index=True, - nullable=True, + provider_id = db.Column( + db.String(255), + nullable=False, ) """Unique GitHub identifier for a repository. @@ -142,29 +170,66 @@ class Repository(db.Model, Timestamp): `github_id`, that only has a `name`. 
""" - name = db.Column(db.String(255), unique=True, index=True, nullable=False) - """Fully qualified name of the repository including user/organization.""" + provider = db.Column(db.String(255), nullable=False) + """Which VCS provider the repository is hosted by (and therefore the context in which to consider the provider_id)""" + + description = db.Column(db.String(10000), nullable=True) + html_url = db.Column(db.String(10000), nullable=False) + license_spdx = db.Column(db.String(255), nullable=True) + default_branch = db.Column(db.String(255), nullable=False) - user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) - """Reference user that can manage this repository.""" + full_name = db.Column("name", db.String(255), nullable=False) + """Fully qualified name of the repository including user/organization.""" - hook = db.Column(db.Integer) + hook = db.Column(db.String(255), nullable=True) """Hook identifier.""" + enabled_by_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) + # # Relationships # - user = db.relationship(User) + users = db.relationship(User, secondary=repository_user_association) + enabled_by_user = db.relationship(User, foreign_keys=[enabled_by_id]) @classmethod - def create(cls, user_id, github_id=None, name=None, **kwargs): + def create( + cls, + provider, + provider_id, + html_url, + default_branch, + full_name=None, + description=None, + license_spdx=None, + **kwargs, + ): """Create the repository.""" - obj = cls(user_id=user_id, github_id=github_id, name=name, **kwargs) + obj = cls( + provider=provider, + provider_id=provider_id, + full_name=full_name, + html_url=html_url, + default_branch=default_branch, + description=description, + license_spdx=license_spdx, + **kwargs, + ) db.session.add(obj) return obj + def add_user(self, user_id: int): + user = User(id=user_id) + user = db.session.merge(user) + self.users.append(user) + + def remove_user(self, user_id: int): + user = User(id=user_id) + user = 
db.session.merge(user) + self.users.remove(user) + @classmethod - def get(cls, github_id=None, name=None): + def get(cls, provider, provider_id=None, full_name=None): """Return a repository given its name or github id. :param integer github_id: GitHub repository identifier. @@ -177,10 +242,14 @@ def get(cls, github_id=None, name=None): exist. """ repo = None - if github_id: - repo = cls.query.filter(Repository.github_id == github_id).one_or_none() - if not repo and name is not None: - repo = cls.query.filter(Repository.name == name).one_or_none() + if provider_id: + repo = cls.query.filter( + Repository.provider_id == provider_id, Repository.provider == provider + ).one_or_none() + if not repo and full_name is not None: + repo = cls.query.filter( + Repository.full_name == full_name, Repository.provider == provider + ).one_or_none() return repo @@ -200,13 +269,27 @@ def latest_release(self, status=None): def __repr__(self): """Get repository representation.""" - return "".format(self=self) + return "".format(self=self) class Release(db.Model, Timestamp): """Information about a GitHub release.""" - __tablename__ = "github_releases" + __tablename__ = "vcs_releases" + + __table_args__ = ( + UniqueConstraint( + "provider", + "provider_id", + name="uq_vcs_releases_provider_id_provider", + ), + UniqueConstraint( + "provider_id", + "provider", + "tag", + name="uq_vcs_releases_provider_id_provider_tag", + ), + ) id = db.Column( UUIDType, @@ -215,9 +298,12 @@ class Release(db.Model, Timestamp): ) """Release identifier.""" - release_id = db.Column(db.Integer, unique=True, nullable=True) + provider_id = db.Column(db.String(255), nullable=True) """Unique GitHub release identifier.""" + provider = db.Column(db.String(255), nullable=False) + """Which VCS provider the release is hosted by (and therefore the context in which to consider the provider_id)""" + tag = db.Column(db.String(255)) """Release tag.""" @@ -258,4 +344,4 @@ class Release(db.Model, Timestamp): def 
__repr__(self): """Get release representation.""" - return f"" + return f"" diff --git a/invenio_vcs/oauth/handlers.py b/invenio_vcs/oauth/handlers.py new file mode 100644 index 00000000..3f5f505f --- /dev/null +++ b/invenio_vcs/oauth/handlers.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +# This file is part of Invenio. +# Copyright (C) 2025 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Implement OAuth client handler.""" + +import typing + +from flask import current_app, redirect, url_for +from flask_login import current_user +from invenio_db import db +from invenio_oauth2server.models import Token as ProviderToken +from invenio_oauthclient import oauth_unlink_external_id + +from invenio_vcs.service import VCSService +from invenio_vcs.tasks import disconnect_provider + +if typing.TYPE_CHECKING: + from invenio_vcs.providers import RepositoryServiceProviderFactory + + +class OAuthHandlers: + def __init__(self, provider_factory: "RepositoryServiceProviderFactory") -> None: + self.provider_factory = provider_factory + + def account_setup_handler(self, remote, token, resp): + """Perform post initialization.""" + try: + svc = VCSService( + self.provider_factory.for_user(token.remote_account.user_id) + ) + svc.init_account() + svc.sync() + db.session.commit() + except Exception as e: + current_app.logger.warning(str(e), exc_info=True) + + def disconnect_handler(self, remote): + """Disconnect callback handler for GitHub.""" + # User must be authenticated + if not current_user.is_authenticated: + return current_app.login_manager.unauthorized() + + external_method = self.provider_factory.id + external_ids = [ + i.id + for i in current_user.external_identifiers + if i.method == external_method + ] + if external_ids: + oauth_unlink_external_id(dict(id=external_ids[0], method=external_method)) + + svc = VCSService(self.provider_factory.for_user(current_user.id)) + 
token = svc.provider.session_token + + if token: + extra_data = token.remote_account.extra_data + + # Delete the token that we issued for GitHub to deliver webhooks + webhook_token_id = extra_data.get("tokens", {}).get("webhook") + ProviderToken.query.filter_by(id=webhook_token_id).delete() + + # Disable every GitHub webhook from our side + repos = svc.user_enabled_repositories.all() + repos_with_hooks = [] + for repo in repos: + if repo.hook: + repos_with_hooks.append((repo.provider_id, repo.hook)) + svc.disable_repository(repo.provider_id) + + # Commit any changes before running the asynchronous task + db.session.commit() + + # Send Celery task for webhooks removal and token revocation + disconnect_provider.delay( + self.provider_factory.id, + current_user.id, + token.access_token, + repos_with_hooks, + ) + + # Delete the RemoteAccount (along with the associated RemoteToken) + token.remote_account.delete() + db.session.commit() + + return redirect(url_for("invenio_oauthclient_settings.index")) diff --git a/invenio_vcs/providers.py b/invenio_vcs/providers.py new file mode 100644 index 00000000..1cb2c575 --- /dev/null +++ b/invenio_vcs/providers.py @@ -0,0 +1,379 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any, Generator +from urllib.parse import urlparse + +from invenio_i18n import gettext as _ +from invenio_oauth2server.models import Token as ProviderToken +from invenio_oauthclient import current_oauthclient +from invenio_oauthclient.handlers import token_getter +from invenio_oauthclient.models import RemoteAccount, RemoteToken +from urllib3 import HTTPResponse +from werkzeug.local import LocalProxy +from werkzeug.utils import cached_property + +from invenio_vcs.errors import RemoteAccountDataNotSet +from invenio_vcs.generic_models import ( + GenericContributor, + GenericOwner, + GenericRelease, + GenericRepository, + GenericUser, + GenericWebhook, +) +from invenio_vcs.oauth.handlers import OAuthHandlers + + 
+class RepositoryServiceProviderFactory(ABC): + """ + A factory to create user-specific VCS providers. This class is instantiated once per instance, + usually in the `invenio.cfg` file. It contains general settings and methods that are impossible + to generalise and must be specified on a provider-specific level. + + All methods within this class (except the constructor) should be pure functions. + """ + + def __init__( + self, + provider: type["RepositoryServiceProvider"], + base_url: str, + webhook_receiver_url: str, + id: str, + name: str, + description: str, + icon: str, + credentials_key: str, + repository_name: str, + repository_name_plural: str, + ): + self.provider = provider + self.base_url = base_url + self.webhook_receiver_url = webhook_receiver_url + self.id = id + self.name = name + self.description = description + self.icon = icon + self.credentials_key = credentials_key + self.repository_name = repository_name + self.repository_name_plural = repository_name_plural + + @property + @abstractmethod + def remote_config(self) -> dict[str, Any]: + """ + Returns a dictionary as the config of the OAuth remote app for this provider. + The config of the app is usually based on the config variables provided + in the constructor. + """ + raise NotImplementedError + + @property + def oauth_handlers(self): + return OAuthHandlers(self) + + @cached_property + def remote(self): + return LocalProxy(lambda: current_oauthclient.oauth.remote_apps[self.id]) + + @property + @abstractmethod + def config(self) -> dict: + """ + Returns a configuration dictionary with options that are specific to a given provider. + """ + raise NotImplementedError + + @abstractmethod + def url_for_tag(self, repository_name, tag_name) -> str: + """ + Generates the URL for the UI page showing the file tree for the latest commit with a + given named tag. This is not the page showing the details of a corresponding release. 
+ """ + raise NotImplementedError + + @abstractmethod + def url_for_new_release(self, repository_name) -> str: + """ + Generates the URL for the UI page through which the user can create a new release + for a specific repository. + """ + raise NotImplementedError + + @abstractmethod + def url_for_new_file(self, repository_name, branch_name, file_name) -> str: + """ + Generates the URL for the UI pages through which a new file with a specific name + on a specific branch in a specific repository can be created. Usually, + this allows the user to type the file contents directly or upload an existing + file. + """ + raise NotImplementedError + + @abstractmethod + def webhook_is_create_release_event(self, event_payload: dict[str, Any]): + """ + Returns whether the raw JSON payload of a webhook event is an event corresponding + to the publication of a release. Returning False will end further processing of the + event. + """ + raise NotImplementedError + + @abstractmethod + def webhook_event_to_generic( + self, event_payload: dict[str, Any] + ) -> tuple[GenericRelease, GenericRepository]: + """ + Returns the data of the release and repository as extracted from the raw JSON payload + of a webhook event, in generic form. + """ + raise NotImplementedError + + def for_user(self, user_id: int): + return self.provider(self, user_id) + + def for_access_token(self, user_id: int, access_token: str): + return self.provider(self, user_id, access_token=access_token) + + @property + def vocabulary(self): + return { + "id": self.id, + "name": self.name, + "repository_name": self.repository_name, + "repository_name_plural": self.repository_name_plural, + "icon": self.icon, + } + + +class RepositoryServiceProvider(ABC): + """ + The methods to interact with the API of a VCS provider. This class is user-specific + and is always created from a `RepositoryServiceProviderFactory`. 
+ + While some of the default method implementations (such as `access_token`) make access to + the DB, overrides of the unimplemented methods should avoid doing so to minimise + unexpected behaviour. Interaction should be solely with the API of the VCS provider. + + Providers must currently support all of these operations. + """ + + def __init__( + self, factory: RepositoryServiceProviderFactory, user_id: int, access_token=None + ) -> None: + self.factory = factory + self.user_id = user_id + self._access_token = access_token + + @cached_property + def remote_account(self): + """ + Returns the OAuth Remote Account corresponding to the user's authentication + with the provider + """ + return RemoteAccount.get(self.user_id, self.factory.remote.consumer_key) + + @cached_property + def access_token(self): + """Return OAuth access token's value.""" + if self._access_token is not None: + return self._access_token + + token = RemoteToken.get(self.user_id, self.factory.remote.consumer_key) + if not token: + # The token is not yet in DB, it is retrieved from the request session. + return self.factory.remote.get_request_token()[0] + return token.access_token + + @property + def session_token(self): + """Return OAuth session token.""" + session_token = token_getter(self.factory.remote) + if session_token: + token = RemoteToken.get( + self.user_id, + self.factory.remote.consumer_key, + access_token=session_token[0], + ) + return token + return None + + @cached_property + def webhook_url(self): + """ + Returns a formatted version of the webhook receiver URL specified in the provider + factory. The `{token}` variable in this URL string is replaced with the user-specific + webhook token. 
+ """ + if not self.remote_account.extra_data.get("tokens", {}).get("webhook"): + raise RemoteAccountDataNotSet( + self.user_id, _("Webhook data not found for user tokens (remote data).") + ) + + webhook_token = ProviderToken.query.filter_by( + id=self.remote_account.extra_data["tokens"]["webhook"] + ).first() + if webhook_token: + return self.factory.webhook_receiver_url.format( + token=webhook_token.access_token + ) + + def is_valid_webhook(self, url: str | None): + """Check if webhook url is valid. + + The webhook url is valid if it has the same host as the configured webhook url. + + :param str url: The webhook url to be checked. + :returns: True if the webhook url is valid, False otherwise. + """ + if not url: + return False + configured_host = urlparse(self.webhook_url).netloc + url_host = urlparse(url).netloc + if not (configured_host and url_host): + return False + return configured_host == url_host + + @abstractmethod + def list_repositories(self) -> dict[str, GenericRepository] | None: + """ + Returns a dictionary of {repository_id: GenericRepository} for the current + user. This should return _all_ repositories for which the user has permission + to create and delete webhooks. + + This means this function could return extremely large dictionaries in some cases, + but it will only be called during irregular sync events and stored in the DB. + """ + raise NotImplementedError + + @abstractmethod + def list_repository_webhooks( + self, repository_id: str + ) -> list[GenericWebhook] | None: + """ + Returns an arbitrarily ordered list of the current webhooks of a repository. + This list should only include active webhooks which generate events for which + the corresponding `RepositoryServiceProviderFactory.webhook_is_create_release_event` + would return True. 
+ """ + raise NotImplementedError + + def get_first_valid_webhook(self, repository_id: str) -> GenericWebhook | None: + webhooks = self.list_repository_webhooks(repository_id) + if webhooks is None: + return None + for hook in webhooks: + if self.is_valid_webhook(hook.url): + return hook + return None + + @abstractmethod + def get_repository(self, repository_id: str) -> GenericRepository | None: + """ + Returns the details of a specific repository by ID, or None if the + repository does not exist or the user has no permission to view it. + """ + raise NotImplementedError + + @abstractmethod + def list_repository_contributors( + self, repository_id: str, max: int + ) -> list[GenericContributor] | None: + """ + Returns the list of entities that have contributed to a given repository. + This list may contain entities that are not currently or never have been + registered users of the VCS provider (e.g. in the case of repos imported + from a remote source). + + Returns None if the repository does not exist or the user has no permission + to view it or its contributors. + """ + raise NotImplementedError + + @abstractmethod + def list_repository_user_ids(self, repository_id: str) -> list[str] | None: + """ + Returns a list of the IDs of valid users registered with the VCS provider + that have sufficient permission to create/delete webhooks on the given + repository. This list should contain all users for which the corresponding + repo would be included in a `list_repositories` call. + + Returns None if the repository does not exist or the user has no permission + to view it or its member users. + """ + raise NotImplementedError + + @abstractmethod + def get_repository_owner(self, repository_id: str) -> GenericOwner | None: + """ + Returns the 'owner' of a repository, which is either a user or a group/organization. + Returns None if the repository does not exist or the user does not have permission + to find out its owner. 
+ """ + raise NotImplementedError + + @abstractmethod + def create_webhook(self, repository_id: str) -> str | None: + """ + Creates a new webhook for a given repository, triggered by a "create release" event. + The URL destination is specified by `RepositoryServiceProvider.webhook_url`. + Events must be delivered via an HTTP POST request with a JSON payload. + + Returns the ID of the new webhook as returned by the provider, or None if the + creation failed due to the repository not existing or the user not having permission + to create a webhook. + """ + raise NotImplementedError + + @abstractmethod + def delete_webhook(self, repository_id: str, hook_id: str | None = None) -> bool: + """ + Deletes a webhook from the specified repository. + If `hook_id` is specified, the webhook with that ID must be deleted. + Otherwise, all webhooks with URLs for which `is_valid_webhook` would return + True should be deleted. + + Returns True if the deletion was successful, and False if it failed due to + the repository not existing or the user not having permission to delete its + webhooks. + """ + raise NotImplementedError + + @abstractmethod + def get_own_user(self) -> GenericUser | None: + """ + Returns information about the user for which this class has been instantiated, + or None if the user does not exist (e.g. if the user ID is incorrectly specified). + """ + raise NotImplementedError + + @abstractmethod + def resolve_release_zipball_url(self, release_zipball_url: str) -> str | None: + # TODO: why do we have this + raise NotImplementedError + + @abstractmethod + def fetch_release_zipball( + self, release_zipball_url: str, timeout: int + ) -> Generator[HTTPResponse]: + """ + Returns the HTTP response for downloading the contents of a zipball from a given release. + This is provider-specific functionality as it will require attaching an auth token + to the request for private repos (and even public repos to avoid rate limits sometimes). 
+ """ + raise NotImplementedError + + @abstractmethod + def retrieve_remote_file( + self, repository_id: str, ref_name: str, file_name: str + ) -> bytes | None: + """ + Downloads the contents of a specific file in a repo for a given ref (which could be + a tag, a commit ref, a branch name, etc). Returns the raw bytes, or None if the + repo/file does not exist or the user doesn't have permission to view it. + """ + raise NotImplementedError + + @abstractmethod + def revoke_token(self, access_token: str): + raise NotImplementedError diff --git a/invenio_github/proxies.py b/invenio_vcs/proxies.py similarity index 93% rename from invenio_github/proxies.py rename to invenio_vcs/proxies.py index 710e639f..7dc9f40d 100644 --- a/invenio_github/proxies.py +++ b/invenio_vcs/proxies.py @@ -27,4 +27,4 @@ from flask import current_app from werkzeug.local import LocalProxy -current_github = LocalProxy(lambda: current_app.extensions["invenio-github"]) +current_vcs = LocalProxy(lambda: current_app.extensions["invenio-vcs"]) diff --git a/invenio_github/receivers.py b/invenio_vcs/receivers.py similarity index 73% rename from invenio_github/receivers.py rename to invenio_vcs/receivers.py index 22fc1c9b..0220e387 100644 --- a/invenio_github/receivers.py +++ b/invenio_vcs/receivers.py @@ -25,8 +25,9 @@ from invenio_db import db from invenio_webhooks.models import Receiver -from invenio_github.models import Release, ReleaseStatus, Repository -from invenio_github.tasks import process_release +from invenio_vcs.config import get_provider_by_id +from invenio_vcs.models import Release, ReleaseStatus, Repository +from invenio_vcs.tasks import process_release from .errors import ( InvalidSenderError, @@ -37,9 +38,13 @@ ) -class GitHubReceiver(Receiver): +class VCSReceiver(Receiver): """Handle incoming notification from GitHub on a new release.""" + def __init__(self, receiver_id): + super().__init__(receiver_id) + self.provider_factory = get_provider_by_id(receiver_id) + def run(self, event): 
"""Process an event. @@ -53,43 +58,42 @@ def run(self, event): def _handle_event(self, event): """Handles an incoming github event.""" - action = event.payload.get("action") - is_draft_release = event.payload.get("release", {}).get("draft") - - # Draft releases do not create releases on invenio - is_create_release_event = ( - action in ("published", "released", "created") and not is_draft_release + is_create_release_event = self.provider_factory.webhook_is_create_release_event( + event.payload ) if is_create_release_event: self._handle_create_release(event) - else: - pass def _handle_create_release(self, event): """Creates a release in invenio.""" try: - release_id = event.payload["release"]["id"] + generic_release, generic_repo = ( + self.provider_factory.webhook_event_to_generic(event.payload) + ) # Check if the release already exists existing_release = Release.query.filter_by( - release_id=release_id, + provider_id=generic_release.id, ).first() if existing_release: raise ReleaseAlreadyReceivedError(release=existing_release) # Create the Release - repo_id = event.payload["repository"]["id"] - repo_name = event.payload["repository"]["name"] - repo = Repository.get(repo_id, repo_name) + repo = Repository.get( + self.provider_factory.id, + provider_id=generic_repo.id, + full_name=generic_repo.full_name, + ) if not repo: - raise RepositoryNotFoundError(repo_name) + raise RepositoryNotFoundError(generic_repo.full_name) if repo.enabled: release = Release( - release_id=release_id, - tag=event.payload["release"]["tag_name"], + provider_id=generic_release.id, + provider=self.provider_factory.id, + tag=generic_release.tag_name, repository=repo, event=event, status=ReleaseStatus.RECEIVED, @@ -101,7 +105,7 @@ def _handle_create_release(self, event): # Process the release # Since 'process_release' is executed asynchronously, we commit the current state of session db.session.commit() - process_release.delay(release.release_id) + process_release.delay(self.provider_factory.id, 
release.provider_id) except (ReleaseAlreadyReceivedError, RepositoryDisabledError) as e: event.response_code = 409 diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py new file mode 100644 index 00000000..aaabe7cc --- /dev/null +++ b/invenio_vcs/service.py @@ -0,0 +1,620 @@ +from abc import abstractmethod +from contextlib import contextmanager +from dataclasses import asdict +from typing import TYPE_CHECKING + +from flask import current_app +from invenio_access.permissions import authenticated_user +from invenio_access.utils import get_identity +from invenio_accounts.models import User, UserIdentity +from invenio_db import db +from invenio_i18n import gettext as _ +from invenio_oauth2server.models import Token as ProviderToken +from invenio_oauthclient import oauth_link_external_id +from invenio_oauthclient.models import RemoteAccount +from sqlalchemy import delete, select +from sqlalchemy.exc import NoResultFound +from werkzeug.utils import cached_property + +from invenio_vcs.config import get_provider_by_id +from invenio_vcs.errors import ( + RemoteAccountDataNotSet, + RemoteAccountNotFound, + RepositoryAccessError, + RepositoryDisabledError, + RepositoryNotFoundError, + UserInfoNoneError, +) +from invenio_vcs.generic_models import GenericRelease, GenericRepository +from invenio_vcs.models import ( + Release, + ReleaseStatus, + Repository, + repository_user_association, +) +from invenio_vcs.proxies import current_vcs +from invenio_vcs.tasks import sync_hooks as sync_hooks_task +from invenio_vcs.utils import iso_utcnow + +if TYPE_CHECKING: + from invenio_vcs.providers import ( + RepositoryServiceProvider, + ) + + +class VCSService: + def __init__(self, provider: "RepositoryServiceProvider") -> None: + self.provider = provider + + @staticmethod + def for_provider_and_user(provider_id: str, user_id: int): + return VCSService(get_provider_by_id(provider_id).for_user(user_id)) + + @staticmethod + def for_provider_and_token(provider_id: str, user_id: int, 
access_token: str): + return VCSService( + get_provider_by_id(provider_id).for_access_token(user_id, access_token) + ) + + @cached_property + def is_authenticated(self): + return self.provider.session_token is not None + + @property + def user_available_repositories(self): + """Retrieve user repositories from user's remote data.""" + return Repository.query.join(repository_user_association).filter( + repository_user_association.c.user_id == self.provider.user_id, + Repository.provider == self.provider.factory.id, + ) + + @property + def user_enabled_repositories(self): + """Retrieve user repositories from the model.""" + return Repository.query.join(repository_user_association).filter( + repository_user_association.c.user_id == self.provider.user_id, + Repository.provider == self.provider.factory.id, + Repository.hook != None, + ) + + def list_repositories(self): + """Retrieves user repositories, containing db repositories plus remote repositories.""" + repos = {} + for db_repo in self.user_available_repositories: + repos[db_repo.provider_id] = asdict(GenericRepository.from_model(db_repo)) + release_instance = current_vcs.release_api_class( + db_repo.latest_release(), self.provider + ) + repos[db_repo.provider_id]["instance"] = db_repo + repos[db_repo.provider_id]["latest"] = release_instance + + return repos + + def get_repo_latest_release(self, repo): + """Retrieves the repository last release.""" + # Bail out fast if object (Repository) not in DB session. 
+ if repo not in db.session: + return None + + q = repo.releases.filter_by(status=ReleaseStatus.PUBLISHED) + release_object = q.order_by(db.desc(Release.created)).first() + + return current_vcs.release_api_class(release_object, self.provider) + + def list_repo_releases(self, repo): + # Retrieve releases and sort them by creation date + release_instances = [] + for release_object in repo.releases.order_by(Release.created): + release_instances.append( + current_vcs.release_api_class(release_object, self.provider) + ) + return release_instances + + def get_repo_default_branch(self, repo_id): + db_repo = self.user_available_repositories.filter( + Repository.provider_id == repo_id + ).first() + + if db_repo is None: + return None + + return db_repo.default_branch + + def get_last_sync_time(self): + """Retrieves the last sync delta time from github's client extra data. + + Time is computed as the delta between now and the last sync time. + """ + extra_data = self.provider.remote_account.extra_data + if not extra_data.get("last_sync"): + raise RemoteAccountDataNotSet( + self.provider.user_id, + _("Last sync data is not set for user (remote data)."), + ) + + return extra_data["last_sync"] + + def get_repository(self, repo_id=None, repo_name=None): + """Retrieves one repository. + + Checks for access permission. + """ + repo = Repository.get( + self.provider.factory.id, provider_id=repo_id, full_name=repo_name + ) + if not repo: + raise RepositoryNotFoundError(repo_id) + + # Might raise a RepositoryAccessError + self.check_repo_access_permissions(repo) + + return repo + + def check_repo_access_permissions(self, repo: Repository): + """Checks permissions from user on repo. 
+ + Repo has access if any of the following is True: + + - user is the owner of the repo + - user has access to the repo in GitHub (stored in RemoteAccount.extra_data.repos) + """ + if self.provider.user_id and repo: + user_is_collaborator = any( + user.id == self.provider.user_id for user in repo.users + ) + if user_is_collaborator: + return True + + if self.provider.remote_account and self.provider.remote_account.extra_data: + user_has_remote_access_count = self.user_available_repositories.filter( + Repository.provider_id == repo.provider_id + ).count() + if user_has_remote_access_count == 1: + return True + + raise RepositoryAccessError( + user=self.provider.user_id, repo=repo.full_name, repo_id=repo.provider_id + ) + + def sync_repo_users(self, db_repo: Repository): + """ + Synchronises the member users of the repository. + This retrieves a list of the IDs of users from the VCS who have sufficient access to the + repository (i.e. being able to access all details and create/manage webhooks). + The user IDs are compared locally to find Invenio users who have connected their VCS account. + This is then propagated to the database: Invenio users who have access to the repo are added to + the `repository_user_association` table, and ones who no longer have access are removed. 
+ + :return: boolean of whether any changes were made to the DB + """ + + vcs_user_ids = self.provider.list_repository_user_ids(db_repo.provider_id) + if vcs_user_ids is None: + return + + vcs_user_identities: list[UserIdentity] = [] + # Find local users who have connected their VCS accounts with the IDs from the repo members + for extern_user_id in vcs_user_ids: + user_identity = UserIdentity.query.filter_by( + method=self.provider.factory.id, + id=extern_user_id, + ).first() + + if user_identity is None: + continue + + vcs_user_identities.append(user_identity) + + is_changed = False + + # Create user associations that exist in the VCS but not in the DB + for user_identity in vcs_user_identities: + if not any( + db_user.id == user_identity.id_user for db_user in db_repo.users + ): + db_repo.add_user(user_identity.id_user) + is_changed = True + + # Remove user associations that exist in the DB but not in the VCS + for db_user in db_repo.users: + if not any( + user_identity.id_user == db_user.id + for user_identity in vcs_user_identities + ): + db_repo.remove_user(db_user.id) + is_changed = True + + return is_changed + + def sync(self, hooks=True, async_hooks=True): + """Synchronize user repositories. + + :param bool hooks: True for syncing hooks. + :param bool async_hooks: True for sending of an asynchronous task to + sync hooks. + + .. note:: + + Syncing happens from GitHub's direction only. This means that we + consider the information on GitHub as valid, and we overwrite our + own state based on this information.
+ """ + vcs_repos = self.provider.list_repositories() + if vcs_repos is None: + vcs_repos = {} + + if hooks: + self._sync_hooks(vcs_repos.keys(), asynchronous=async_hooks) + + # Update changed names for repositories stored in DB + db_repos = ( + Repository.query.join(repository_user_association) + .filter( + repository_user_association.c.user_id == self.provider.user_id, + Repository.provider == self.provider.factory.id, + ) + .all() + ) + + for db_repo in db_repos: + vcs_repo = vcs_repos.get(db_repo.provider_id) + if not vcs_repo: + continue + + changed_users = self.sync_repo_users(db_repo) + changed_model = vcs_repo.to_model(db_repo) + if changed_users or changed_model: + db.session.add(db_repo) + + # Remove ownership from repositories that the user has no longer + # 'admin' permissions, or have been deleted. + delete_stmt = delete(repository_user_association).where( + repository_user_association.c.user_id == self.provider.user_id, + Repository.provider == self.provider.factory.id, + ~Repository.provider_id.in_(vcs_repos.keys()), + repository_user_association.c.repository_id == Repository.id, + ) + db.session.execute(delete_stmt) + + # Add new repos from VCS to the DB (without the hook activated) + for _, vcs_repo in vcs_repos.items(): + # We cannot just check the repo from the existing `db_repos` list as this only includes the repos to which the user + # already has access. E.g. a repo from the VCS might already exist in our DB but the user doesn't yet have access to it. + corresponding_db_repo = Repository.query.filter( + Repository.provider_id == vcs_repo.id, + Repository.provider == self.provider.factory.id, + ).first() + + if corresponding_db_repo is None: + # We do not yet have this repo registered for any user at all in our DB, so we need to create it. 
+ corresponding_db_repo = Repository.create( + provider=self.provider.factory.id, + provider_id=vcs_repo.id, + html_url=vcs_repo.html_url, + default_branch=vcs_repo.default_branch, + full_name=vcs_repo.full_name, + description=vcs_repo.description, + license_spdx=vcs_repo.license_spdx, + ) + + # In any case (even if we already have the repo) we need to sync its member users + # E.g. maybe the repo is in our DB but the user for which this sync has been triggered isn't registered as a member + self.sync_repo_users(corresponding_db_repo) + + # Update last sync + self.provider.remote_account.extra_data.update( + dict( + last_sync=iso_utcnow(), + ) + ) + self.provider.remote_account.extra_data.changed() + db.session.add(self.provider.remote_account) + + def _sync_hooks(self, repo_ids, asynchronous=True): + """Check if a hooks sync task needs to be started.""" + if not asynchronous: + for repo_id in repo_ids: + try: + self.sync_repo_hook(repo_id) + except RepositoryAccessError: + current_app.logger.warning( + str(RepositoryAccessError), exc_info=True + ) + except NoResultFound: + pass # Repository not in DB yet + else: + # If hooks will run asynchronously, we need to commit any changes done so far + db.session.commit() + sync_hooks_task.delay( + self.provider.factory.id, self.provider.user_id, list(repo_ids) + ) + + def sync_repo_hook(self, repo_id): + """Sync a GitHub repo's hook with the locally stored repo.""" + # Get the hook that we may have set in the past + hook = self.provider.get_first_valid_webhook(repo_id) + vcs_repo = self.provider.get_repository(repo_id) + assert vcs_repo is not None + + # If hook on GitHub exists, get or create corresponding db object and + # enable the hook. Otherwise remove the old hook information.
+ db_repo = Repository.get(self.provider.factory.id, provider_id=repo_id) + + if hook: + if not db_repo: + db_repo = Repository.create( + provider=self.provider.factory.id, + provider_id=repo_id, + html_url=vcs_repo.html_url, + default_branch=vcs_repo.default_branch, + full_name=vcs_repo.full_name, + description=vcs_repo.description, + license_spdx=vcs_repo.license_spdx, + ) + self.sync_repo_users(db_repo) + if not db_repo.enabled: + self.mark_repo_enabled(db_repo, hook.id) + else: + if db_repo: + self.mark_repo_disabled(db_repo) + + def mark_repo_disabled(self, db_repo: Repository): + """Disables a user repository.""" + db_repo.hook = None + db_repo.enabled_by_id = None + + def mark_repo_enabled(self, db_repo: Repository, hook_id: str): + """Enables a user repository.""" + db_repo.hook = hook_id + db_repo.enabled_by_id = self.provider.user_id + + def init_account(self): + """Setup a new GitHub account.""" + if not self.provider.remote_account: + raise RemoteAccountNotFound( + self.provider.user_id, _("Remote account was not found for user.") + ) + + user = self.provider.get_own_user() + if user is None: + raise UserInfoNoneError + + # Setup local access tokens to be used by the webhooks + hook_token = ProviderToken.create_personal( + f"{self.provider.factory.id}-webhook", + self.provider.user_id, + scopes=["webhooks:event"], + is_internal=True, + ) + # Initial structure of extra data + self.provider.remote_account.extra_data = dict( + id=user.id, + login=user.username, + name=user.display_name, + tokens=dict( + webhook=hook_token.id, + ), + last_sync=iso_utcnow(), + ) + + oauth_link_external_id( + User(id=self.provider.user_id), + dict(id=user.id, method=self.provider.factory.id), + ) + + db.session.add(self.provider.remote_account) + + def enable_repository(self, repository_id): + db_repo = self.user_available_repositories.filter( + Repository.provider_id == repository_id + ).first() + if db_repo is None: + raise RepositoryNotFoundError( + repository_id,
_("Failed to enable repository.") + ) + + hook_id = self.provider.create_webhook(repository_id) + if hook_id is None: + return False + + self.mark_repo_enabled(db_repo, hook_id) + return True + + def disable_repository(self, repository_id, hook_id=None): + db_repo = self.user_available_repositories.filter( + Repository.provider_id == repository_id + ).first() + + if db_repo is None: + raise RepositoryNotFoundError( + repository_id, _("Failed to disable repository.") + ) + + if not db_repo.enabled: + raise RepositoryDisabledError(repository_id) + + if not self.provider.delete_webhook(repository_id, hook_id): + return False + + self.mark_repo_disabled(db_repo) + return True + + +class VCSRelease: + """A GitHub release.""" + + def __init__(self, release: Release, provider: "RepositoryServiceProvider"): + """Constructor.""" + self.db_release = release + self.provider = provider + self._resolved_zipball_url = None + + @cached_property + def record(self): + """Release record.""" + return self.resolve_record() + + @cached_property + def event(self): + """Get release event.""" + return self.db_release.event + + @cached_property + def payload(self): + """Return event payload.""" + return self.event.payload + + @cached_property + def _generic_release_and_repo(self): + return self.provider.factory.webhook_event_to_generic(self.payload) + + @cached_property + def generic_release(self) -> "GenericRelease": + """Return release metadata.""" + return self._generic_release_and_repo[0] + + @cached_property + def generic_repo(self) -> "GenericRepository": + """Return repo metadata.""" + return self._generic_release_and_repo[1] + + @cached_property + def db_repo(self) -> Repository: + """Return repository model from database.""" + if self.db_release.repository_id: + repository = self.db_release.repository + else: + repository = Repository.query.filter_by( + user_id=self.event.user_id, provider_id=self.provider.factory.id + ).one() + return repository + + @cached_property + def 
release_file_name(self): + """Returns release zipball file name.""" + tag_name = self.generic_release.tag_name + repo_name = self.generic_repo.full_name + filename = f"{repo_name}-{tag_name}.zip" + return filename + + @cached_property + def release_zipball_url(self): + """Returns the release zipball URL.""" + return self.generic_release.zipball_url + + @cached_property + def user_identity(self): + """Generates release owner's user identity.""" + identity = get_identity(self.db_repo.enabled_by_user) + identity.provides.add(authenticated_user) + identity.user = self.db_repo.enabled_by_user + return identity + + @cached_property + def contributors(self): + """Get list of contributors to a repository. + + The list of contributors is fetched from Github API, filtered for type "User" and sorted by contributions. + + :returns: a generator of objects that contains contributors information. + :raises UnexpectedGithubResponse: when Github API returns a status code other than 200. + """ + max_contributors = current_app.config.get("VCS_MAX_CONTRIBUTORS_NUMBER", 30) + return self.provider.list_repository_contributors( + self.db_repo.provider_id, max=max_contributors + ) + + @cached_property + def owner(self): + """Get owner of repository as a creator.""" + try: + return self.provider.get_repository_owner(self.db_repo.provider_id) + except Exception: + return None + + # Helper functions + + def is_first_release(self): + """Checks whether the current release is the first release of the repository.""" + latest_release = self.db_repo.latest_release(ReleaseStatus.PUBLISHED) + return True if not latest_release else False + + def test_zipball(self): + """Test if the zipball URL is accessible and return the resolved URL.""" + return self.resolve_zipball_url() + + def resolve_zipball_url(self, cache=True): + """Resolve the zipball URL. 
+ + This method will try to resolve the zipball URL by making a HEAD request, + handling the following edge cases: + + - In the case of a 300 Multiple Choices response, which can happen when a tag + and branch have the same name, it will try to fetch an "alternate" link. + - If the access token does not have the required scopes/permissions to access + public links, it will fallback to a non-authenticated request. + """ + if self._resolved_zipball_url and cache: + return self._resolved_zipball_url + + url = self.release_zipball_url + url = self.provider.resolve_release_zipball_url(url) + + if cache: + self._resolved_zipball_url = url + + return url + + # High level API + + def release_failed(self): + """Set release status to FAILED.""" + self.db_release.status = ReleaseStatus.FAILED + + def release_processing(self): + """Set release status to PROCESSING.""" + self.db_release.status = ReleaseStatus.PROCESSING + + def release_published(self): + """Set release status to PUBLISHED.""" + self.db_release.status = ReleaseStatus.PUBLISHED + + @contextmanager + def fetch_zipball_file(self): + """Fetch release zipball file using the current github session.""" + timeout = current_app.config.get("VCS_ZIPBALL_TIMEOUT", 300) + zipball_url = self.resolve_zipball_url() + return self.provider.fetch_release_zipball(zipball_url, timeout) + + def publish(self): + """Publish a GitHub release.""" + raise NotImplementedError + + def process_release(self): + """Processes a github release.""" + raise NotImplementedError + + def resolve_record(self): + """Resolves a record from the release. To be implemented by the API class implementation.""" + raise NotImplementedError + + def serialize_record(self): + """Serializes the release record.""" + raise NotImplementedError + + @property + @abstractmethod + def badge_title(self): + """Stores a string to render in the record badge title (e.g. 
'DOI').""" + return None + + @property + @abstractmethod + def badge_value(self): + """Stores a string to render in the record badge value (e.g. '10.1234/invenio.1234').""" + raise NotImplementedError + + @property + def record_url(self): + """Release self url (e.g. github HTML url).""" + raise NotImplementedError diff --git a/invenio_github/tasks.py b/invenio_vcs/tasks.py similarity index 68% rename from invenio_github/tasks.py rename to invenio_vcs/tasks.py index 36755e7e..cb8fd34f 100644 --- a/invenio_github/tasks.py +++ b/invenio_vcs/tasks.py @@ -24,8 +24,8 @@ """Task for managing GitHub integration.""" import datetime +from typing import TYPE_CHECKING -import github3 from celery import shared_task from flask import current_app, g from invenio_db import db @@ -33,9 +33,13 @@ from invenio_oauthclient.models import RemoteAccount from invenio_oauthclient.proxies import current_oauthclient -from invenio_github.errors import CustomGitHubMetadataError, RepositoryAccessError -from invenio_github.models import Release, ReleaseStatus -from invenio_github.proxies import current_github +from invenio_vcs.config import get_provider_by_id +from invenio_vcs.errors import CustomGitHubMetadataError, RepositoryAccessError +from invenio_vcs.models import Release, ReleaseStatus +from invenio_vcs.proxies import current_vcs + +if TYPE_CHECKING: + from invenio_vcs.service import VCSRelease def _get_err_obj(msg): @@ -46,15 +50,15 @@ def _get_err_obj(msg): return err -def release_gh_metadata_handler(release, ex): +def release_gh_metadata_handler(release: "VCSRelease", ex): """Handler for CustomGithubMetadataError.""" - release.release_object.errors = _get_err_obj(str(ex)) + release.db_release.errors = _get_err_obj(str(ex)) db.session.commit() -def release_default_exception_handler(release, ex): +def release_default_exception_handler(release: "VCSRelease", ex): """Default handler.""" - release.release_object.errors = _get_err_obj(str(ex)) + release.db_release.errors = 
_get_err_obj(str(ex)) db.session.commit() @@ -65,48 +69,47 @@ def release_default_exception_handler(release, ex): @shared_task(max_retries=6, default_retry_delay=10 * 60, rate_limit="100/m") -def disconnect_github(access_token, repo_hooks): +def disconnect_provider(provider_id, user_id, access_token, repo_hooks): """Uninstall webhooks.""" # Note at this point the remote account and all associated data have # already been deleted. The celery task is passed the access_token to make # some last cleanup and afterwards delete itself remotely. # Local import to avoid circular imports - from .api import GitHubAPI + from .service import VCSService try: # Create a nested transaction to make sure that hook deletion + token revoke is atomic with db.session.begin_nested(): - gh = github3.login(token=access_token) + svc = VCSService.for_provider_and_token(provider_id, user_id, access_token) + for repo_id, repo_hook in repo_hooks: - ghrepo = gh.repository_with_id(repo_id) - if ghrepo: - hook = ghrepo.hook(repo_hook) - if hook and hook.delete(): - current_app.logger.info( - _("Deleted hook from github repository."), - extra={"hook": hook.id, "repo": ghrepo.full_name}, - ) + if svc.disable_repository(repo_id, repo_hook): + current_app.logger.info( + _("Deleted hook from github repository."), + extra={"hook": repo_hook, "repo": repo_id}, + ) + # If we finished our clean-up successfully, we can revoke the token - GitHubAPI.revoke_token(access_token) + svc.provider.revoke_token(access_token) except Exception as exc: # Retry in case GitHub may be down... 
- disconnect_github.retry(exc=exc) + disconnect_provider.retry(exc=exc) @shared_task(max_retries=6, default_retry_delay=10 * 60, rate_limit="100/m") -def sync_hooks(user_id, repositories): +def sync_hooks(provider, user_id, repositories): """Sync repository hooks for a user.""" # Local import to avoid circular imports - from .api import GitHubAPI + from .service import VCSService try: # Sync hooks - gh = GitHubAPI(user_id=user_id) + svc = VCSService.for_provider_and_user(provider, user_id) for repo_id in repositories: try: with db.session.begin_nested(): - gh.sync_repo_hook(repo_id) + svc.sync_repo_hook(repo_id) # We commit per repository, because while the task is running db.session.commit() except RepositoryAccessError as e: @@ -118,14 +121,17 @@ def sync_hooks(user_id, repositories): @shared_task(ignore_result=True, max_retries=5, default_retry_delay=10 * 60) -def process_release(release_id): +def process_release(provider, release_id): """Process a received Release.""" release_model = Release.query.filter( - Release.release_id == release_id, + Release.provider_id == release_id, Release.status.in_([ReleaseStatus.RECEIVED, ReleaseStatus.FAILED]), ).one() - release = current_github.release_api_class(release_model) + provider = get_provider_by_id(provider).for_user( + release_model.repository.enabled_by_id + ) + release = current_vcs.release_api_class(release_model, provider) matched_error_cls = None matched_ex = None @@ -134,7 +140,7 @@ def process_release(release_id): release.process_release() db.session.commit() except Exception as ex: - error_handlers = current_github.release_error_handlers + error_handlers = current_vcs.release_error_handlers matched_ex = None for error_cls, handler in error_handlers + DEFAULT_ERROR_HANDLERS: if isinstance(ex, error_cls): @@ -148,32 +154,32 @@ def process_release(release_id): @shared_task(ignore_result=True) -def refresh_accounts(expiration_threshold=None): +def refresh_accounts(provider, expiration_threshold=None): """Refresh 
stale accounts, avoiding token expiration. :param expiration_threshold: Dictionary containing timedelta parameters referring to the maximum inactivity time. """ - expiration_date = datetime.datetime.utcnow() - datetime.timedelta( - **(expiration_threshold or {"days": 6 * 30}) - ) + expiration_date = datetime.datetime.now( + tz=datetime.timezone.utc + ) - datetime.timedelta(**(expiration_threshold or {"days": 6 * 30})) - remote = current_oauthclient.oauth.remote_apps["github"] + remote = current_oauthclient.oauth.remote_apps[provider] remote_accounts_to_be_updated = RemoteAccount.query.filter( RemoteAccount.updated < expiration_date, RemoteAccount.client_id == remote.consumer_key, ) for remote_account in remote_accounts_to_be_updated: - sync_account.delay(remote_account.user_id) + sync_account.delay(provider, remote_account.user_id) @shared_task(ignore_result=True) -def sync_account(user_id): +def sync_account(provider, user_id): """Sync a user account.""" # Local import to avoid circular imports - from .api import GitHubAPI + from .service import VCSService # Start a nested transaction so every data writing inside sync is executed atomically with db.session.begin_nested(): - gh = GitHubAPI(user_id=user_id) - gh.sync(hooks=False, async_hooks=False) + svc = VCSService.for_provider_and_user(provider, user_id) + svc.sync(hooks=False, async_hooks=False) diff --git a/invenio_github/templates/semantic-ui/invenio_github/base.html b/invenio_vcs/templates/semantic-ui/invenio_vcs/base.html similarity index 100% rename from invenio_github/templates/semantic-ui/invenio_github/base.html rename to invenio_vcs/templates/semantic-ui/invenio_vcs/base.html diff --git a/invenio_github/templates/semantic-ui/invenio_github/helpers.html b/invenio_vcs/templates/semantic-ui/invenio_vcs/helpers.html similarity index 83% rename from invenio_github/templates/semantic-ui/invenio_github/helpers.html rename to invenio_vcs/templates/semantic-ui/invenio_vcs/helpers.html index 024e049d..da997bd2 
100644 --- a/invenio_github/templates/semantic-ui/invenio_github/helpers.html +++ b/invenio_vcs/templates/semantic-ui/invenio_vcs/helpers.html @@ -8,14 +8,14 @@ #} {% from "semantic-ui/invenio_formatter/macros/badges.html" import badges_formats_list %} -{%- macro doi_badge(doi, doi_url, github_id) %} +{%- macro doi_badge(doi, doi_url, provider_id, provider) %} {%- block doi_badge scoped %} - {% set image_url = url_for('invenio_github_badge.index', repo_github_id=github_id, _external=True) %} + {% set image_url = url_for('invenio_vcs_badge.index', provider=provider, repo_provider_id=provider_id, _external=True) %} @@ -59,8 +58,8 @@

helpers.panel_start( _('Releases'), btn_text=_('Create release'), - btn_icon='github icon', - btn_href=github_rel_url, + btn_icon=vocabulary["icon"] + ' icon', + btn_href=new_release_url, ) }}
@@ -70,10 +69,10 @@

{%- block enabled_repo_get_started scoped %} {%- endblock enabled_repo_get_started %} @@ -93,7 +92,7 @@

1 {{ _("Flip the switch") }}

{{ _("Toggle the switch below to turn on/off automatic preservation of your repository.") }}

- {{ helpers.repo_switch(repo, repo.github_id) }} + {{ helpers.repo_switch(repo, repo.provider_id) }}
@@ -129,17 +128,12 @@

2 {{ _("Create a release") }}

{%- for release in releases %} - {% set release_status = release.release_object.status.title %} - {% set release_status_color = release.release_object.status.color %} + {% set release_status = release.db_release.status.title %} + {% set release_status_color = release.db_release.status.color %} {% set release_status_icon_color = release_status_color %} - {% set release_tag = release.release_object.tag %} - {% set release_url = 'https://github.com/{0}/releases/tag/{1}'.format(repo.name, release_tag) %} + {% set release_tag = release.db_release.tag %} + {% set release_url = release.generic_release.html_url %} {% set release_name = release_tag %} - - {% if release.event %} - {% set release_name = release.event.payload.get("release", {}).get("name", release_name) %} - {% set release_url = release.event.payload.get("release", {}).get("html_url", release_url) %} - {% endif %} {% if release_status_color == "warning" %} {% set release_status_icon_color = "warning-color" %} {% endif %} @@ -174,9 +168,9 @@

2 {{ _("Create a release") }}

- {{ release_name or release_tag }} + {{ release_name or release_tag }}

@@ -185,13 +179,13 @@

2 {{ _("Create a release") }}

{%- block release_status scoped %}
- + - {{ release.release_object.status.title }} + {{ release.db_release.status.title }}

- {{ release.release_object.created|naturaltime }} + {{ release.db_release.created|naturaltime }}

{%- endblock release_status %} @@ -215,7 +209,7 @@

2 {{ _("Create a release") }}

{%- block metadata_tab scoped %} {%- endblock metadata_tab %} - {%- if release.release_object.errors %} + {%- if release.db_release.errors %} @@ -229,11 +223,10 @@

2 {{ _("Create a release") }}

{%- block releasetab_cff %} {% set repo_name = value %} - {% set citation_cff_create_link = 'https://github.com/{0}/new/{1}?filename=CITATION.cff'.format(repo.name, (default_branch or 'master')) %}

@@ -263,8 +256,7 @@

{{ _("Citation File") }}

{%- block releasetab_payload %} {%- if release.event %}
-

{{ _("GitHub Payload") }}

- +

{{ _("%(name)s Payload", name=vocabulary["name"]) }}

{{ _("Received") }} {{ release.event.created|datetimeformat }}.
@@ -282,7 +274,7 @@

{{ _("GitHub Payload") }}

{%- block releasetab_errors %} - {%- if release.release_object.errors %} + {%- if release.db_release.errors %}
@@ -292,7 +284,7 @@

{{ _("Errors") }}

-
{{ release.release_object.errors|tojson(indent=4) }}
+
{{ release.db_release.errors|tojson(indent=4) }}
@@ -326,5 +318,5 @@

{{ _("Errors") }}

{%- block javascript %} {{ super() }} - {{ webpack['invenio-github-init.js'] }} + {{ webpack['invenio-vcs-init.js'] }} {%- endblock javascript %} diff --git a/invenio_github/translations/cs/LC_MESSAGES/messages.po b/invenio_vcs/translations/cs/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/cs/LC_MESSAGES/messages.po rename to invenio_vcs/translations/cs/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/da/LC_MESSAGES/messages.po b/invenio_vcs/translations/da/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/da/LC_MESSAGES/messages.po rename to invenio_vcs/translations/da/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/de/LC_MESSAGES/messages.po b/invenio_vcs/translations/de/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/de/LC_MESSAGES/messages.po rename to invenio_vcs/translations/de/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/en/LC_MESSAGES/messages.po b/invenio_vcs/translations/en/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/en/LC_MESSAGES/messages.po rename to invenio_vcs/translations/en/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/es/LC_MESSAGES/messages.po b/invenio_vcs/translations/es/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/es/LC_MESSAGES/messages.po rename to invenio_vcs/translations/es/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/fr/LC_MESSAGES/messages.po b/invenio_vcs/translations/fr/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/fr/LC_MESSAGES/messages.po rename to invenio_vcs/translations/fr/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/it/LC_MESSAGES/messages.po b/invenio_vcs/translations/it/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/it/LC_MESSAGES/messages.po 
rename to invenio_vcs/translations/it/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/messages.pot b/invenio_vcs/translations/messages.pot similarity index 100% rename from invenio_github/translations/messages.pot rename to invenio_vcs/translations/messages.pot diff --git a/invenio_github/utils.py b/invenio_vcs/utils.py similarity index 92% rename from invenio_github/utils.py rename to invenio_vcs/utils.py index 64034ac3..c8f6d126 100644 --- a/invenio_github/utils.py +++ b/invenio_vcs/utils.py @@ -19,17 +19,16 @@ """Various utility functions.""" -from datetime import datetime +from datetime import datetime, timezone import dateutil.parser -import pytz import six from werkzeug.utils import import_string def utcnow(): """UTC timestamp (with timezone).""" - return datetime.now(tz=pytz.utc) + return datetime.now(tz=timezone.utc) def iso_utcnow(): @@ -41,7 +40,7 @@ def parse_timestamp(x): """Parse ISO8601 formatted timestamp.""" dt = dateutil.parser.parse(x) if dt.tzinfo is None: - dt = dt.replace(tzinfo=pytz.utc) + dt = dt.replace(tzinfo=timezone.utc) return dt diff --git a/invenio_github/views/__init__.py b/invenio_vcs/views/__init__.py similarity index 100% rename from invenio_github/views/__init__.py rename to invenio_vcs/views/__init__.py diff --git a/invenio_github/views/badge.py b/invenio_vcs/views/badge.py similarity index 66% rename from invenio_github/views/badge.py rename to invenio_vcs/views/badge.py index ef2f9068..ee9db80e 100644 --- a/invenio_github/views/badge.py +++ b/invenio_vcs/views/badge.py @@ -25,30 +25,29 @@ from __future__ import absolute_import -from flask import Blueprint, abort, current_app, redirect, url_for +from flask import Blueprint, abort, redirect, url_for from flask_login import current_user -from invenio_github.api import GitHubAPI -from invenio_github.errors import ReleaseNotFound -from invenio_github.models import ReleaseStatus, Repository -from invenio_github.proxies import current_github +from invenio_vcs.config 
import get_provider_by_id +from invenio_vcs.models import ReleaseStatus, Repository +from invenio_vcs.proxies import current_vcs +from invenio_vcs.service import VCSService blueprint = Blueprint( - "invenio_github_badge", + "invenio_vcs_badge", __name__, - url_prefix="/badge", + url_prefix="/badge/", static_folder="../static", template_folder="../templates", ) -# -# Views -# -@blueprint.route("/.svg") -def index(repo_github_id): +@blueprint.route("/.svg") +def index(provider, repo_provider_id): """Generate a badge for a specific GitHub repository (by github ID).""" - repo = Repository.query.filter(Repository.github_id == repo_github_id).one_or_none() + repo = Repository.query.filter( + Repository.provider_id == repo_provider_id, Repository.provider == provider + ).one_or_none() if not repo: abort(404) @@ -56,7 +55,9 @@ def index(repo_github_id): if not latest_release: abort(404) - release = current_github.release_api_class(latest_release) + provider = get_provider_by_id(provider).for_user(current_user.id) + release = current_vcs.release_api_class(latest_release, provider) + # release.badge_title points to "DOI" # release.badge_value points to the record "pids.doi.identifier" badge_url = url_for( @@ -70,9 +71,11 @@ def index(repo_github_id): # Kept for backward compatibility @blueprint.route("//.svg") -def index_old(user_id, repo_name): +def index_old(provider, user_id, repo_name): """Generate a badge for a specific GitHub repository (by name).""" - repo = Repository.query.filter(Repository.name == repo_name).one_or_none() + repo = Repository.query.filter( + Repository.full_name == repo_name, Repository.provider == provider + ).one_or_none() if not repo: abort(404) @@ -80,7 +83,9 @@ def index_old(user_id, repo_name): if not latest_release: abort(404) - release = current_github.release_api_class(latest_release) + provider = get_provider_by_id(provider).for_user(current_user.id) + release = current_vcs.release_api_class(latest_release, provider) + # 
release.badge_title points to "DOI" # release.badge_value points to the record "pids.doi.identifier" badge_url = url_for( @@ -93,11 +98,13 @@ def index_old(user_id, repo_name): # Kept for backward compatibility -@blueprint.route("/latestdoi/") -def latest_doi(github_id): +@blueprint.route("/latestdoi/") +def latest_doi(provider, provider_id): """Redirect to the newest record version.""" # Without user_id, we can't use GitHubAPI. Therefore, we fetch the latest release using the Repository model directly. - repo = Repository.query.filter(Repository.github_id == github_id).one_or_none() + repo = Repository.query.filter( + Repository.provider_id == provider_id, Repository.provider == provider + ).one_or_none() if not repo: abort(404) @@ -105,7 +112,8 @@ def latest_doi(github_id): if not latest_release: abort(404) - release = current_github.release_api_class(latest_release) + provider = get_provider_by_id(provider).for_user(current_user.id) + release = current_vcs.release_api_class(latest_release, provider) # record.url points to DOI url or HTML url if Datacite is not enabled. 
return redirect(release.record_url) @@ -113,11 +121,11 @@ def latest_doi(github_id): # Kept for backward compatibility @blueprint.route("/latestdoi//") -def latest_doi_old(user_id, repo_name): +def latest_doi_old(provider, user_id, repo_name): """Redirect to the newest record version.""" - github_api = GitHubAPI(user_id) - repo = github_api.get_repository(repo_name=repo_name) - release = github_api.repo_last_published_release(repo) + svc = VCSService.for_provider_and_user(provider, user_id) + repo = svc.get_repository(repo_name=repo_name) + release = svc.get_repo_latest_release(repo) if not release: abort(404) diff --git a/invenio_github/views/github.py b/invenio_vcs/views/vcs.py similarity index 63% rename from invenio_github/views/github.py rename to invenio_vcs/views/vcs.py index e24efefe..16f1a7d4 100644 --- a/invenio_github/views/github.py +++ b/invenio_vcs/views/vcs.py @@ -32,7 +32,7 @@ from invenio_i18n import gettext as _ from sqlalchemy.orm.exc import NoResultFound -from invenio_github.api import GitHubAPI +from invenio_vcs.service import VCSService from ..errors import GithubTokenNotFound, RepositoryAccessError, RepositoryNotFoundError @@ -43,12 +43,12 @@ def request_session_token(): def decorator(f): @wraps(f) def inner(*args, **kwargs): - github = GitHubAPI(user_id=current_user.id) - token = github.session_token - if token: + provider = kwargs["provider"] + svc = VCSService.for_provider_and_user(provider, current_user.id) + if svc.is_authenticated: return f(*args, **kwargs) raise GithubTokenNotFound( - current_user, _("Github session token is requested") + current_user, _("VCS provider session token is required") ) return inner @@ -59,13 +59,13 @@ def inner(*args, **kwargs): def create_ui_blueprint(app): """Creates blueprint and registers UI endpoints if the integration is enabled.""" blueprint = Blueprint( - "invenio_github", + "invenio_vcs", __name__, static_folder="../static", template_folder="../templates", - url_prefix="/account/settings/github", + 
url_prefix="/account/settings/vcs/", ) - if app.config.get("GITHUB_INTEGRATION_ENABLED", False): + if app.config.get("VCS_INTEGRATION_ENABLED", False): with app.app_context(): # Todo: Temporary fix, it should be removed when inveniosoftware/invenio-theme#355 is merged register_ui_routes(blueprint) return blueprint @@ -73,8 +73,10 @@ def create_ui_blueprint(app): def create_api_blueprint(app): """Creates blueprint and registers API endpoints if the integration is enabled.""" - blueprint_api = Blueprint("invenio_github_api", __name__) - if app.config.get("GITHUB_INTEGRATION_ENABLED", False): + blueprint_api = Blueprint( + "invenio_vcs_api", __name__, url_prefix="/user/vcs/" + ) + if app.config.get("VCS_INTEGRATION_ENABLED", False): register_api_routes(blueprint_api) return blueprint_api @@ -84,51 +86,60 @@ def register_ui_routes(blueprint): @blueprint.route("/") @login_required - def get_repositories(): + def get_repositories(provider): """Display list of the user's repositories.""" - github = GitHubAPI(user_id=current_user.id) - ctx = dict(connected=False) - if github.session_token: + svc = VCSService.for_provider_and_user(provider, current_user.id) + ctx: dict = dict( + connected=False, + provider=provider, + vocabulary=svc.provider.factory.vocabulary, + ) + + if svc.is_authenticated: # Generate the repositories view object - repos = github.get_user_repositories() - last_sync = github.get_last_sync_time() + repos = svc.list_repositories() + last_sync = svc.get_last_sync_time() ctx.update( { "connected": True, - "repos": sorted(repos.items(), key=lambda x: x[1]["full_name"]), + "repos": repos, "last_sync": last_sync, } ) - return render_template(current_app.config["GITHUB_TEMPLATE_INDEX"], **ctx) + return render_template(current_app.config["VCS_TEMPLATE_INDEX"], **ctx) - @blueprint.route("/repository/") + @blueprint.route("/repository/") @login_required @request_session_token() - def get_repository(repo_name): + def get_repository(provider, repo_id): """Displays one 
repository. Retrieves and builds context to display all repository releases, if any. """ - user_id = current_user.id - github = GitHubAPI(user_id=user_id) + svc = VCSService.for_provider_and_user(provider, current_user.id) try: - repo = github.get_repository(repo_name) - latest_release = github.repo_last_published_release(repo) - default_branch = ( - github.account.extra_data.get("repos", {}) - .get(str(repo.github_id), None) - .get("default_branch", None) + repo = svc.get_repository(repo_id) + latest_release = svc.get_repo_latest_release(repo) + default_branch = svc.get_repo_default_branch(repo_id) + releases = svc.list_repo_releases(repo) + new_release_url = svc.provider.factory.url_for_new_release(repo.full_name) + new_citation_file_url = svc.provider.factory.url_for_new_file( + repo.full_name, default_branch or "main", "CITATION.cff" ) - releases = github.get_repository_releases(repo=repo) + return render_template( - current_app.config["GITHUB_TEMPLATE_VIEW"], + current_app.config["VCS_TEMPLATE_VIEW"], latest_release=latest_release, + provider=provider, repo=repo, releases=releases, default_branch=default_branch, + new_release_url=new_release_url, + new_citation_file_url=new_citation_file_url, + vocabulary=svc.provider.factory.vocabulary, ) except RepositoryAccessError: abort(403) @@ -144,8 +155,8 @@ def register_api_routes(blueprint): @login_required @request_session_token() - @blueprint.route("/user/github/repositories/sync", methods=["POST"]) - def sync_user_repositories(): + @blueprint.route("/repositories/sync", methods=["POST"]) + def sync_user_repositories(provider): """Synchronizes user repos. 
Currently: @@ -154,8 +165,8 @@ def sync_user_repositories(): POST /account/settings/github/hook """ try: - github = GitHubAPI(user_id=current_user.id) - github.sync(async_hooks=False) + svc = VCSService.for_provider_and_user(provider, current_user.id) + svc.sync(async_hooks=False) db.session.commit() except Exception as exc: current_app.logger.exception(str(exc)) @@ -165,13 +176,13 @@ def sync_user_repositories(): @login_required @request_session_token() - @blueprint.route("/user/github/", methods=["POST"]) - def init_user_github(): + @blueprint.route("/", methods=["POST"]) + def init_user_github(provider): """Initialises github account for an user.""" try: - github = GitHubAPI(user_id=current_user.id) - github.init_account() - github.sync(async_hooks=False) + svc = VCSService.for_provider_and_user(provider, current_user.id) + svc.init_account() + svc.sync(async_hooks=False) db.session.commit() except Exception as exc: current_app.logger.exception(str(exc)) @@ -180,10 +191,8 @@ def init_user_github(): @login_required @request_session_token() - @blueprint.route( - "/user/github/repositories//enable", methods=["POST"] - ) - def enable_repository(repository_id): + @blueprint.route("/repositories//enable", methods=["POST"]) + def enable_repository(provider, repository_id): """Enables one repository. 
Currently: @@ -192,18 +201,9 @@ def enable_repository(repository_id): POST /account/settings/github/hook """ try: - github = GitHubAPI(user_id=current_user.id) - - repos = github.account.extra_data.get("repos", {}) + svc = VCSService.for_provider_and_user(provider, current_user.id) + create_success = svc.enable_repository(repository_id) - if str(repository_id) not in repos: - raise RepositoryNotFoundError( - repository_id, _("Failed to enable repository.") - ) - - create_success = github.create_hook( - repository_id, repos[str(repository_id)]["full_name"] - ) db.session.commit() if create_success: return "", 201 @@ -221,10 +221,8 @@ def enable_repository(repository_id): @login_required @request_session_token() - @blueprint.route( - "/user/github/repositories//disable", methods=["POST"] - ) - def disable_repository(repository_id): + @blueprint.route("/repositories//disable", methods=["POST"]) + def disable_repository(provider, repository_id): """Disables one repository. Currently: @@ -233,21 +231,10 @@ def disable_repository(repository_id): DELETE /account/settings/github/hook """ try: - github = GitHubAPI(user_id=current_user.id) - - repos = github.account.extra_data.get("repos", {}) - - if str(repository_id) not in repos: - raise RepositoryNotFoundError( - repository_id, _("Failed to disable repository.") - ) + svc = VCSService.for_provider_and_user(provider, current_user.id) + remove_success = svc.disable_repository(repository_id) - remove_success = False - if repos: - remove_success = github.remove_hook( - repository_id, repos[str(repository_id)]["full_name"] - ) - db.session.commit() + db.session.commit() if remove_success: return "", 204 else: diff --git a/invenio_github/webpack.py b/invenio_vcs/webpack.py similarity index 89% rename from invenio_github/webpack.py rename to invenio_vcs/webpack.py index 9f507392..ca839e63 100644 --- a/invenio_github/webpack.py +++ b/invenio_vcs/webpack.py @@ -16,7 +16,7 @@ "semantic-ui": dict( entry={ # Add your webpack 
entrypoints - "invenio-github-init": "./js/invenio_github/index.js", + "invenio-vcs-init": "./js/invenio_vcs/index.js", }, dependencies={"@babel/runtime": "^7.9.0"}, ), diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 00000000..0e4f9fb4 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,6 @@ +{ + "name": "invenio-vcs", + "lockfileVersion": 3, + "requires": true, + "packages": {} +} diff --git a/setup.cfg b/setup.cfg index d51fba95..5f534355 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,16 +24,16 @@ # as an Intergovernmental Organization or submit itself to any jurisdiction. [metadata] -name = invenio-github -version = attr: invenio_github.__version__ +name = invenio-vcs +version = attr: invenio_vcs.__version__ description = "Invenio module that adds GitHub integration to the platform." long_description = file: README.rst, CHANGES.rst -keywords = invenio github +keywords = invenio github gitlab vcs license = MIT author = CERN author_email = info@inveniosoftware.org platforms = any -url = https://github.com/inveniosoftware/invenio-github +url = https://github.com/inveniosoftware/invenio-vcs classifiers = Development Status :: 5 - Production/Stable @@ -47,11 +47,13 @@ install_requires = PyYAML>=5.4.1 email-validator>=1.0.5 github3.py>=4.0.1,<5.0.0 + python-gitlab>=6.2.0,<7.0.0 humanize>=0.5.1 + python-dateutil>=2.9.0,<3.0.0 invenio-assets>=4.0.0,<5.0.0 invenio-accounts>=6.0.0,<7.0.0 invenio-celery>=2.0.0,<3.0.0 - invenio-db>=2.0.0,<3.0.0 + invenio-db[postgresql,mysql]>=2.0.0,<3.0.0 invenio-formatter>=3.0.0,<4.0.0 invenio-i18n>=3.0.0,<4.0.0 invenio-oauth2server>=3.0.0,<4.0.0 @@ -85,28 +87,28 @@ opensearch2 = [options.entry_points] invenio_base.apps = - invenio_github = invenio_github:InvenioGitHub + invenio_vcs = invenio_vcs:InvenioVCS invenio_base.api_apps = - invenio_github = invenio_github:InvenioGitHub + invenio_vcs = invenio_vcs:InvenioVCS invenio_base.blueprints = - invenio_github_badge = invenio_github.views.badge:blueprint - 
invenio_github_github = invenio_github.views.github:create_ui_blueprint + invenio_vcs_badge = invenio_vcs.views.badge:blueprint + invenio_vcs_vcs = invenio_vcs.views.vcs:create_ui_blueprint invenio_base.api_blueprints = - invenio_github = invenio_github.views.github:create_api_blueprint + invenio_vcs = invenio_vcs.views.vcs:create_api_blueprint invenio_base.finalize_app = - invenio_github = invenio_github.ext:finalize_app + invenio_vcs = invenio_vcs.ext:finalize_app_ui +invenio_base.api_finalize_app = + invenio_vcs = invenio_vcs.ext:finalize_app_api invenio_celery.tasks = - invenio_github = invenio_github.tasks + invenio_vcs = invenio_vcs.tasks invenio_db.alembic = - invenio_github = invenio_github:alembic + invenio_vcs = invenio_vcs:alembic invenio_db.models = - invenio_github = invenio_github.models + invenio_vcs = invenio_vcs.models invenio_i18n.translations = - messages = invenio_github -invenio_webhooks.receivers = - github = invenio_github.receivers:GitHubReceiver + messages = invenio_vcs invenio_assets.webpack = - invenio_github = invenio_github.webpack:theme + invenio_vcs = invenio_vcs.webpack:theme [build_sphinx] source-dir = docs/ @@ -117,23 +119,23 @@ all_files = 1 universal = 1 [compile_catalog] -directory = invenio_github/translations/ +directory = invenio_vcs/translations/ use_fuzzy = True [extract_messages] copyright_holder = CERN msgid_bugs_address = info@inveniosoftware.org mapping_file = babel.ini -output_file = invenio_github/translations/messages.pot +output_file = invenio_vcs/translations/messages.pot add_comments = NOTE [init_catalog] -input_file = invenio_github/translations/messages.pot -output_dir = invenio_github/translations/ +input_file = invenio_vcs/translations/messages.pot +output_dir = invenio_vcs/translations/ [update_catalog] -input_file = invenio_github/translations/messages.pot -output_dir = invenio_github/translations/ +input_file = invenio_vcs/translations/messages.pot +output_dir = invenio_vcs/translations/ [pydocstyle] 
add_ignore = D401,D403 @@ -146,5 +148,5 @@ ignore = *-requirements.txt [tool:pytest] -addopts = --black --isort --pydocstyle --ignore=docs --cov=invenio_github --cov-report=term-missing -testpaths = tests invenio_github +addopts = --black --isort --pydocstyle --ignore=docs --cov=invenio_vcs --cov-report=term-missing +testpaths = tests invenio_vcs diff --git a/tests/fixtures.py b/tests/fixtures.py index ce4020a7..4db166be 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -27,11 +27,11 @@ from six import BytesIO -from invenio_github.api import GitHubRelease -from invenio_github.models import ReleaseStatus +from invenio_vcs.models import ReleaseStatus +from invenio_vcs.service import VCSRelease -class TestGithubRelease(GitHubRelease): +class TestGithubRelease(VCSRelease): """Implements GithubRelease with test methods.""" def publish(self): @@ -39,8 +39,8 @@ def publish(self): Does not create a "real" record, as this only used to test the API. """ - self.release_object.status = ReleaseStatus.PUBLISHED - self.release_object.record_id = "445aaacd-9de1-41ab-af52-25ab6cb93df7" + self.generic_release.status = ReleaseStatus.PUBLISHED + self.generic_release.record_id = "445aaacd-9de1-41ab-af52-25ab6cb93df7" return {} def process_release(self): diff --git a/tests/test_api.py b/tests/test_api.py index 44389d3f..11eac7ca 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -11,8 +11,8 @@ import pytest from invenio_webhooks.models import Event -from invenio_github.api import GitHubAPI, GitHubRelease -from invenio_github.models import Release, ReleaseStatus +from invenio_vcs.models import Release, ReleaseStatus +from invenio_vcs.service import VCSRelease from .fixtures import PAYLOAD as github_payload_fixture @@ -58,7 +58,7 @@ def test_release_api(app, test_user, github_api): status=ReleaseStatus.RECEIVED, ) # Idea is to test the public interface of GithubRelease - gh = GitHubRelease(release) + gh = VCSRelease(release) # Validate that public methods raise 
NotImplementedError with pytest.raises(NotImplementedError): @@ -109,7 +109,7 @@ def test_release_branch_tag_conflict(app, test_user, github_api): status=ReleaseStatus.RECEIVED, ) # Idea is to test the public interface of GithubRelease - rel_api = GitHubRelease(release) + rel_api = VCSRelease(release) resolved_url = rel_api.resolve_zipball_url() ref_tag_url = ( "https://github.com/auser/repo-2/zipball/refs/tags/v1.0-tag-and-branch" diff --git a/tests/test_invenio_github.py b/tests/test_invenio_github.py index ceed134a..a9d9a891 100644 --- a/tests/test_invenio_github.py +++ b/tests/test_invenio_github.py @@ -27,12 +27,12 @@ from flask import Flask -from invenio_github import InvenioGitHub +from invenio_vcs import InvenioVCS def test_version(): """Test version import.""" - from invenio_github import __version__ + from invenio_vcs import __version__ assert __version__ @@ -40,11 +40,11 @@ def test_version(): def test_init(): """Test extension initialization.""" app = Flask("testapp") - ext = InvenioGitHub(app) + ext = InvenioVCS(app) assert "invenio-github" in app.extensions app = Flask("testapp") - ext = InvenioGitHub() + ext = InvenioVCS() assert "invenio-github" not in app.extensions ext.init_app(app) assert "invenio-github" in app.extensions diff --git a/tests/test_models.py b/tests/test_models.py index be63e40f..76086b6e 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -22,7 +22,7 @@ """Test cases for badge creation.""" -from invenio_github.models import Repository +from invenio_vcs.models import Repository def test_repository_unbound(app): diff --git a/tests/test_tasks.py b/tests/test_tasks.py index 88ada337..2109d7f4 100644 --- a/tests/test_tasks.py +++ b/tests/test_tasks.py @@ -26,10 +26,10 @@ from invenio_webhooks.models import Event from mock import patch -from invenio_github.api import GitHubAPI -from invenio_github.models import Release, ReleaseStatus, Repository -from invenio_github.tasks import process_release, refresh_accounts -from 
invenio_github.utils import iso_utcnow +from invenio_vcs.api import GitHubAPI +from invenio_vcs.models import Release, ReleaseStatus, Repository +from invenio_vcs.tasks import process_release, refresh_accounts +from invenio_vcs.utils import iso_utcnow from . import fixtures diff --git a/tests/test_webhook.py b/tests/test_webhook.py index 14c3317e..2fa404b3 100644 --- a/tests/test_webhook.py +++ b/tests/test_webhook.py @@ -27,8 +27,8 @@ # from invenio_rdm_records.proxies import current_rdm_records_service from invenio_webhooks.models import Event -from invenio_github.api import GitHubAPI -from invenio_github.models import ReleaseStatus, Repository +from invenio_vcs.api import GitHubAPI +from invenio_vcs.models import ReleaseStatus, Repository def test_webhook_post(app, db, tester_id, remote_token, github_api): @@ -40,7 +40,7 @@ def test_webhook_post(app, db, tester_id, remote_token, github_api): hook = 1234 tag = "v1.0" - repo = Repository.get(github_id=repo_id, name=repo_name) + repo = Repository.get(github_id=repo_id, full_name=repo_name) if not repo: repo = Repository.create(tester_id, repo_id, repo_name) @@ -79,7 +79,7 @@ def test_webhook_post_fail(app, tester_id, remote_token, github_api): hook = 1234 # Create a repository - repo = Repository.get(github_id=repo_id, name=repo_name) + repo = Repository.get(github_id=repo_id, full_name=repo_name) if not repo: repo = Repository.create(tester_id, repo_id, repo_name)