From 4978bce8ed860ca0dd4f644d824701eadb0150e5 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Thu, 7 Aug 2025 16:39:19 +0200 Subject: [PATCH 01/19] WIP: start transformation into generic VCS package --- .env | 2 + ...54318294_switch_to_generic_git_services.py | 91 +++++++++ invenio_github/api.py | 16 +- invenio_github/config.py | 4 + invenio_github/contrib/github.py | 178 ++++++++++++++++ invenio_github/ext.py | 5 +- invenio_github/models.py | 16 +- invenio_github/providers.py | 190 ++++++++++++++++++ invenio_github/proxies.py | 2 +- invenio_github/service.py | 49 +++++ invenio_github/tasks.py | 6 +- .../invenio_github/settings/index.html | 2 +- invenio_github/views/badge.py | 8 +- invenio_github/views/{github.py => vcs.py} | 15 +- setup.cfg | 2 +- 15 files changed, 551 insertions(+), 35 deletions(-) create mode 100644 .env create mode 100644 invenio_github/alembic/1754318294_switch_to_generic_git_services.py create mode 100644 invenio_github/contrib/github.py create mode 100644 invenio_github/providers.py create mode 100644 invenio_github/service.py rename invenio_github/views/{github.py => vcs.py} (94%) diff --git a/.env b/.env new file mode 100644 index 00000000..84c43fa4 --- /dev/null +++ b/.env @@ -0,0 +1,2 @@ +# TODO: do not commit +INVENIO_THEME_FRONTPAGE=False diff --git a/invenio_github/alembic/1754318294_switch_to_generic_git_services.py b/invenio_github/alembic/1754318294_switch_to_generic_git_services.py new file mode 100644 index 00000000..5f3bd44d --- /dev/null +++ b/invenio_github/alembic/1754318294_switch_to_generic_git_services.py @@ -0,0 +1,91 @@ +# +# This file is part of Invenio. +# Copyright (C) 2016-2018 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Switch to generic git services""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "1754318294" +down_revision = "b0eaee37b545" +branch_labels = () +depends_on = None + + +def upgrade(): + """Upgrade database.""" + op.rename_table("github_repositories", "vcs_repositories") + op.alter_column( + "vcs_repositories", + "github_id", + new_column_name="provider_id", + type_=sa.String(length=255), + nullable=False, + existing_type=sa.Integer(), + existing_nullable=True, + ) + op.alter_column( + "vcs_repositories", + "hook", + type_=sa.String(length=255), + nullable=True, + existing_type=sa.Integer(), + existing_nullable=True, + ) + op.add_column( + "vcs_repositories", sa.Column("provider", sa.String(255), nullable=False) + ) + + op.rename_table("github_releases", "vcs_releases") + op.alter_column( + "vcs_releases", + "release_id", + new_column_name="provider_id", + type_=sa.String(length=255), + nullable=False, + existing_type=sa.Integer(), + existing_nullable=True, + ) + # ### end Alembic commands ### + + +def downgrade(): + """Downgrade database.""" + op.rename_table("vcs_repositories", "github_repositories") + op.alter_column( + "github_repositories", + "provider_id", + new_column_name="github_id", + type_=sa.Integer(), + nullable=True, + existing_type=sa.String(length=255), + existing_nullable=False, + postgresql_using="provider_id::integer", + ) + op.alter_column( + "github_repositories", + "hook", + type_=sa.Integer(), + nullable=True, + existing_type=sa.String(length=255), + existing_nullable=True, + ) + op.drop_column("github_repositories", "provider") + + op.rename_table("vcs_releases", "github_releases") + op.alter_column( + "github_releases", + "provider_id", + new_column_name="release_id", + type_=sa.Integer(), + nullable=True, + existing_type=sa.String(length=255), + existing_nullable=False, + postgresql_using="provider_id::integer", + ) + # ### end Alembic commands ### diff --git a/invenio_github/api.py b/invenio_github/api.py index 374b5d10..b0845605 100644 --- a/invenio_github/api.py +++ b/invenio_github/api.py @@ -47,7 +47,7 
@@ from werkzeug.utils import cached_property from invenio_github.models import Release, ReleaseStatus, Repository -from invenio_github.proxies import current_github +from invenio_github.proxies import current_vcs from invenio_github.tasks import sync_hooks as sync_hooks_task from invenio_github.utils import iso_utcnow, parse_timestamp, utcnow @@ -64,8 +64,9 @@ class GitHubAPI(object): """Wrapper for GitHub API.""" - def __init__(self, user_id=None): + def __init__(self, remote, user_id=None): """Create a GitHub API object.""" + self.remote = remote self.user_id = user_id @cached_property @@ -95,11 +96,6 @@ def session_token(self): return token return None - remote = LocalProxy( - lambda: current_oauthclient.oauth.remote_apps[ - current_app.config["GITHUB_WEBHOOK_RECEIVER_ID"] - ] - ) """Return OAuth remote application.""" def check_repo_access_permissions(self, repo): @@ -363,7 +359,7 @@ def repo_last_published_release(self, repo): release_instance = None release_object = repo.latest_release(ReleaseStatus.PUBLISHED) if release_object: - release_instance = current_github.release_api_class(release_object) + release_instance = current_vcs.release_api_class(release_object) return release_instance def get_repository_releases(self, repo): @@ -373,7 +369,7 @@ def get_repository_releases(self, repo): # Retrieve releases and sort them by creation date release_instances = [] for release_object in repo.releases.order_by(Release.created): - release_instance = current_github.release_api_class(release_object) + release_instance = current_vcs.release_api_class(release_object) release_instances.append(release_instance) return release_instances @@ -390,7 +386,7 @@ def get_user_repositories(self): ) for repo in db_repos: if str(repo.github_id) in repos: - release_instance = current_github.release_api_class( + release_instance = current_vcs.release_api_class( repo.latest_release() ) repos[str(repo.github_id)]["instance"] = repo diff --git a/invenio_github/config.py 
b/invenio_github/config.py index 18814cfc..2832cbb4 100644 --- a/invenio_github/config.py +++ b/invenio_github/config.py @@ -24,6 +24,10 @@ from datetime import timedelta +from invenio_github.contrib.github import GitHubProvider + +VCS_PROVIDERS = [] + GITHUB_WEBHOOK_RECEIVER_ID = "github" """Local name of webhook receiver.""" diff --git a/invenio_github/contrib/github.py b/invenio_github/contrib/github.py new file mode 100644 index 00000000..6ec3fa84 --- /dev/null +++ b/invenio_github/contrib/github.py @@ -0,0 +1,178 @@ +from collections import defaultdict + +import github3 +from github3.repos import ShortRepository +from invenio_oauthclient.contrib.github import GitHubOAuthSettingsHelper +from werkzeug.utils import cached_property + +from invenio_github.oauth.handlers import account_setup_handler, disconnect_handler +from invenio_github.providers import ( + GenericRelease, + GenericRepository, + GenericWebhook, + RepositoryServiceProvider, + RepositoryServiceProviderFactory, +) + + +class GitHubProviderFactory(RepositoryServiceProviderFactory): + def __init__( + self, + webhook_receiver_url, + id="github", + name="GitHub", + config={}, + ): + super().__init__(GitHubProvider, webhook_receiver_url) + self._id = id + self._name = name + self._config = defaultdict( + config, + base_url="https://github.com", + credentials_key="GITHUB_APP_CREDENTIALS", + shared_secret="", + insecure_ssl=False, + ) + + @property + def remote_config(self): + request_token_params = { + "scope": "read:user,user:email,admin:repo_hook,read:org" + } + + helper = GitHubOAuthSettingsHelper( + base_url=self.base_url, app_key=self.credentials_key + ) + github_app = helper.remote_app + github_app["disconnect_handler"] = disconnect_handler + github_app["signup_handler"]["setup"] = account_setup_handler + github_app["params"]["request_token_params"] = request_token_params + + return github_app + + @property + def id(self): + return self._id + + @property + def name(self): + return self._name + + 
@property + def repository_name(self): + return "repository" + + @property + def repository_name_plural(self): + return "repositories" + + @property + def icon(self): + return "github" + + @property + def config(self): + return self._config + + +class GitHubProvider(RepositoryServiceProvider): + @cached_property + def _gh(self): + return github3.login(token=self.access_token(self.user_id)) + + def list_repositories(self): + if self._gh is None: + return None + + repos: dict[str, GenericRepository] = {} + for repo in self._gh.repositories(): + assert isinstance(repo, ShortRepository) + + if repo.permissions["admin"]: + repos[str(repo.id)] = GenericRepository( + str(repo.id), + repo.full_name, + repo.description, + repo.default_branch, + ) + + return repos + + def list_repository_webhooks(self, repository_id): + assert repository_id.isdigit() + if self._gh is None: + return None + repo = self._gh.repository_with_id(int(repository_id)) + if repo is None: + return None + + hooks = [] + for hook in repo.hooks(): + hooks.append( + GenericWebhook(str(hook.id), repository_id, hook.config.get("url", "")) + ) + return hooks + + def get_repo_latest_release(self, repository_id): + assert repository_id.isdigit() + if self._gh is None: + return None + repo = self._gh.repository_with_id(int(repository_id)) + if repo is None: + return None + + release = repo.latest_release() + if not release: + return None + + return GenericRelease( + str(release.id), + release.name, + release.tag_name, + release.tarball_url, + release.zipball_url, + release.created_at, + ) + + def create_webhook(self, repository_id, url): + assert repository_id.isdigit() + if self._gh is None: + return None + + hook_config = dict( + url=url, + content_type="json", + secret=self.factory.config["shared_secret"], + insecure_ssl="1" if self.factory.config["insecure_ssl"] else "0", + ) + + repo = self._gh.repository_with_id(int(repository_id)) + if repo is None: + return False + + hooks = (h for h in repo.hooks() if 
h.config.get("url", "") == url) + hook = next(hooks, None) + + if not hook: + hook = repo.create_hook("web", hook_config, events=["release"]) + else: + hook.edit(config=hook_config, events=["release"]) + + return True + + def delete_webhook(self, repository_id): + assert repository_id.isdigit() + if self._gh is None: + return None + + repo = self._gh.repository_with_id(int(repository_id)) + if repo is None: + return False + + hooks = ( + h for h in repo.hooks() if self.is_valid_webhook(h.config.get("url", "")) + ) + hook = next(hooks, None) + if not hook or hook.delete(): + return True + return False diff --git a/invenio_github/ext.py b/invenio_github/ext.py index 516f0b3b..a7e190a1 100644 --- a/invenio_github/ext.py +++ b/invenio_github/ext.py @@ -50,7 +50,7 @@ def __init__(self, app=None): @cached_property def release_api_class(self): """Github Release API class.""" - cls = current_app.config["GITHUB_RELEASE_CLASS"] + cls = current_app.config["VCS_RELEASE_CLASS"] if isinstance(cls, string_types): cls = import_string(cls) assert issubclass(cls, GitHubRelease) @@ -78,7 +78,7 @@ def init_config(self, app): ) for k in dir(config): - if k.startswith("GITHUB_"): + if k.startswith("GITHUB_") or k.startswith("VCS_"): app.config.setdefault(k, getattr(config, k)) @@ -92,6 +92,7 @@ def init_menu(app): if app.config.get("GITHUB_INTEGRATION_ENABLED", False): current_menu.submenu("settings.github").register( endpoint="invenio_github.get_repositories", + endpoint_arguments_constructor=lambda: {"provider": "github"}, text=_( "%(icon)s GitHub", icon=LazyString( diff --git a/invenio_github/models.py b/invenio_github/models.py index 78ed0f96..3259006e 100644 --- a/invenio_github/models.py +++ b/invenio_github/models.py @@ -110,7 +110,7 @@ def color(self): class Repository(db.Model, Timestamp): """Information about a GitHub repository.""" - __tablename__ = "github_repositories" + __tablename__ = "vcs_repositories" id = db.Column( UUIDType, @@ -119,11 +119,10 @@ class 
Repository(db.Model, Timestamp): ) """Repository identifier.""" - github_id = db.Column( - db.Integer, - unique=True, + provider_id = db.Column( + db.String(255), index=True, - nullable=True, + nullable=False, ) """Unique GitHub identifier for a repository. @@ -142,13 +141,16 @@ class Repository(db.Model, Timestamp): `github_id`, that only has a `name`. """ + provider = db.Column(db.String(255), nullable=False) + """Which VCS provider the repository is hosted by (and therefore the context in which to consider the provider_id)""" + name = db.Column(db.String(255), unique=True, index=True, nullable=False) """Fully qualified name of the repository including user/organization.""" user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) """Reference user that can manage this repository.""" - hook = db.Column(db.Integer) + hook = db.Column(db.String(255)) """Hook identifier.""" # @@ -215,7 +217,7 @@ class Release(db.Model, Timestamp): ) """Release identifier.""" - release_id = db.Column(db.Integer, unique=True, nullable=True) + provider_id = db.Column(db.String(255), nullable=True) """Unique GitHub release identifier.""" tag = db.Column(db.String(255)) diff --git a/invenio_github/providers.py b/invenio_github/providers.py new file mode 100644 index 00000000..2a1cfb87 --- /dev/null +++ b/invenio_github/providers.py @@ -0,0 +1,190 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from datetime import datetime +from urllib.parse import urlparse + +from flask import current_app +from invenio_i18n import gettext as _ +from invenio_oauth2server.models import Token as ProviderToken +from invenio_oauthclient import current_oauthclient +from invenio_oauthclient.handlers import token_getter +from invenio_oauthclient.models import RemoteAccount, RemoteToken +from werkzeug.local import LocalProxy +from werkzeug.utils import cached_property + +from invenio_github.errors import RemoteAccountDataNotSet + + +@dataclass +class GenericWebhook: + id: 
str + repository_id: str + url: str + + +@dataclass +class GenericRepository: + id: str + full_name: str + description: str + default_branch: str + + +@dataclass +class GenericRelease: + id: str + name: str + tag_name: str + tarball_url: str + zipball_url: str + created_at: datetime + + +class RepositoryServiceProviderFactory(ABC): + def __init__( + self, provider: type["RepositoryServiceProvider"], webhook_receiver_url: str + ): + self.provider = provider + self.webhook_receiver_url = webhook_receiver_url + + @property + @abstractmethod + def remote_config(self): + pass + + @cached_property + def remote(self): + return LocalProxy(lambda: current_oauthclient.oauth.remote_apps[self.id]) + + @property + @abstractmethod + def id(self) -> str: + pass + + @property + @abstractmethod + def name(self) -> str: + pass + + @property + @abstractmethod + def repository_name(self) -> str: + pass + + @property + @abstractmethod + def repository_name_plural(self) -> str: + pass + + @property + @abstractmethod + def icon(self) -> str: + pass + + @property + @abstractmethod + def config(self) -> dict: + pass + + def for_user(self, user_id: str): + return self.provider(self, user_id) + + +class RepositoryServiceProvider(ABC): + def __init__(self, factory: RepositoryServiceProviderFactory, user_id: str) -> None: + self.factory = factory + self.user_id = user_id + + @cached_property + def remote_account(self): + """Return remote account.""" + return RemoteAccount.get(self.user_id, self.factory.remote.consumer_key) + + @cached_property + def user_available_repositories(self): + """Retrieve user repositories from user's remote data.""" + return self.remote_account.extra_data.get("repos", {}) + + @cached_property + def access_token(self): + """Return OAuth access token's value.""" + token = RemoteToken.get(self.user_id, self.factory.remote.consumer_key) + if not token: + # The token is not yet in DB, it is retrieved from the request session. 
+ return self.factory.remote.get_request_token()[0] + return token.access_token + + @property + def session_token(self): + """Return OAuth session token.""" + session_token = None + if self.user_id is not None: + session_token = token_getter(self.factory.remote) + if session_token: + token = RemoteToken.get( + self.user_id, + self.factory.remote.consumer_key, + access_token=session_token[0], + ) + return token + return None + + @cached_property + def webhook_url(self): + """Return the url to be used by a GitHub webhook.""" + if not self.remote_account.extra_data.get("tokens", {}).get("webhook"): + raise RemoteAccountDataNotSet( + self.user_id, _("Webhook data not found for user tokens (remote data).") + ) + + webhook_token = ProviderToken.query.filter_by( + id=self.remote_account.extra_data["tokens"]["webhook"] + ).first() + if webhook_token: + return self.factory.webhook_receiver_url.format( + token=webhook_token.access_token + ) + + def is_valid_webhook(self, url): + """Check if webhook url is valid. + + The webhook url is valid if it has the same host as the configured webhook url. + + :param str url: The webhook url to be checked. + :returns: True if the webhook url is valid, False otherwise. 
+ """ + if not url: + return False + configured_host = urlparse(self.webhook_url).netloc + url_host = urlparse(url).netloc + if not (configured_host and url_host): + return False + return configured_host == url_host + + @abstractmethod + def list_repositories(self): + pass + + @abstractmethod + def list_repository_webhooks(self, repository_id): + pass + + @abstractmethod + def get_repo_latest_release(self, repository_id): + pass + + @abstractmethod + def create_webhook(self, repository_id, url): + pass + + @abstractmethod + def delete_webhook(self, repository_id, webhook_id): + pass + + +def get_provider_by_id(id: str) -> RepositoryServiceProviderFactory: + providers = current_app.config["VCS_PROVIDERS"] + for provider in providers: + if id == provider.id: + return provider + raise Exception(f"VCS provider with ID {id} not registered") diff --git a/invenio_github/proxies.py b/invenio_github/proxies.py index 710e639f..6fe7d73c 100644 --- a/invenio_github/proxies.py +++ b/invenio_github/proxies.py @@ -27,4 +27,4 @@ from flask import current_app from werkzeug.local import LocalProxy -current_github = LocalProxy(lambda: current_app.extensions["invenio-github"]) +current_vcs = LocalProxy(lambda: current_app.extensions["invenio-github"]) diff --git a/invenio_github/service.py b/invenio_github/service.py new file mode 100644 index 00000000..9c746ba8 --- /dev/null +++ b/invenio_github/service.py @@ -0,0 +1,49 @@ +from copy import deepcopy + +from invenio_i18n import gettext as _ +from werkzeug.utils import cached_property + +from invenio_github.errors import RemoteAccountDataNotSet +from invenio_github.models import Repository +from invenio_github.providers import get_provider_by_id +from invenio_github.proxies import current_vcs + + +class VersionControlService: + def __init__(self, provider: str, user_id: str) -> None: + self.provider = get_provider_by_id(provider).for_user(user_id) + + @cached_property + def is_authenticated(self): + return self.provider.session_token 
is not None + + def list_repositories(self): + """Retrieves user repositories, containing db repositories plus remote repositories.""" + vcs_repos = deepcopy(self.provider.user_available_repositories) + if vcs_repos: + # 'Enhance' our repos dict, from our database model + db_repos = Repository.query.filter( + Repository.provider_id.in_([int(k) for k in vcs_repos.keys()]) + ) + for db_repo in db_repos: + if str(db_repo.provider_id) in vcs_repos: + release_instance = current_vcs.release_api_class( + db_repo.latest_release() + ) + vcs_repos[str(db_repo.github_id)]["instance"] = db_repo + vcs_repos[str(db_repo.github_id)]["latest"] = release_instance + return vcs_repos + + def get_last_sync_time(self): + """Retrieves the last sync delta time from github's client extra data. + + Time is computed as the delta between now and the last sync time. + """ + extra_data = self.provider.remote_account.extra_data + if not extra_data.get("last_sync"): + raise RemoteAccountDataNotSet( + self.provider.user_id, + _("Last sync data is not set for user (remote data)."), + ) + + return extra_data["last_sync"] diff --git a/invenio_github/tasks.py b/invenio_github/tasks.py index 36755e7e..3c0754a9 100644 --- a/invenio_github/tasks.py +++ b/invenio_github/tasks.py @@ -35,7 +35,7 @@ from invenio_github.errors import CustomGitHubMetadataError, RepositoryAccessError from invenio_github.models import Release, ReleaseStatus -from invenio_github.proxies import current_github +from invenio_github.proxies import current_vcs def _get_err_obj(msg): @@ -125,7 +125,7 @@ def process_release(release_id): Release.status.in_([ReleaseStatus.RECEIVED, ReleaseStatus.FAILED]), ).one() - release = current_github.release_api_class(release_model) + release = current_vcs.release_api_class(release_model) matched_error_cls = None matched_ex = None @@ -134,7 +134,7 @@ def process_release(release_id): release.process_release() db.session.commit() except Exception as ex: - error_handlers = 
current_github.release_error_handlers + error_handlers = current_vcs.release_error_handlers matched_ex = None for error_cls, handler in error_handlers + DEFAULT_ERROR_HANDLERS: if isinstance(ex, error_cls): diff --git a/invenio_github/templates/semantic-ui/invenio_github/settings/index.html b/invenio_github/templates/semantic-ui/invenio_github/settings/index.html index e487f24c..d49e3f22 100644 --- a/invenio_github/templates/semantic-ui/invenio_github/settings/index.html +++ b/invenio_github/templates/semantic-ui/invenio_github/settings/index.html @@ -161,7 +161,7 @@

3 {{ _("Get the badge") }}

{{ _('Software preservation made simple!') }}

diff --git a/invenio_github/views/badge.py b/invenio_github/views/badge.py index ef2f9068..de79c02e 100644 --- a/invenio_github/views/badge.py +++ b/invenio_github/views/badge.py @@ -31,7 +31,7 @@ from invenio_github.api import GitHubAPI from invenio_github.errors import ReleaseNotFound from invenio_github.models import ReleaseStatus, Repository -from invenio_github.proxies import current_github +from invenio_github.proxies import current_vcs blueprint = Blueprint( "invenio_github_badge", @@ -56,7 +56,7 @@ def index(repo_github_id): if not latest_release: abort(404) - release = current_github.release_api_class(latest_release) + release = current_vcs.release_api_class(latest_release) # release.badge_title points to "DOI" # release.badge_value points to the record "pids.doi.identifier" badge_url = url_for( @@ -80,7 +80,7 @@ def index_old(user_id, repo_name): if not latest_release: abort(404) - release = current_github.release_api_class(latest_release) + release = current_vcs.release_api_class(latest_release) # release.badge_title points to "DOI" # release.badge_value points to the record "pids.doi.identifier" badge_url = url_for( @@ -105,7 +105,7 @@ def latest_doi(github_id): if not latest_release: abort(404) - release = current_github.release_api_class(latest_release) + release = current_vcs.release_api_class(latest_release) # record.url points to DOI url or HTML url if Datacite is not enabled. 
return redirect(release.record_url) diff --git a/invenio_github/views/github.py b/invenio_github/views/vcs.py similarity index 94% rename from invenio_github/views/github.py rename to invenio_github/views/vcs.py index e24efefe..e60c5231 100644 --- a/invenio_github/views/github.py +++ b/invenio_github/views/vcs.py @@ -30,9 +30,12 @@ from flask_login import current_user, login_required from invenio_db import db from invenio_i18n import gettext as _ +from invenio_oauthclient.proxies import current_oauthclient from sqlalchemy.orm.exc import NoResultFound from invenio_github.api import GitHubAPI +from invenio_github.providers import get_provider_by_id +from invenio_github.service import VersionControlService from ..errors import GithubTokenNotFound, RepositoryAccessError, RepositoryNotFoundError @@ -63,7 +66,7 @@ def create_ui_blueprint(app): __name__, static_folder="../static", template_folder="../templates", - url_prefix="/account/settings/github", + url_prefix="/account/settings/", ) if app.config.get("GITHUB_INTEGRATION_ENABLED", False): with app.app_context(): # Todo: Temporary fix, it should be removed when inveniosoftware/invenio-theme#355 is merged @@ -84,14 +87,14 @@ def register_ui_routes(blueprint): @blueprint.route("/") @login_required - def get_repositories(): + def get_repositories(provider): """Display list of the user's repositories.""" - github = GitHubAPI(user_id=current_user.id) + svc = VersionControlService(provider, current_user.id) ctx = dict(connected=False) - if github.session_token: + if svc.is_authenticated: # Generate the repositories view object - repos = github.get_user_repositories() - last_sync = github.get_last_sync_time() + repos = svc.list_repositories() + last_sync = svc.get_last_sync_time() ctx.update( { diff --git a/setup.cfg b/setup.cfg index d51fba95..fb8b5eeb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,7 +51,7 @@ install_requires = invenio-assets>=4.0.0,<5.0.0 invenio-accounts>=6.0.0,<7.0.0 invenio-celery>=2.0.0,<3.0.0 - 
invenio-db>=2.0.0,<3.0.0 + invenio-db[postgresql,mysql]>=2.0.0,<3.0.0 invenio-formatter>=3.0.0,<4.0.0 invenio-i18n>=3.0.0,<4.0.0 invenio-oauth2server>=3.0.0,<4.0.0 From 5e4b5cdd43c35e296b9e2d97e5487abf8f2512ad Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Fri, 8 Aug 2025 11:15:57 +0200 Subject: [PATCH 02/19] WIP: rename directory paths --- invenio_github/service.py | 49 -------- {invenio_github => invenio_vcs}/__init__.py | 0 ...54318294_switch_to_generic_git_services.py | 0 .../5a5428312b2b_create_github_branch.py | 0 .../b0eaee37b545_create_github_tables.py | 0 {invenio_github => invenio_vcs}/api.py | 0 .../semantic-ui/js/invenio_github/index.js | 0 {invenio_github => invenio_vcs}/config.py | 2 +- .../contrib/github.py | 13 ++ {invenio_github => invenio_vcs}/errors.py | 0 {invenio_github => invenio_vcs}/ext.py | 29 +++-- {invenio_github => invenio_vcs}/models.py | 11 +- .../oauth/handlers.py | 0 .../oauth/remote_app.py | 0 {invenio_github => invenio_vcs}/providers.py | 14 ++- {invenio_github => invenio_vcs}/proxies.py | 0 {invenio_github => invenio_vcs}/receivers.py | 0 invenio_vcs/service.py | 119 ++++++++++++++++++ {invenio_github => invenio_vcs}/tasks.py | 0 .../semantic-ui/invenio_github/base.html | 0 .../semantic-ui/invenio_github/helpers.html | 0 .../invenio_github/settings/base.html | 0 .../invenio_github/settings/helpers.html | 0 .../invenio_github/settings/index.html | 0 .../invenio_github/settings/index_item.html | 0 .../invenio_github/settings/view.html | 0 .../translations/cs/LC_MESSAGES/messages.po | 0 .../translations/da/LC_MESSAGES/messages.po | 0 .../translations/de/LC_MESSAGES/messages.po | 0 .../translations/en/LC_MESSAGES/messages.po | 0 .../translations/es/LC_MESSAGES/messages.po | 0 .../translations/fr/LC_MESSAGES/messages.po | 0 .../translations/it/LC_MESSAGES/messages.po | 0 .../translations/messages.pot | 0 {invenio_github => invenio_vcs}/utils.py | 0 .../views/__init__.py | 0 .../views/badge.py | 0 {invenio_github => 
invenio_vcs}/views/vcs.py | 30 ++--- {invenio_github => invenio_vcs}/webpack.py | 0 setup.cfg | 48 +++---- 40 files changed, 203 insertions(+), 112 deletions(-) delete mode 100644 invenio_github/service.py rename {invenio_github => invenio_vcs}/__init__.py (100%) rename {invenio_github => invenio_vcs}/alembic/1754318294_switch_to_generic_git_services.py (100%) rename {invenio_github => invenio_vcs}/alembic/5a5428312b2b_create_github_branch.py (100%) rename {invenio_github => invenio_vcs}/alembic/b0eaee37b545_create_github_tables.py (100%) rename {invenio_github => invenio_vcs}/api.py (100%) rename {invenio_github => invenio_vcs}/assets/semantic-ui/js/invenio_github/index.js (100%) rename {invenio_github => invenio_vcs}/config.py (98%) rename {invenio_github => invenio_vcs}/contrib/github.py (92%) rename {invenio_github => invenio_vcs}/errors.py (100%) rename {invenio_github => invenio_vcs}/ext.py (78%) rename {invenio_github => invenio_vcs}/models.py (96%) rename {invenio_github => invenio_vcs}/oauth/handlers.py (100%) rename {invenio_github => invenio_vcs}/oauth/remote_app.py (100%) rename {invenio_github => invenio_vcs}/providers.py (94%) rename {invenio_github => invenio_vcs}/proxies.py (100%) rename {invenio_github => invenio_vcs}/receivers.py (100%) create mode 100644 invenio_vcs/service.py rename {invenio_github => invenio_vcs}/tasks.py (100%) rename {invenio_github => invenio_vcs}/templates/semantic-ui/invenio_github/base.html (100%) rename {invenio_github => invenio_vcs}/templates/semantic-ui/invenio_github/helpers.html (100%) rename {invenio_github => invenio_vcs}/templates/semantic-ui/invenio_github/settings/base.html (100%) rename {invenio_github => invenio_vcs}/templates/semantic-ui/invenio_github/settings/helpers.html (100%) rename {invenio_github => invenio_vcs}/templates/semantic-ui/invenio_github/settings/index.html (100%) rename {invenio_github => invenio_vcs}/templates/semantic-ui/invenio_github/settings/index_item.html (100%) rename 
{invenio_github => invenio_vcs}/templates/semantic-ui/invenio_github/settings/view.html (100%) rename {invenio_github => invenio_vcs}/translations/cs/LC_MESSAGES/messages.po (100%) rename {invenio_github => invenio_vcs}/translations/da/LC_MESSAGES/messages.po (100%) rename {invenio_github => invenio_vcs}/translations/de/LC_MESSAGES/messages.po (100%) rename {invenio_github => invenio_vcs}/translations/en/LC_MESSAGES/messages.po (100%) rename {invenio_github => invenio_vcs}/translations/es/LC_MESSAGES/messages.po (100%) rename {invenio_github => invenio_vcs}/translations/fr/LC_MESSAGES/messages.po (100%) rename {invenio_github => invenio_vcs}/translations/it/LC_MESSAGES/messages.po (100%) rename {invenio_github => invenio_vcs}/translations/messages.pot (100%) rename {invenio_github => invenio_vcs}/utils.py (100%) rename {invenio_github => invenio_vcs}/views/__init__.py (100%) rename {invenio_github => invenio_vcs}/views/badge.py (100%) rename {invenio_github => invenio_vcs}/views/vcs.py (90%) rename {invenio_github => invenio_vcs}/webpack.py (100%) diff --git a/invenio_github/service.py b/invenio_github/service.py deleted file mode 100644 index 9c746ba8..00000000 --- a/invenio_github/service.py +++ /dev/null @@ -1,49 +0,0 @@ -from copy import deepcopy - -from invenio_i18n import gettext as _ -from werkzeug.utils import cached_property - -from invenio_github.errors import RemoteAccountDataNotSet -from invenio_github.models import Repository -from invenio_github.providers import get_provider_by_id -from invenio_github.proxies import current_vcs - - -class VersionControlService: - def __init__(self, provider: str, user_id: str) -> None: - self.provider = get_provider_by_id(provider).for_user(user_id) - - @cached_property - def is_authenticated(self): - return self.provider.session_token is not None - - def list_repositories(self): - """Retrieves user repositories, containing db repositories plus remote repositories.""" - vcs_repos = 
deepcopy(self.provider.user_available_repositories) - if vcs_repos: - # 'Enhance' our repos dict, from our database model - db_repos = Repository.query.filter( - Repository.provider_id.in_([int(k) for k in vcs_repos.keys()]) - ) - for db_repo in db_repos: - if str(db_repo.provider_id) in vcs_repos: - release_instance = current_vcs.release_api_class( - db_repo.latest_release() - ) - vcs_repos[str(db_repo.github_id)]["instance"] = db_repo - vcs_repos[str(db_repo.github_id)]["latest"] = release_instance - return vcs_repos - - def get_last_sync_time(self): - """Retrieves the last sync delta time from github's client extra data. - - Time is computed as the delta between now and the last sync time. - """ - extra_data = self.provider.remote_account.extra_data - if not extra_data.get("last_sync"): - raise RemoteAccountDataNotSet( - self.provider.user_id, - _("Last sync data is not set for user (remote data)."), - ) - - return extra_data["last_sync"] diff --git a/invenio_github/__init__.py b/invenio_vcs/__init__.py similarity index 100% rename from invenio_github/__init__.py rename to invenio_vcs/__init__.py diff --git a/invenio_github/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py similarity index 100% rename from invenio_github/alembic/1754318294_switch_to_generic_git_services.py rename to invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py diff --git a/invenio_github/alembic/5a5428312b2b_create_github_branch.py b/invenio_vcs/alembic/5a5428312b2b_create_github_branch.py similarity index 100% rename from invenio_github/alembic/5a5428312b2b_create_github_branch.py rename to invenio_vcs/alembic/5a5428312b2b_create_github_branch.py diff --git a/invenio_github/alembic/b0eaee37b545_create_github_tables.py b/invenio_vcs/alembic/b0eaee37b545_create_github_tables.py similarity index 100% rename from invenio_github/alembic/b0eaee37b545_create_github_tables.py rename to 
invenio_vcs/alembic/b0eaee37b545_create_github_tables.py diff --git a/invenio_github/api.py b/invenio_vcs/api.py similarity index 100% rename from invenio_github/api.py rename to invenio_vcs/api.py diff --git a/invenio_github/assets/semantic-ui/js/invenio_github/index.js b/invenio_vcs/assets/semantic-ui/js/invenio_github/index.js similarity index 100% rename from invenio_github/assets/semantic-ui/js/invenio_github/index.js rename to invenio_vcs/assets/semantic-ui/js/invenio_github/index.js diff --git a/invenio_github/config.py b/invenio_vcs/config.py similarity index 98% rename from invenio_github/config.py rename to invenio_vcs/config.py index 2832cbb4..a5b0b726 100644 --- a/invenio_github/config.py +++ b/invenio_vcs/config.py @@ -78,7 +78,7 @@ GITHUB_MAX_CONTRIBUTORS_NUMBER = 30 """Max number of contributors of a release to be retrieved from Github.""" -GITHUB_INTEGRATION_ENABLED = False +VCS_INTEGRATION_ENABLED = False """Enables the github integration.""" GITHUB_CITATION_FILE = None diff --git a/invenio_github/contrib/github.py b/invenio_vcs/contrib/github.py similarity index 92% rename from invenio_github/contrib/github.py rename to invenio_vcs/contrib/github.py index 6ec3fa84..7a7ed317 100644 --- a/invenio_github/contrib/github.py +++ b/invenio_vcs/contrib/github.py @@ -113,6 +113,19 @@ def list_repository_webhooks(self, repository_id): ) return hooks + def get_repository(self, repository_id): + assert repository_id.isdigit() + if self._gh is None: + return None + + repo = self._gh.repository_with_id(int(repository_id)) + if repo is None: + return None + + return GenericRepository( + str(repo.id), repo.full_name, repo.description, repo.default_branch + ) + def get_repo_latest_release(self, repository_id): assert repository_id.isdigit() if self._gh is None: diff --git a/invenio_github/errors.py b/invenio_vcs/errors.py similarity index 100% rename from invenio_github/errors.py rename to invenio_vcs/errors.py diff --git a/invenio_github/ext.py 
b/invenio_vcs/ext.py similarity index 78% rename from invenio_github/ext.py rename to invenio_vcs/ext.py index a7e190a1..62e6958c 100644 --- a/invenio_github/ext.py +++ b/invenio_vcs/ext.py @@ -34,6 +34,7 @@ from werkzeug.utils import cached_property, import_string from invenio_github.api import GitHubRelease +from invenio_github.providers import get_provider_list from invenio_github.utils import obj_or_import_string from . import config @@ -68,7 +69,7 @@ def release_error_handlers(self): def init_app(self, app): """Flask application initialization.""" self.init_config(app) - app.extensions["invenio-github"] = self + app.extensions["invenio-vcs"] = self def init_config(self, app): """Initialize configuration.""" @@ -89,16 +90,18 @@ def finalize_app(app): def init_menu(app): """Init menu.""" - if app.config.get("GITHUB_INTEGRATION_ENABLED", False): - current_menu.submenu("settings.github").register( - endpoint="invenio_github.get_repositories", - endpoint_arguments_constructor=lambda: {"provider": "github"}, - text=_( - "%(icon)s GitHub", - icon=LazyString( - lambda: f'' + if app.config.get("VCS_INTEGRATION_ENABLED", False): + for provider in get_provider_list(): + current_menu.submenu(f"settings.{provider.id}").register( + endpoint="invenio_vcs.get_repositories", + endpoint_arguments_constructor=lambda: {"provider": provider.id}, + text=_( + "%(icon)s $(provider)", + icon=LazyString( + lambda: f'' + ), + provider=LazyString(lambda: provider.name), ), - ), - order=10, - active_when=lambda: request.endpoint.startswith("invenio_github."), - ) + order=10, + active_when=lambda: request.endpoint.startswith("invenio_vcs."), + ) diff --git a/invenio_github/models.py b/invenio_vcs/models.py similarity index 96% rename from invenio_github/models.py rename to invenio_vcs/models.py index 3259006e..c564b2ba 100644 --- a/invenio_github/models.py +++ b/invenio_vcs/models.py @@ -35,6 +35,8 @@ from sqlalchemy_utils.models import Timestamp from sqlalchemy_utils.types import 
ChoiceType, JSONType, UUIDType +from invenio_github.providers import GenericRelease + RELEASE_STATUS_TITLES = { "RECEIVED": _("Received"), "PROCESSING": _("Processing"), @@ -166,7 +168,7 @@ def create(cls, user_id, github_id=None, name=None, **kwargs): return obj @classmethod - def get(cls, github_id=None, name=None): + def get(cls, provider_id=None, name=None): """Return a repository given its name or github id. :param integer github_id: GitHub repository identifier. @@ -179,8 +181,8 @@ def get(cls, github_id=None, name=None): exist. """ repo = None - if github_id: - repo = cls.query.filter(Repository.github_id == github_id).one_or_none() + if provider_id: + repo = cls.query.filter(Repository.provider_id == provider_id).one_or_none() if not repo and name is not None: repo = cls.query.filter(Repository.name == name).one_or_none() @@ -261,3 +263,6 @@ class Release(db.Model, Timestamp): def __repr__(self): """Get release representation.""" return f"" + + def to_generic(self): + return GenericRelease(self.id, "", self.tag, "", "", self.created) diff --git a/invenio_github/oauth/handlers.py b/invenio_vcs/oauth/handlers.py similarity index 100% rename from invenio_github/oauth/handlers.py rename to invenio_vcs/oauth/handlers.py diff --git a/invenio_github/oauth/remote_app.py b/invenio_vcs/oauth/remote_app.py similarity index 100% rename from invenio_github/oauth/remote_app.py rename to invenio_vcs/oauth/remote_app.py diff --git a/invenio_github/providers.py b/invenio_vcs/providers.py similarity index 94% rename from invenio_github/providers.py rename to invenio_vcs/providers.py index 2a1cfb87..8eb7e95a 100644 --- a/invenio_github/providers.py +++ b/invenio_vcs/providers.py @@ -117,9 +117,7 @@ def access_token(self): @property def session_token(self): """Return OAuth session token.""" - session_token = None - if self.user_id is not None: - session_token = token_getter(self.factory.remote) + session_token = token_getter(self.factory.remote) if session_token: token = 
RemoteToken.get( self.user_id, @@ -169,6 +167,10 @@ def list_repositories(self): def list_repository_webhooks(self, repository_id): pass + @abstractmethod + def get_repository(self, repository_id): + pass + @abstractmethod def get_repo_latest_release(self, repository_id): pass @@ -182,8 +184,12 @@ def delete_webhook(self, repository_id, webhook_id): pass +def get_provider_list() -> list[RepositoryServiceProviderFactory]: + return current_app.config["VCS_PROVIDERS"] + + def get_provider_by_id(id: str) -> RepositoryServiceProviderFactory: - providers = current_app.config["VCS_PROVIDERS"] + providers = get_provider_list() for provider in providers: if id == provider.id: return provider diff --git a/invenio_github/proxies.py b/invenio_vcs/proxies.py similarity index 100% rename from invenio_github/proxies.py rename to invenio_vcs/proxies.py diff --git a/invenio_github/receivers.py b/invenio_vcs/receivers.py similarity index 100% rename from invenio_github/receivers.py rename to invenio_vcs/receivers.py diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py new file mode 100644 index 00000000..67d10462 --- /dev/null +++ b/invenio_vcs/service.py @@ -0,0 +1,119 @@ +from copy import deepcopy +from dataclasses import asdict + +from invenio_db import db +from invenio_i18n import gettext as _ +from werkzeug.utils import cached_property + +from invenio_github.errors import ( + RemoteAccountDataNotSet, + RepositoryAccessError, + RepositoryNotFoundError, +) +from invenio_github.models import Release, ReleaseStatus, Repository +from invenio_github.providers import GenericRelease, get_provider_by_id + + +class VersionControlService: + def __init__(self, provider: str, user_id: str) -> None: + self.provider = get_provider_by_id(provider).for_user(user_id) + + @cached_property + def is_authenticated(self): + return self.provider.session_token is not None + + def list_repositories(self): + """Retrieves user repositories, containing db repositories plus remote repositories.""" + 
vcs_repos = deepcopy(self.provider.user_available_repositories) + if vcs_repos: + # 'Enhance' our repos dict, from our database model + db_repos = Repository.query.filter( + Repository.provider_id.in_([int(k) for k in vcs_repos.keys()]) + ) + for db_repo in db_repos: + if str(db_repo.provider_id) in vcs_repos: + release_instance = self.provider.get_repo_latest_release( + db_repo.provider_id + ) + + vcs_repos[str(db_repo.github_id)]["instance"] = db_repo + vcs_repos[str(db_repo.github_id)]["latest"] = release_instance + + return vcs_repos + + def get_repo_latest_release(self, repo): + """Retrieves the repository last release.""" + # Bail out fast if object (Repository) not in DB session. + if repo not in db.session: + return None + + q = repo.releases.filter_by(status=ReleaseStatus.PUBLISHED) + release_object = q.order_by(db.desc(Release.created)).first() + + return release_object.to_generic() + + def list_repo_releases(self, repo): + # Retrieve releases and sort them by creation date + release_instances = [] + for release_object in repo.releases.order_by(Release.created): + release_instances.append(release_object.to_generic()) + return release_instances + + def get_repo_default_branch(self, repo_id): + return ( + self.provider.remote_account.extra_data.get("repos", {}) + .get(repo_id, None) + .get("default_branch", None) + ) + + def get_last_sync_time(self): + """Retrieves the last sync delta time from github's client extra data. + + Time is computed as the delta between now and the last sync time. + """ + extra_data = self.provider.remote_account.extra_data + if not extra_data.get("last_sync"): + raise RemoteAccountDataNotSet( + self.provider.user_id, + _("Last sync data is not set for user (remote data)."), + ) + + return extra_data["last_sync"] + + def get_repository(self, repo_id): + """Retrieves one repository. + + Checks for access permission. 
+ """ + repo = Repository.get(provider_id=repo_id) + if not repo: + raise RepositoryNotFoundError(repo_id) + + # Might raise a RepositoryAccessError + self.check_repo_access_permissions(repo) + + return repo + + def check_repo_access_permissions(self, repo): + """Checks permissions from user on repo. + + Repo has access if any of the following is True: + + - user is the owner of the repo + - user has access to the repo in GitHub (stored in RemoteAccount.extra_data.repos) + """ + if self.provider.user_id and repo and repo.user_id: + user_is_owner = repo.user_id == int(self.provider.user_id) + if user_is_owner: + return True + + if self.provider.remote_account and self.provider.remote_account.extra_data: + user_has_remote_access = self.provider.user_available_repositories.get( + str(repo.github_id) + ) + if user_has_remote_access: + return True + + raise RepositoryAccessError( + user=self.provider.user_id, repo=repo.name, repo_id=repo.provider_id + ) diff --git a/invenio_github/tasks.py b/invenio_vcs/tasks.py similarity index 100% rename from invenio_github/tasks.py rename to invenio_vcs/tasks.py diff --git a/invenio_github/templates/semantic-ui/invenio_github/base.html b/invenio_vcs/templates/semantic-ui/invenio_github/base.html similarity index 100% rename from invenio_github/templates/semantic-ui/invenio_github/base.html rename to invenio_vcs/templates/semantic-ui/invenio_github/base.html diff --git a/invenio_github/templates/semantic-ui/invenio_github/helpers.html b/invenio_vcs/templates/semantic-ui/invenio_github/helpers.html similarity index 100% rename from invenio_github/templates/semantic-ui/invenio_github/helpers.html rename to invenio_vcs/templates/semantic-ui/invenio_github/helpers.html diff --git a/invenio_github/templates/semantic-ui/invenio_github/settings/base.html b/invenio_vcs/templates/semantic-ui/invenio_github/settings/base.html similarity index 100% rename from invenio_github/templates/semantic-ui/invenio_github/settings/base.html rename to 
invenio_vcs/templates/semantic-ui/invenio_github/settings/base.html diff --git a/invenio_github/templates/semantic-ui/invenio_github/settings/helpers.html b/invenio_vcs/templates/semantic-ui/invenio_github/settings/helpers.html similarity index 100% rename from invenio_github/templates/semantic-ui/invenio_github/settings/helpers.html rename to invenio_vcs/templates/semantic-ui/invenio_github/settings/helpers.html diff --git a/invenio_github/templates/semantic-ui/invenio_github/settings/index.html b/invenio_vcs/templates/semantic-ui/invenio_github/settings/index.html similarity index 100% rename from invenio_github/templates/semantic-ui/invenio_github/settings/index.html rename to invenio_vcs/templates/semantic-ui/invenio_github/settings/index.html diff --git a/invenio_github/templates/semantic-ui/invenio_github/settings/index_item.html b/invenio_vcs/templates/semantic-ui/invenio_github/settings/index_item.html similarity index 100% rename from invenio_github/templates/semantic-ui/invenio_github/settings/index_item.html rename to invenio_vcs/templates/semantic-ui/invenio_github/settings/index_item.html diff --git a/invenio_github/templates/semantic-ui/invenio_github/settings/view.html b/invenio_vcs/templates/semantic-ui/invenio_github/settings/view.html similarity index 100% rename from invenio_github/templates/semantic-ui/invenio_github/settings/view.html rename to invenio_vcs/templates/semantic-ui/invenio_github/settings/view.html diff --git a/invenio_github/translations/cs/LC_MESSAGES/messages.po b/invenio_vcs/translations/cs/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/cs/LC_MESSAGES/messages.po rename to invenio_vcs/translations/cs/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/da/LC_MESSAGES/messages.po b/invenio_vcs/translations/da/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/da/LC_MESSAGES/messages.po rename to 
invenio_vcs/translations/da/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/de/LC_MESSAGES/messages.po b/invenio_vcs/translations/de/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/de/LC_MESSAGES/messages.po rename to invenio_vcs/translations/de/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/en/LC_MESSAGES/messages.po b/invenio_vcs/translations/en/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/en/LC_MESSAGES/messages.po rename to invenio_vcs/translations/en/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/es/LC_MESSAGES/messages.po b/invenio_vcs/translations/es/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/es/LC_MESSAGES/messages.po rename to invenio_vcs/translations/es/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/fr/LC_MESSAGES/messages.po b/invenio_vcs/translations/fr/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/fr/LC_MESSAGES/messages.po rename to invenio_vcs/translations/fr/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/it/LC_MESSAGES/messages.po b/invenio_vcs/translations/it/LC_MESSAGES/messages.po similarity index 100% rename from invenio_github/translations/it/LC_MESSAGES/messages.po rename to invenio_vcs/translations/it/LC_MESSAGES/messages.po diff --git a/invenio_github/translations/messages.pot b/invenio_vcs/translations/messages.pot similarity index 100% rename from invenio_github/translations/messages.pot rename to invenio_vcs/translations/messages.pot diff --git a/invenio_github/utils.py b/invenio_vcs/utils.py similarity index 100% rename from invenio_github/utils.py rename to invenio_vcs/utils.py diff --git a/invenio_github/views/__init__.py b/invenio_vcs/views/__init__.py similarity index 100% rename from invenio_github/views/__init__.py rename to invenio_vcs/views/__init__.py diff --git 
a/invenio_github/views/badge.py b/invenio_vcs/views/badge.py similarity index 100% rename from invenio_github/views/badge.py rename to invenio_vcs/views/badge.py diff --git a/invenio_github/views/vcs.py b/invenio_vcs/views/vcs.py similarity index 90% rename from invenio_github/views/vcs.py rename to invenio_vcs/views/vcs.py index e60c5231..7bfe4a23 100644 --- a/invenio_github/views/vcs.py +++ b/invenio_vcs/views/vcs.py @@ -30,7 +30,6 @@ from flask_login import current_user, login_required from invenio_db import db from invenio_i18n import gettext as _ -from invenio_oauthclient.proxies import current_oauthclient from sqlalchemy.orm.exc import NoResultFound from invenio_github.api import GitHubAPI @@ -46,12 +45,12 @@ def request_session_token(): def decorator(f): @wraps(f) def inner(*args, **kwargs): - github = GitHubAPI(user_id=current_user.id) - token = github.session_token - if token: + provider = kwargs["provider"] + svc = VersionControlService(provider, current_user.id) + if svc.is_authenticated: return f(*args, **kwargs) raise GithubTokenNotFound( - current_user, _("Github session token is requested") + current_user, _("VCS provider session token is required") ) return inner @@ -99,33 +98,28 @@ def get_repositories(provider): ctx.update( { "connected": True, - "repos": sorted(repos.items(), key=lambda x: x[1]["full_name"]), + "repos": repos, "last_sync": last_sync, } ) return render_template(current_app.config["GITHUB_TEMPLATE_INDEX"], **ctx) - @blueprint.route("/repository/") + @blueprint.route("/repository/") @login_required @request_session_token() - def get_repository(repo_name): + def get_repository(provider, repo_id): """Displays one repository. Retrieves and builds context to display all repository releases, if any. 
""" - user_id = current_user.id - github = GitHubAPI(user_id=user_id) + svc = VersionControlService(provider, current_user.id) try: - repo = github.get_repository(repo_name) - latest_release = github.repo_last_published_release(repo) - default_branch = ( - github.account.extra_data.get("repos", {}) - .get(str(repo.github_id), None) - .get("default_branch", None) - ) - releases = github.get_repository_releases(repo=repo) + repo = svc.get_repository(repo_id) + latest_release = svc.get_repo_latest_release(repo) + default_branch = svc.get_repo_default_branch(repo_id) + releases = svc.list_repo_releases(repo) return render_template( current_app.config["GITHUB_TEMPLATE_VIEW"], latest_release=latest_release, diff --git a/invenio_github/webpack.py b/invenio_vcs/webpack.py similarity index 100% rename from invenio_github/webpack.py rename to invenio_vcs/webpack.py diff --git a/setup.cfg b/setup.cfg index fb8b5eeb..5800e175 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,16 +24,16 @@ # as an Intergovernmental Organization or submit itself to any jurisdiction. [metadata] -name = invenio-github -version = attr: invenio_github.__version__ +name = invenio-vcs +version = attr: invenio_vcs.__version__ description = "Invenio module that adds GitHub integration to the platform." 
long_description = file: README.rst, CHANGES.rst -keywords = invenio github +keywords = invenio github gitlab vcs license = MIT author = CERN author_email = info@inveniosoftware.org platforms = any -url = https://github.com/inveniosoftware/invenio-github +url = https://github.com/inveniosoftware/invenio-vcs classifiers = Development Status :: 5 - Production/Stable @@ -85,28 +85,28 @@ opensearch2 = [options.entry_points] invenio_base.apps = - invenio_github = invenio_github:InvenioGitHub + invenio_vcs = invenio_vcs:InvenioGitHub invenio_base.api_apps = - invenio_github = invenio_github:InvenioGitHub + invenio_vcs = invenio_vcs:InvenioGitHub invenio_base.blueprints = - invenio_github_badge = invenio_github.views.badge:blueprint - invenio_github_github = invenio_github.views.github:create_ui_blueprint + invenio_vcs = invenio_vcs.views.badge:blueprint + invenio_vcs_vcs = invenio_vcs.views.vcs:create_ui_blueprint invenio_base.api_blueprints = - invenio_github = invenio_github.views.github:create_api_blueprint + invenio_vcs = invenio_vcs.views.vcs:create_api_blueprint invenio_base.finalize_app = - invenio_github = invenio_github.ext:finalize_app + invenio_vcs = invenio_vcs.ext:finalize_app invenio_celery.tasks = - invenio_github = invenio_github.tasks + invenio_vcs = invenio_vcs.tasks invenio_db.alembic = - invenio_github = invenio_github:alembic + invenio_vcs = invenio_vcs:alembic invenio_db.models = - invenio_github = invenio_github.models + invenio_vcs = invenio_vcs.models invenio_i18n.translations = - messages = invenio_github + messages = invenio_vcs invenio_webhooks.receivers = - github = invenio_github.receivers:GitHubReceiver + vcs = invenio_vcs.receivers:GitHubReceiver invenio_assets.webpack = - invenio_github = invenio_github.webpack:theme + invenio_vcs = invenio_vcs.webpack:theme [build_sphinx] source-dir = docs/ @@ -117,23 +117,23 @@ all_files = 1 universal = 1 [compile_catalog] -directory = invenio_github/translations/ +directory = invenio_vcs/translations/ 
use_fuzzy = True [extract_messages] copyright_holder = CERN msgid_bugs_address = info@inveniosoftware.org mapping_file = babel.ini -output_file = invenio_github/translations/messages.pot +output_file = invenio_vcs/translations/messages.pot add_comments = NOTE [init_catalog] -input_file = invenio_github/translations/messages.pot -output_dir = invenio_github/translations/ +input_file = invenio_vcs/translations/messages.pot +output_dir = invenio_vcs/translations/ [update_catalog] -input_file = invenio_github/translations/messages.pot -output_dir = invenio_github/translations/ +input_file = invenio_vcs/translations/messages.pot +output_dir = invenio_vcs/translations/ [pydocstyle] add_ignore = D401,D403 @@ -146,5 +146,5 @@ ignore = *-requirements.txt [tool:pytest] -addopts = --black --isort --pydocstyle --ignore=docs --cov=invenio_github --cov-report=term-missing -testpaths = tests invenio_github +addopts = --black --isort --pydocstyle --ignore=docs --cov=invenio_vcs --cov-report=term-missing +testpaths = tests invenio_vcs From ed09eafdee1b68f193ff2b09112ec0cfe849cd6e Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Fri, 8 Aug 2025 11:37:46 +0200 Subject: [PATCH 03/19] WIP: rename import references --- docs/conf.py | 2 +- invenio_vcs/api.py | 8 ++++---- invenio_vcs/config.py | 2 +- invenio_vcs/contrib/github.py | 4 ++-- invenio_vcs/ext.py | 6 +++--- invenio_vcs/models.py | 2 +- invenio_vcs/oauth/handlers.py | 4 ++-- invenio_vcs/oauth/remote_app.py | 2 +- invenio_vcs/providers.py | 2 +- invenio_vcs/receivers.py | 4 ++-- invenio_vcs/service.py | 6 +++--- invenio_vcs/tasks.py | 6 +++--- invenio_vcs/views/badge.py | 8 ++++---- invenio_vcs/views/vcs.py | 6 +++--- tests/fixtures.py | 4 ++-- tests/test_api.py | 4 ++-- tests/test_invenio_github.py | 4 ++-- tests/test_models.py | 2 +- tests/test_tasks.py | 8 ++++---- tests/test_webhook.py | 4 ++-- 20 files changed, 44 insertions(+), 44 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 75c64ae5..3e73247e 100644 --- 
a/docs/conf.py +++ b/docs/conf.py @@ -25,7 +25,7 @@ """Sphinx configuration.""" -from invenio_github import __version__ +from invenio_vcs import __version__ # -- General configuration ------------------------------------------------ diff --git a/invenio_vcs/api.py b/invenio_vcs/api.py index b0845605..59b1aee4 100644 --- a/invenio_vcs/api.py +++ b/invenio_vcs/api.py @@ -46,10 +46,10 @@ from werkzeug.local import LocalProxy from werkzeug.utils import cached_property -from invenio_github.models import Release, ReleaseStatus, Repository -from invenio_github.proxies import current_vcs -from invenio_github.tasks import sync_hooks as sync_hooks_task -from invenio_github.utils import iso_utcnow, parse_timestamp, utcnow +from invenio_vcs.models import Release, ReleaseStatus, Repository +from invenio_vcs.proxies import current_vcs +from invenio_vcs.tasks import sync_hooks as sync_hooks_task +from invenio_vcs.utils import iso_utcnow, parse_timestamp, utcnow from .errors import ( ReleaseZipballFetchError, diff --git a/invenio_vcs/config.py b/invenio_vcs/config.py index a5b0b726..74234a0e 100644 --- a/invenio_vcs/config.py +++ b/invenio_vcs/config.py @@ -24,7 +24,7 @@ from datetime import timedelta -from invenio_github.contrib.github import GitHubProvider +from invenio_vcs.contrib.github import GitHubProvider VCS_PROVIDERS = [] diff --git a/invenio_vcs/contrib/github.py b/invenio_vcs/contrib/github.py index 7a7ed317..32fd52b2 100644 --- a/invenio_vcs/contrib/github.py +++ b/invenio_vcs/contrib/github.py @@ -5,8 +5,8 @@ from invenio_oauthclient.contrib.github import GitHubOAuthSettingsHelper from werkzeug.utils import cached_property -from invenio_github.oauth.handlers import account_setup_handler, disconnect_handler -from invenio_github.providers import ( +from invenio_vcs.oauth.handlers import account_setup_handler, disconnect_handler +from invenio_vcs.providers import ( GenericRelease, GenericRepository, GenericWebhook, diff --git a/invenio_vcs/ext.py b/invenio_vcs/ext.py 
index 62e6958c..a75a9e14 100644 --- a/invenio_vcs/ext.py +++ b/invenio_vcs/ext.py @@ -33,9 +33,9 @@ from six import string_types from werkzeug.utils import cached_property, import_string -from invenio_github.api import GitHubRelease -from invenio_github.providers import get_provider_list -from invenio_github.utils import obj_or_import_string +from invenio_vcs.api import GitHubRelease +from invenio_vcs.providers import get_provider_list +from invenio_vcs.utils import obj_or_import_string from . import config diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index c564b2ba..d4288d85 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -35,7 +35,7 @@ from sqlalchemy_utils.models import Timestamp from sqlalchemy_utils.types import ChoiceType, JSONType, UUIDType -from invenio_github.providers import GenericRelease +from invenio_vcs.providers import GenericRelease RELEASE_STATUS_TITLES = { "RECEIVED": _("Received"), diff --git a/invenio_vcs/oauth/handlers.py b/invenio_vcs/oauth/handlers.py index ad2cc15a..483b865a 100644 --- a/invenio_vcs/oauth/handlers.py +++ b/invenio_vcs/oauth/handlers.py @@ -28,8 +28,8 @@ from invenio_oauth2server.models import Token as ProviderToken from invenio_oauthclient import oauth_unlink_external_id -from invenio_github.api import GitHubAPI -from invenio_github.tasks import disconnect_github +from invenio_vcs.api import GitHubAPI +from invenio_vcs.tasks import disconnect_github def account_setup_handler(remote, token, resp): diff --git a/invenio_vcs/oauth/remote_app.py b/invenio_vcs/oauth/remote_app.py index 89525b90..d30347b0 100644 --- a/invenio_vcs/oauth/remote_app.py +++ b/invenio_vcs/oauth/remote_app.py @@ -9,7 +9,7 @@ from invenio_oauthclient.contrib.github import GitHubOAuthSettingsHelper -from invenio_github.oauth.handlers import account_setup_handler, disconnect_handler +from invenio_vcs.oauth.handlers import account_setup_handler, disconnect_handler request_token_params = {"scope": 
"read:user,user:email,admin:repo_hook,read:org"} diff --git a/invenio_vcs/providers.py b/invenio_vcs/providers.py index 8eb7e95a..74052c7a 100644 --- a/invenio_vcs/providers.py +++ b/invenio_vcs/providers.py @@ -12,7 +12,7 @@ from werkzeug.local import LocalProxy from werkzeug.utils import cached_property -from invenio_github.errors import RemoteAccountDataNotSet +from invenio_vcs.errors import RemoteAccountDataNotSet @dataclass diff --git a/invenio_vcs/receivers.py b/invenio_vcs/receivers.py index 22fc1c9b..4f0ab426 100644 --- a/invenio_vcs/receivers.py +++ b/invenio_vcs/receivers.py @@ -25,8 +25,8 @@ from invenio_db import db from invenio_webhooks.models import Receiver -from invenio_github.models import Release, ReleaseStatus, Repository -from invenio_github.tasks import process_release +from invenio_vcs.models import Release, ReleaseStatus, Repository +from invenio_vcs.tasks import process_release from .errors import ( InvalidSenderError, diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py index 67d10462..c539d53e 100644 --- a/invenio_vcs/service.py +++ b/invenio_vcs/service.py @@ -5,13 +5,13 @@ from invenio_i18n import gettext as _ from werkzeug.utils import cached_property -from invenio_github.errors import ( +from invenio_vcs.errors import ( RemoteAccountDataNotSet, RepositoryAccessError, RepositoryNotFoundError, ) -from invenio_github.models import Release, ReleaseStatus, Repository -from invenio_github.providers import GenericRelease, get_provider_by_id +from invenio_vcs.models import Release, ReleaseStatus, Repository +from invenio_vcs.providers import GenericRelease, get_provider_by_id class VersionControlService: diff --git a/invenio_vcs/tasks.py b/invenio_vcs/tasks.py index 3c0754a9..4954263f 100644 --- a/invenio_vcs/tasks.py +++ b/invenio_vcs/tasks.py @@ -33,9 +33,9 @@ from invenio_oauthclient.models import RemoteAccount from invenio_oauthclient.proxies import current_oauthclient -from invenio_github.errors import CustomGitHubMetadataError, 
RepositoryAccessError -from invenio_github.models import Release, ReleaseStatus -from invenio_github.proxies import current_vcs +from invenio_vcs.errors import CustomGitHubMetadataError, RepositoryAccessError +from invenio_vcs.models import Release, ReleaseStatus +from invenio_vcs.proxies import current_vcs def _get_err_obj(msg): diff --git a/invenio_vcs/views/badge.py b/invenio_vcs/views/badge.py index de79c02e..700ec524 100644 --- a/invenio_vcs/views/badge.py +++ b/invenio_vcs/views/badge.py @@ -28,10 +28,10 @@ from flask import Blueprint, abort, current_app, redirect, url_for from flask_login import current_user -from invenio_github.api import GitHubAPI -from invenio_github.errors import ReleaseNotFound -from invenio_github.models import ReleaseStatus, Repository -from invenio_github.proxies import current_vcs +from invenio_vcs.api import GitHubAPI +from invenio_vcs.errors import ReleaseNotFound +from invenio_vcs.models import ReleaseStatus, Repository +from invenio_vcs.proxies import current_vcs blueprint = Blueprint( "invenio_github_badge", diff --git a/invenio_vcs/views/vcs.py b/invenio_vcs/views/vcs.py index 7bfe4a23..4d079f3d 100644 --- a/invenio_vcs/views/vcs.py +++ b/invenio_vcs/views/vcs.py @@ -32,9 +32,9 @@ from invenio_i18n import gettext as _ from sqlalchemy.orm.exc import NoResultFound -from invenio_github.api import GitHubAPI -from invenio_github.providers import get_provider_by_id -from invenio_github.service import VersionControlService +from invenio_vcs.api import GitHubAPI +from invenio_vcs.providers import get_provider_by_id +from invenio_vcs.service import VersionControlService from ..errors import GithubTokenNotFound, RepositoryAccessError, RepositoryNotFoundError diff --git a/tests/fixtures.py b/tests/fixtures.py index ce4020a7..ad807e9b 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -27,8 +27,8 @@ from six import BytesIO -from invenio_github.api import GitHubRelease -from invenio_github.models import ReleaseStatus +from 
invenio_vcs.api import GitHubRelease +from invenio_vcs.models import ReleaseStatus class TestGithubRelease(GitHubRelease): diff --git a/tests/test_api.py b/tests/test_api.py index 44389d3f..4223bfc4 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -11,8 +11,8 @@ import pytest from invenio_webhooks.models import Event -from invenio_github.api import GitHubAPI, GitHubRelease -from invenio_github.models import Release, ReleaseStatus +from invenio_vcs.api import GitHubAPI, GitHubRelease +from invenio_vcs.models import Release, ReleaseStatus from .fixtures import PAYLOAD as github_payload_fixture diff --git a/tests/test_invenio_github.py b/tests/test_invenio_github.py index ceed134a..ca653287 100644 --- a/tests/test_invenio_github.py +++ b/tests/test_invenio_github.py @@ -27,12 +27,12 @@ from flask import Flask -from invenio_github import InvenioGitHub +from invenio_vcs import InvenioGitHub def test_version(): """Test version import.""" - from invenio_github import __version__ + from invenio_vcs import __version__ assert __version__ diff --git a/tests/test_models.py b/tests/test_models.py index be63e40f..76086b6e 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -22,7 +22,7 @@ """Test cases for badge creation.""" -from invenio_github.models import Repository +from invenio_vcs.models import Repository def test_repository_unbound(app): diff --git a/tests/test_tasks.py b/tests/test_tasks.py index 88ada337..2109d7f4 100644 --- a/tests/test_tasks.py +++ b/tests/test_tasks.py @@ -26,10 +26,10 @@ from invenio_webhooks.models import Event from mock import patch -from invenio_github.api import GitHubAPI -from invenio_github.models import Release, ReleaseStatus, Repository -from invenio_github.tasks import process_release, refresh_accounts -from invenio_github.utils import iso_utcnow +from invenio_vcs.api import GitHubAPI +from invenio_vcs.models import Release, ReleaseStatus, Repository +from invenio_vcs.tasks import process_release, refresh_accounts +from 
invenio_vcs.utils import iso_utcnow from . import fixtures diff --git a/tests/test_webhook.py b/tests/test_webhook.py index 14c3317e..a04212aa 100644 --- a/tests/test_webhook.py +++ b/tests/test_webhook.py @@ -27,8 +27,8 @@ # from invenio_rdm_records.proxies import current_rdm_records_service from invenio_webhooks.models import Event -from invenio_github.api import GitHubAPI -from invenio_github.models import ReleaseStatus, Repository +from invenio_vcs.api import GitHubAPI +from invenio_vcs.models import ReleaseStatus, Repository def test_webhook_post(app, db, tester_id, remote_token, github_api): From 06439311a5fff17f36fd5d995b2990ae29f7ce10 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Tue, 12 Aug 2025 12:12:04 +0200 Subject: [PATCH 04/19] WIP: start making generic version of GithubRelease class --- invenio_vcs/api.py | 254 +---------------------------- invenio_vcs/config.py | 2 +- invenio_vcs/contrib/github.py | 93 +++++++---- invenio_vcs/errors.py | 4 +- invenio_vcs/ext.py | 5 +- invenio_vcs/models.py | 13 +- invenio_vcs/providers.py | 292 ++++++++++++++++++++++++++++++++-- invenio_vcs/service.py | 173 +++++++++++++++++++- invenio_vcs/tasks.py | 8 +- invenio_vcs/utils.py | 7 +- invenio_vcs/views/vcs.py | 64 +++----- tests/fixtures.py | 8 +- tests/test_api.py | 6 +- 13 files changed, 555 insertions(+), 374 deletions(-) diff --git a/invenio_vcs/api.py b/invenio_vcs/api.py index 59b1aee4..86eb8923 100644 --- a/invenio_vcs/api.py +++ b/invenio_vcs/api.py @@ -57,7 +57,7 @@ RemoteAccountNotFound, RepositoryAccessError, RepositoryNotFoundError, - UnexpectedGithubResponse, + UnexpectedProviderResponse, ) @@ -470,255 +470,3 @@ def revoke_token(cls, token): with gh_api.session.temporary_basic_auth(client_id, client_secret): response = gh_api._delete(url, data=json.dumps({"access_token": token})) return response - - -class GitHubRelease(object): - """A GitHub release.""" - - def __init__(self, release): - """Constructor.""" - self.release_object = release - 
self._resolved_zipball_url = None - - @cached_property - def record(self): - """Release record.""" - return self.resolve_record() - - @cached_property - def gh(self): - """Return GitHubAPI object.""" - return GitHubAPI(user_id=self.event.user_id) - - @cached_property - def event(self): - """Get release event.""" - return self.release_object.event - - @cached_property - def payload(self): - """Return event payload.""" - return self.event.payload - - @cached_property - def release_payload(self): - """Return release metadata.""" - return self.payload["release"] - - @cached_property - def repository_payload(self): - """Return repository metadata.""" - return self.payload["repository"] - - @cached_property - def repository_object(self): - """Return repository model from database.""" - if self.release_object.repository_id: - repository = self.release_object.repository - else: - repository = Repository.query.filter_by( - user_id=self.event.user_id, - ).one() - return repository - - @cached_property - def release_file_name(self): - """Returns release zipball file name.""" - tag_name = self.release_payload["tag_name"] - repo_name = self.repository_payload["full_name"] - filename = f"{repo_name}-{tag_name}.zip" - return filename - - @cached_property - def release_zipball_url(self): - """Returns the release zipball URL.""" - return self.release_payload["zipball_url"] - - @cached_property - def user_identity(self): - """Generates release owner's user identity.""" - identity = get_identity(self.repository_object.user) - identity.provides.add(authenticated_user) - identity.user = self.repository_object.user - return identity - - @cached_property - def contributors(self): - """Get list of contributors to a repository. - - The list of contributors is fetched from Github API, filtered for type "User" and sorted by contributions. - - :returns: a generator of objects that contains contributors information. 
- :raises UnexpectedGithubResponse: when Github API returns a status code other than 200. - """ - max_contributors = current_app.config.get("GITHUB_MAX_CONTRIBUTORS_NUMBER", 30) - contributors_iter = self.gh.api.repository_with_id( - self.repository_object.github_id - ).contributors(number=max_contributors) - - # Consume the iterator to materialize the request and have a `last_status``. - contributors = list(contributors_iter) - status = contributors_iter.last_status - if status == 200: - # Sort by contributions and filter only users. - sorted_contributors = sorted( - (c for c in contributors if c.type == "User"), - key=lambda x: x.contributions, - reverse=True, - ) - - # Expand contributors using `Contributor.refresh()` - contributors = [x.refresh().as_dict() for x in sorted_contributors] - return contributors - else: - # Contributors fetch failed - raise UnexpectedGithubResponse( - _("Github returned unexpected code: %(status)s for release %(repo_id)s") - % {"status": status, "repo_id": self.repository_object.github_id} - ) - - @cached_property - def owner(self): - """Get owner of repository as a creator.""" - try: - owner = self.gh.api.repository_with_id( - self.repository_object.github_id - ).owner - return owner - except Exception: - return None - - # Helper functions - - def is_first_release(self): - """Checks whether the current release is the first release of the repository.""" - latest_release = self.repository_object.latest_release(ReleaseStatus.PUBLISHED) - return True if not latest_release else False - - def test_zipball(self): - """Test if the zipball URL is accessible and return the resolved URL.""" - return self.resolve_zipball_url() - - def resolve_zipball_url(self, cache=True): - """Resolve the zipball URL. 
- - This method will try to resolve the zipball URL by making a HEAD request, - handling the following edge cases: - - - In the case of a 300 Multiple Choices response, which can happen when a tag - and branch have the same name, it will try to fetch an "alternate" link. - - If the access token does not have the required scopes/permissions to access - public links, it will fallback to a non-authenticated request. - """ - if self._resolved_zipball_url and cache: - return self._resolved_zipball_url - - url = self.release_zipball_url - - # Execute a HEAD request to the zipball url to test if it is accessible. - response = self.gh.api.session.head(url, allow_redirects=True) - - # In case where there is a tag and branch with the same name, we might get back - # a "300 Multiple Choices" response, which requires fetching an "alternate" - # link. - if response.status_code == 300: - alternate_url = response.links.get("alternate", {}).get("url") - if alternate_url: - url = alternate_url # Use the alternate URL - response = self.gh.api.session.head(url, allow_redirects=True) - - # Another edge-case, is when the access token we have does not have the - # scopes/permissions to access public links. In that rare case we fallback to a - # non-authenticated request. - if response.status_code == 404: - current_app.logger.warning( - "GitHub zipball URL {url} not found, trying unauthenticated request.", - extra={"url": response.url}, - ) - response = requests.head(url, allow_redirects=True) - # If this response is successful we want to use the finally resolved URL to - # fetch the ZIP from. 
- if response.status_code == 200: - return response.url - - if response.status_code != 200: - raise ReleaseZipballFetchError() - - if cache: - self._resolved_zipball_url = response.url - - return response.url - - # High level API - - def release_failed(self): - """Set release status to FAILED.""" - self.release_object.status = ReleaseStatus.FAILED - - def release_processing(self): - """Set release status to PROCESSING.""" - self.release_object.status = ReleaseStatus.PROCESSING - - def release_published(self): - """Set release status to PUBLISHED.""" - self.release_object.status = ReleaseStatus.PUBLISHED - - def retrieve_remote_file(self, file_name): - """Retrieves a file from the repository, for the current release, using the github client. - - :param file_name: the name of the file to be retrieved from the repository. - :returns: the file contents or None, if the file if not fetched. - """ - gh_repo_owner = self.repository_payload["owner"]["login"] - gh_repo_name = self.repository_payload["name"] - gh_tag_name = self.release_payload["tag_name"] - try: - content = self.gh.api.repository(gh_repo_owner, gh_repo_name).file_contents( - path=file_name, ref=gh_tag_name - ) - except github3.exceptions.NotFoundError: - # github3 raises a github3.exceptions.NotFoundError if the file is not found - return None - return content - - @contextmanager - def fetch_zipball_file(self): - """Fetch release zipball file using the current github session.""" - session = self.gh.api.session - timeout = current_app.config.get("GITHUB_ZIPBALL_TIMEOUT", 300) - zipball_url = self.resolve_zipball_url() - with session.get(zipball_url, stream=True, timeout=timeout) as resp: - yield resp.raw - - def publish(self): - """Publish a GitHub release.""" - raise NotImplementedError - - def process_release(self): - """Processes a github release.""" - raise NotImplementedError - - def resolve_record(self): - """Resolves a record from the release. 
To be implemented by the API class implementation.""" - raise NotImplementedError - - def serialize_record(self): - """Serializes the release record.""" - raise NotImplementedError - - @property - @abstractmethod - def badge_title(self): - """Stores a string to render in the record badge title (e.g. 'DOI').""" - return None - - @property - @abstractmethod - def badge_value(self): - """Stores a string to render in the record badge value (e.g. '10.1234/invenio.1234').""" - raise NotImplementedError - - @property - def record_url(self): - """Release self url (e.g. github HTML url).""" - raise NotImplementedError diff --git a/invenio_vcs/config.py b/invenio_vcs/config.py index 74234a0e..2178f301 100644 --- a/invenio_vcs/config.py +++ b/invenio_vcs/config.py @@ -75,7 +75,7 @@ GITHUB_ERROR_HANDLERS = None """Definition of the way specific exceptions are handled.""" -GITHUB_MAX_CONTRIBUTORS_NUMBER = 30 +VCS_MAX_CONTRIBUTORS_NUMBER = 30 """Max number of contributors of a release to be retrieved from Github.""" VCS_INTEGRATION_ENABLED = False diff --git a/invenio_vcs/contrib/github.py b/invenio_vcs/contrib/github.py index 32fd52b2..2d9428e0 100644 --- a/invenio_vcs/contrib/github.py +++ b/invenio_vcs/contrib/github.py @@ -2,13 +2,16 @@ import github3 from github3.repos import ShortRepository +from invenio_i18n import gettext as _ from invenio_oauthclient.contrib.github import GitHubOAuthSettingsHelper from werkzeug.utils import cached_property +from invenio_vcs.errors import UnexpectedProviderResponse from invenio_vcs.oauth.handlers import account_setup_handler, disconnect_handler from invenio_vcs.providers import ( - GenericRelease, + GenericContributor, GenericRepository, + GenericUser, GenericWebhook, RepositoryServiceProvider, RepositoryServiceProviderFactory, @@ -41,7 +44,7 @@ def remote_config(self): } helper = GitHubOAuthSettingsHelper( - base_url=self.base_url, app_key=self.credentials_key + base_url=self.config["base_url"], app_key=self.config["credentials_key"] ) 
github_app = helper.remote_app github_app["disconnect_handler"] = disconnect_handler @@ -126,34 +129,13 @@ def get_repository(self, repository_id): str(repo.id), repo.full_name, repo.description, repo.default_branch ) - def get_repo_latest_release(self, repository_id): + def create_webhook(self, repository_id): assert repository_id.isdigit() if self._gh is None: - return None - repo = self._gh.repository_with_id(int(repository_id)) - if repo is None: - return None - - release = repo.latest_release() - if not release: - return None - - return GenericRelease( - str(release.id), - release.name, - release.tag_name, - release.tarball_url, - release.zipball_url, - release.created_at, - ) - - def create_webhook(self, repository_id, url): - assert repository_id.isdigit() - if self._gh is None: - return None + return False hook_config = dict( - url=url, + url=self.webhook_url, content_type="json", secret=self.factory.config["shared_secret"], insecure_ssl="1" if self.factory.config["insecure_ssl"] else "0", @@ -163,7 +145,7 @@ def create_webhook(self, repository_id, url): if repo is None: return False - hooks = (h for h in repo.hooks() if h.config.get("url", "") == url) + hooks = (h for h in repo.hooks() if h.config.get("url", "") == self.webhook_url) hook = next(hooks, None) if not hook: @@ -176,7 +158,7 @@ def create_webhook(self, repository_id, url): def delete_webhook(self, repository_id): assert repository_id.isdigit() if self._gh is None: - return None + return False repo = self._gh.repository_with_id(int(repository_id)) if repo is None: @@ -189,3 +171,58 @@ def delete_webhook(self, repository_id): if not hook or hook.delete(): return True return False + + def get_own_user(self): + if self._gh is None: + return None + + user = self._gh.me() + if user is not None: + return GenericUser(user.id, user.login, user.name) + + return None + + def list_repository_contributors(self, repository_id, max): + assert repository_id.isdigit() + if self._gh is None: + return None + + 
repo = self._gh.repository_with_id(repository_id) + if repo is None: + return None + + contributors_iter = repo.contributors(number=max) + # Consume the iterator to materialize the request and have a `last_status``. + contributors = list(contributors_iter) + status = contributors_iter.last_status + if status == 200: + # Sort by contributions and filter only users. + sorted_contributors = sorted( + (c for c in contributors if c.type == "User"), + key=lambda x: x.contributions_count, + reverse=True, + ) + + contributors = [ + GenericContributor(x.id, x.login, x.full_name, x.contributions_count) + for x in sorted_contributors + ] + return contributors + else: + raise UnexpectedProviderResponse( + _( + "Provider returned unexpected code: %(status)s for release in repo %(repo_id)s" + ) + % {"status": status, "repo_id": repository_id} + ) + + def get_repository_owner(self, repository_id): + assert repository_id.isdigit() + if self._gh is None: + return None + + repo = self._gh.repository_with_id(repository_id) + if repo is None: + return None + + return GenericUser(repo.owner.id, repo.owner.login, repo.owner.full_name) diff --git a/invenio_vcs/errors.py b/invenio_vcs/errors.py index d60b12c0..ec9fb400 100644 --- a/invenio_vcs/errors.py +++ b/invenio_vcs/errors.py @@ -145,10 +145,10 @@ def __init__(self, message=None): super().__init__(message or self.message) -class UnexpectedGithubResponse(GitHubError): +class UnexpectedProviderResponse(GitHubError): """Request to Github API returned an unexpected error.""" - message = _("Github API returned an unexpected error.") + message = _("Provider API returned an unexpected error.") def __init__(self, message=None): """Constructor.""" diff --git a/invenio_vcs/ext.py b/invenio_vcs/ext.py index a75a9e14..c6e6660b 100644 --- a/invenio_vcs/ext.py +++ b/invenio_vcs/ext.py @@ -33,8 +33,7 @@ from six import string_types from werkzeug.utils import cached_property, import_string -from invenio_vcs.api import GitHubRelease -from 
invenio_vcs.providers import get_provider_list +from invenio_vcs.providers import VCSRelease, get_provider_list from invenio_vcs.utils import obj_or_import_string from . import config @@ -54,7 +53,7 @@ def release_api_class(self): cls = current_app.config["VCS_RELEASE_CLASS"] if isinstance(cls, string_types): cls = import_string(cls) - assert issubclass(cls, GitHubRelease) + assert issubclass(cls, VCSRelease) return cls @cached_property diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index d4288d85..acf2a777 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -161,9 +161,15 @@ class Repository(db.Model, Timestamp): user = db.relationship(User) @classmethod - def create(cls, user_id, github_id=None, name=None, **kwargs): + def create(cls, user_id, provider, provider_id=None, name=None, **kwargs): """Create the repository.""" - obj = cls(user_id=user_id, github_id=github_id, name=name, **kwargs) + obj = cls( + user_id=user_id, + provider=provider, + provider_id=provider_id, + name=name, + **kwargs, + ) db.session.add(obj) return obj @@ -263,6 +269,3 @@ class Release(db.Model, Timestamp): def __repr__(self): """Get release representation.""" return f"" - - def to_generic(self): - return GenericRelease(self.id, "", self.tag, "", "", self.created) diff --git a/invenio_vcs/providers.py b/invenio_vcs/providers.py index 74052c7a..aba6f806 100644 --- a/invenio_vcs/providers.py +++ b/invenio_vcs/providers.py @@ -4,6 +4,8 @@ from urllib.parse import urlparse from flask import current_app +from invenio_access.permissions import authenticated_user +from invenio_access.utils import get_identity from invenio_i18n import gettext as _ from invenio_oauth2server.models import Token as ProviderToken from invenio_oauthclient import current_oauthclient @@ -13,6 +15,7 @@ from werkzeug.utils import cached_property from invenio_vcs.errors import RemoteAccountDataNotSet +from invenio_vcs.models import Repository @dataclass @@ -40,6 +43,21 @@ class GenericRelease: 
created_at: datetime +@dataclass +class GenericUser: + id: str + username: str + display_name: str + + +@dataclass +class GenericContributor: + id: str + username: str + display_name: str + contributions_count: int + + class RepositoryServiceProviderFactory(ABC): def __init__( self, provider: type["RepositoryServiceProvider"], webhook_receiver_url: str @@ -50,7 +68,7 @@ def __init__( @property @abstractmethod def remote_config(self): - pass + raise NotImplementedError @cached_property def remote(self): @@ -59,32 +77,32 @@ def remote(self): @property @abstractmethod def id(self) -> str: - pass + raise NotImplementedError @property @abstractmethod def name(self) -> str: - pass + raise NotImplementedError @property @abstractmethod def repository_name(self) -> str: - pass + raise NotImplementedError @property @abstractmethod def repository_name_plural(self) -> str: - pass + raise NotImplementedError @property @abstractmethod def icon(self) -> str: - pass + raise NotImplementedError @property @abstractmethod def config(self) -> dict: - pass + raise NotImplementedError def for_user(self, user_id: str): return self.provider(self, user_id) @@ -161,27 +179,42 @@ def is_valid_webhook(self, url): @abstractmethod def list_repositories(self): - pass + raise NotImplementedError @abstractmethod def list_repository_webhooks(self, repository_id): - pass + raise NotImplementedError + + def get_first_valid_webhook(self, repository_id): + webhooks = self.list_repository_webhooks(repository_id) + for hook in webhooks: + if self.is_valid_webhook(hook.url): + return hook + return None @abstractmethod def get_repository(self, repository_id): - pass + raise NotImplementedError + + @abstractmethod + def list_repository_contributors(self, repository_id, max): + raise NotImplementedError + + @abstractmethod + def get_repository_owner(self, repository_id): + raise NotImplementedError @abstractmethod - def get_repo_latest_release(self, repository_id): - pass + def create_webhook(self, 
repository_id): + raise NotImplementedError @abstractmethod - def create_webhook(self, repository_id, url): - pass + def delete_webhook(self, repository_id): + raise NotImplementedError @abstractmethod - def delete_webhook(self, repository_id, webhook_id): - pass + def get_own_user(self): + raise NotImplementedError def get_provider_list() -> list[RepositoryServiceProviderFactory]: @@ -194,3 +227,230 @@ def get_provider_by_id(id: str) -> RepositoryServiceProviderFactory: if id == provider.id: return provider raise Exception(f"VCS provider with ID {id} not registered") + + +class VCSRelease: + """A GitHub release.""" + + def __init__(self, release, provider: RepositoryServiceProvider): + """Constructor.""" + self.db_release = release + self.provider = provider + self._resolved_zipball_url = None + + @cached_property + def record(self): + """Release record.""" + return self.resolve_record() + + @cached_property + def event(self): + """Get release event.""" + return self.db_release.event + + @cached_property + def payload(self): + """Return event payload.""" + return self.event.payload + + @cached_property + def release_payload(self): + """Return release metadata.""" + return self.payload["release"] + + @cached_property + def repository_payload(self): + """Return repository metadata.""" + return self.payload["repository"] + + @cached_property + def repository_object(self): + """Return repository model from database.""" + if self.db_release.repository_id: + repository = self.db_release.repository + else: + repository = Repository.query.filter_by( + user_id=self.event.user_id, provider_id=self.provider.factory.id + ).one() + return repository + + @cached_property + def release_file_name(self): + """Returns release zipball file name.""" + tag_name = self.release_payload["tag_name"] + repo_name = self.repository_payload["full_name"] + filename = f"{repo_name}-{tag_name}.zip" + return filename + + @cached_property + def release_zipball_url(self): + """Returns the release 
zipball URL.""" + return self.release_payload["zipball_url"] + + @cached_property + def user_identity(self): + """Generates release owner's user identity.""" + identity = get_identity(self.repository_object.user) + identity.provides.add(authenticated_user) + identity.user = self.repository_object.user + return identity + + @cached_property + def contributors(self): + """Get list of contributors to a repository. + + The list of contributors is fetched from Github API, filtered for type "User" and sorted by contributions. + + :returns: a generator of objects that contains contributors information. + :raises UnexpectedGithubResponse: when Github API returns a status code other than 200. + """ + max_contributors = current_app.config.get("VCS_MAX_CONTRIBUTORS_NUMBER", 30) + return self.provider.list_repository_contributors( + self.repository_object.id, max=max_contributors + ) + + @cached_property + def owner(self): + """Get owner of repository as a creator.""" + try: + owner = self.gh.api.repository_with_id( + self.repository_object.github_id + ).owner + return owner + except Exception: + return None + + # Helper functions + + def is_first_release(self): + """Checks whether the current release is the first release of the repository.""" + latest_release = self.repository_object.latest_release(ReleaseStatus.PUBLISHED) + return True if not latest_release else False + + def test_zipball(self): + """Test if the zipball URL is accessible and return the resolved URL.""" + return self.resolve_zipball_url() + + def resolve_zipball_url(self, cache=True): + """Resolve the zipball URL. + + This method will try to resolve the zipball URL by making a HEAD request, + handling the following edge cases: + + - In the case of a 300 Multiple Choices response, which can happen when a tag + and branch have the same name, it will try to fetch an "alternate" link. 
+ - If the access token does not have the required scopes/permissions to access + public links, it will fallback to a non-authenticated request. + """ + if self._resolved_zipball_url and cache: + return self._resolved_zipball_url + + url = self.release_zipball_url + + # Execute a HEAD request to the zipball url to test if it is accessible. + response = self.gh.api.session.head(url, allow_redirects=True) + + # In case where there is a tag and branch with the same name, we might get back + # a "300 Multiple Choices" response, which requires fetching an "alternate" + # link. + if response.status_code == 300: + alternate_url = response.links.get("alternate", {}).get("url") + if alternate_url: + url = alternate_url # Use the alternate URL + response = self.gh.api.session.head(url, allow_redirects=True) + + # Another edge-case, is when the access token we have does not have the + # scopes/permissions to access public links. In that rare case we fallback to a + # non-authenticated request. + if response.status_code == 404: + current_app.logger.warning( + "GitHub zipball URL {url} not found, trying unauthenticated request.", + extra={"url": response.url}, + ) + response = requests.head(url, allow_redirects=True) + # If this response is successful we want to use the finally resolved URL to + # fetch the ZIP from. 
+ if response.status_code == 200: + return response.url + + if response.status_code != 200: + raise ReleaseZipballFetchError() + + if cache: + self._resolved_zipball_url = response.url + + return response.url + + # High level API + + def release_failed(self): + """Set release status to FAILED.""" + self.db_release.status = ReleaseStatus.FAILED + + def release_processing(self): + """Set release status to PROCESSING.""" + self.db_release.status = ReleaseStatus.PROCESSING + + def release_published(self): + """Set release status to PUBLISHED.""" + self.db_release.status = ReleaseStatus.PUBLISHED + + def retrieve_remote_file(self, file_name): + """Retrieves a file from the repository, for the current release, using the github client. + + :param file_name: the name of the file to be retrieved from the repository. + :returns: the file contents or None, if the file if not fetched. + """ + gh_repo_owner = self.repository_payload["owner"]["login"] + gh_repo_name = self.repository_payload["name"] + gh_tag_name = self.release_payload["tag_name"] + try: + content = self.gh.api.repository(gh_repo_owner, gh_repo_name).file_contents( + path=file_name, ref=gh_tag_name + ) + except github3.exceptions.NotFoundError: + # github3 raises a github3.exceptions.NotFoundError if the file is not found + return None + return content + + @contextmanager + def fetch_zipball_file(self): + """Fetch release zipball file using the current github session.""" + session = self.gh.api.session + timeout = current_app.config.get("GITHUB_ZIPBALL_TIMEOUT", 300) + zipball_url = self.resolve_zipball_url() + with session.get(zipball_url, stream=True, timeout=timeout) as resp: + yield resp.raw + + def publish(self): + """Publish a GitHub release.""" + raise NotImplementedError + + def process_release(self): + """Processes a github release.""" + raise NotImplementedError + + def resolve_record(self): + """Resolves a record from the release. 
To be implemented by the API class implementation.""" + raise NotImplementedError + + def serialize_record(self): + """Serializes the release record.""" + raise NotImplementedError + + @property + @abstractmethod + def badge_title(self): + """Stores a string to render in the record badge title (e.g. 'DOI').""" + return None + + @property + @abstractmethod + def badge_value(self): + """Stores a string to render in the record badge value (e.g. '10.1234/invenio.1234').""" + raise NotImplementedError + + @property + def record_url(self): + """Release self url (e.g. github HTML url).""" + raise NotImplementedError diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py index c539d53e..6825c092 100644 --- a/invenio_vcs/service.py +++ b/invenio_vcs/service.py @@ -1,17 +1,23 @@ from copy import deepcopy -from dataclasses import asdict +from flask import current_app from invenio_db import db from invenio_i18n import gettext as _ +from invenio_oauth2server.models import Token as ProviderToken +from sqlalchemy.exc import NoResultFound from werkzeug.utils import cached_property from invenio_vcs.errors import ( RemoteAccountDataNotSet, + RemoteAccountNotFound, RepositoryAccessError, RepositoryNotFoundError, ) from invenio_vcs.models import Release, ReleaseStatus, Repository -from invenio_vcs.providers import GenericRelease, get_provider_by_id +from invenio_vcs.providers import get_provider_by_id +from invenio_vcs.proxies import current_vcs +from invenio_vcs.tasks import sync_hooks as sync_hooks_task +from invenio_vcs.utils import iso_utcnow class VersionControlService: @@ -32,10 +38,9 @@ def list_repositories(self): ) for db_repo in db_repos: if str(db_repo.provider_id) in vcs_repos: - release_instance = self.provider.get_repo_latest_release( - db_repo.provider_id + release_instance = current_vcs.release_api_class( + db_repo.latest_release(), self.provider.factory.id ) - vcs_repos[str(db_repo.github_id)]["instance"] = db_repo vcs_repos[str(db_repo.github_id)]["latest"] = 
release_instance @@ -50,13 +55,15 @@ def get_repo_latest_release(self, repo): q = repo.releases.filter_by(status=ReleaseStatus.PUBLISHED) release_object = q.order_by(db.desc(Release.created)).first() - return release_object.to_generic() + return current_vcs.release_api_class(release_object, self.provider.factory.id) def list_repo_releases(self, repo): # Retrieve releases and sort them by creation date release_instances = [] for release_object in repo.releases.order_by(Release.created): - release_instances.append(release_object.to_generic()) + release_instances.append( + current_vcs.release_api_class(release_object, self.provider.factory.id) + ) return release_instances def get_repo_default_branch(self, repo_id): @@ -109,7 +116,7 @@ def check_repo_access_permissions(self, repo): if self.provider.remote_account and self.provider.remote_account.extra_data: user_has_remote_access = self.provider.user_available_repositories.get( - str(repo.github_id) + repo.provider_id ) if user_has_remote_access: return True @@ -117,3 +124,153 @@ def check_repo_access_permissions(self, repo): raise RepositoryAccessError( user=self.provider.user_id, repo=repo.name, repo_id=repo.provider_id ) + + def sync(self, hooks=True, async_hooks=True): + """Synchronize user repositories. + + :param bool hooks: True for syncing hooks. + :param bool async_hooks: True for sending of an asynchronous task to + sync hooks. + + .. note:: + + Syncing happens from GitHub's direction only. This means that we + consider the information on GitHub as valid, and we overwrite our + own state based on this information. 
+ """ + vcs_repos = self.provider.list_repositories() + + if hooks: + self._sync_hooks(vcs_repos.keys(), asynchronous=async_hooks) + + # Update changed names for repositories stored in DB + db_repos = Repository.query.filter( + Repository.user_id == self.provider.user_id, + ) + + for repo in db_repos: + vcs_repo = vcs_repos.get(repo.github_id) + if vcs_repo and repo.name != vcs_repo.full_name: + repo.name = vcs_repo.full_name + db.session.add(repo) + + # Remove ownership from repositories that the user has no longer + # 'admin' permissions, or have been deleted. + Repository.query.filter( + Repository.user_id == self.provider.user_id, + ~Repository.provider_id.in_(vcs_repos.keys()), + ).update({"user_id": None, "hook": None}, synchronize_session=False) + + # Update repos and last sync + self.provider.remote_account.extra_data.update( + dict( + repos=vcs_repos, + last_sync=iso_utcnow(), + ) + ) + self.provider.remote_account.extra_data.changed() + db.session.add(self.provider.remote_account) + + def _sync_hooks(self, repo_ids, asynchronous=True): + """Check if a hooks sync task needs to be started.""" + if not asynchronous: + for repo_id in repo_ids: + try: + self.sync_repo_hook(repo_id) + except RepositoryAccessError: + current_app.logger.warning( + str(RepositoryAccessError), exc_info=True + ) + except NoResultFound: + pass # Repository not in DB yet + else: + # If hooks will run asynchronously, we need to commit any changes done so far + db.session.commit() + sync_hooks_task.delay( + self.provider.factory.id, self.provider.user_id, repo_ids + ) + + def sync_repo_hook(self, repo_id): + """Sync a GitHub repo's hook with the locally stored repo.""" + # Get the hook that we may have set in the past + hook = self.provider.get_first_valid_webhook(repo_id) + vcs_repo = self.provider.get_repository(repo_id) + + # If hook on GitHub exists, get or create corresponding db object and + # enable the hook. Otherwise remove the old hook information. 
+ repo = Repository.get(repo_id) + + if hook: + if not repo: + repo = Repository.create( + self.provider.user_id, + self.provider.factory.id, + repo_id, + vcs_repo.full_name, + ) + if not repo.enabled: + self.mark_repo_enabled(repo, hook.id) + else: + if repo: + self.mark_repo_disabled(repo) + + def mark_repo_disabled(self, repo): + """Disables an user repository.""" + repo.hook = None + repo.user_id = None + + def mark_repo_enabled(self, repo, hook): + """Enables an user repository.""" + repo.hook = hook + repo.user_id = self.provider.user_id + + def init_account(self): + """Setup a new GitHub account.""" + if not self.provider.remote_account: + raise RemoteAccountNotFound( + self.provider.user_id, _("Remote account was not found for user.") + ) + + user = self.provider.get_own_user() + # Setup local access tokens to be used by the webhooks + hook_token = ProviderToken.create_personal( + f"{self.provider.factory.id}-webhook", + self.provider.user_id, + scopes=["webhooks:event"], + is_internal=True, + ) + # Initial structure of extra data + self.provider.remote_account.extra_data = dict( + id=user.id, + login=user.login, + name=user.name, + tokens=dict( + webhook=hook_token.id, + ), + repos=dict(), + last_sync=iso_utcnow(), + ) + + db.session.add(self.provider.remote_account) + + def enable_repository(self, repository_id): + repos = self.provider.remote_account.extra_data.get("repos", {}) + if repository_id not in repos: + raise RepositoryNotFoundError( + repository_id, _("Failed to enable repository.") + ) + + return self.provider.create_webhook(repository_id) + + def disable_repository(self, repository_id): + repos = self.provider.remote_account.extra_data.get("repos", {}) + if repository_id not in repos: + raise RepositoryNotFoundError( + repository_id, _("Failed to disable repository.") + ) + + remove_success = False + if repos: + remove_success = self.provider.delete_webhook(repository_id) + + return remove_success diff --git a/invenio_vcs/tasks.py 
b/invenio_vcs/tasks.py index 4954263f..9accc249 100644 --- a/invenio_vcs/tasks.py +++ b/invenio_vcs/tasks.py @@ -95,18 +95,18 @@ def disconnect_github(access_token, repo_hooks): @shared_task(max_retries=6, default_retry_delay=10 * 60, rate_limit="100/m") -def sync_hooks(user_id, repositories): +def sync_hooks(provider, user_id, repositories): """Sync repository hooks for a user.""" # Local import to avoid circular imports - from .api import GitHubAPI + from .service import VersionControlService try: # Sync hooks - gh = GitHubAPI(user_id=user_id) + svc = VersionControlService(provider, user_id) for repo_id in repositories: try: with db.session.begin_nested(): - gh.sync_repo_hook(repo_id) + svc.sync_repo_hook(repo_id) # We commit per repository, because while the task is running db.session.commit() except RepositoryAccessError as e: diff --git a/invenio_vcs/utils.py b/invenio_vcs/utils.py index 64034ac3..c8f6d126 100644 --- a/invenio_vcs/utils.py +++ b/invenio_vcs/utils.py @@ -19,17 +19,16 @@ """Various utility functions.""" -from datetime import datetime +from datetime import datetime, timezone import dateutil.parser -import pytz import six from werkzeug.utils import import_string def utcnow(): """UTC timestamp (with timezone).""" - return datetime.now(tz=pytz.utc) + return datetime.now(tz=timezone.utc) def iso_utcnow(): @@ -41,7 +40,7 @@ def parse_timestamp(x): """Parse ISO8601 formatted timestamp.""" dt = dateutil.parser.parse(x) if dt.tzinfo is None: - dt = dt.replace(tzinfo=pytz.utc) + dt = dt.replace(tzinfo=timezone.utc) return dt diff --git a/invenio_vcs/views/vcs.py b/invenio_vcs/views/vcs.py index 4d079f3d..be9fe28f 100644 --- a/invenio_vcs/views/vcs.py +++ b/invenio_vcs/views/vcs.py @@ -75,7 +75,9 @@ def create_ui_blueprint(app): def create_api_blueprint(app): """Creates blueprint and registers API endpoints if the integration is enabled.""" - blueprint_api = Blueprint("invenio_github_api", __name__) + blueprint_api = Blueprint( + "invenio_github_api", 
__name__, url_prefix="/user/vcs/" + ) if app.config.get("GITHUB_INTEGRATION_ENABLED", False): register_api_routes(blueprint_api) return blueprint_api @@ -141,8 +143,8 @@ def register_api_routes(blueprint): @login_required @request_session_token() - @blueprint.route("/user/github/repositories/sync", methods=["POST"]) - def sync_user_repositories(): + @blueprint.route("/repositories/sync", methods=["POST"]) + def sync_user_repositories(provider): """Synchronizes user repos. Currently: @@ -151,8 +153,8 @@ def sync_user_repositories(): POST /account/settings/github/hook """ try: - github = GitHubAPI(user_id=current_user.id) - github.sync(async_hooks=False) + svc = VersionControlService(provider, current_user.id) + svc.sync(async_hooks=False) db.session.commit() except Exception as exc: current_app.logger.exception(str(exc)) @@ -162,13 +164,13 @@ def sync_user_repositories(): @login_required @request_session_token() - @blueprint.route("/user/github/", methods=["POST"]) - def init_user_github(): + @blueprint.route("/", methods=["POST"]) + def init_user_github(provider): """Initialises github account for an user.""" try: - github = GitHubAPI(user_id=current_user.id) - github.init_account() - github.sync(async_hooks=False) + svc = VersionControlService(provider, current_user.id) + svc.init_account() + svc.sync(async_hooks=False) db.session.commit() except Exception as exc: current_app.logger.exception(str(exc)) @@ -177,10 +179,8 @@ def init_user_github(): @login_required @request_session_token() - @blueprint.route( - "/user/github/repositories//enable", methods=["POST"] - ) - def enable_repository(repository_id): + @blueprint.route("/repositories//enable", methods=["POST"]) + def enable_repository(provider, repository_id): """Enables one repository. 
Currently: @@ -189,18 +189,9 @@ def enable_repository(repository_id): POST /account/settings/github/hook """ try: - github = GitHubAPI(user_id=current_user.id) - - repos = github.account.extra_data.get("repos", {}) - - if str(repository_id) not in repos: - raise RepositoryNotFoundError( - repository_id, _("Failed to enable repository.") - ) + svc = VersionControlService(provider, current_user.id) + create_success = svc.enable_repository(repository_id) - create_success = github.create_hook( - repository_id, repos[str(repository_id)]["full_name"] - ) db.session.commit() if create_success: return "", 201 @@ -218,10 +209,8 @@ def enable_repository(repository_id): @login_required @request_session_token() - @blueprint.route( - "/user/github/repositories//disable", methods=["POST"] - ) - def disable_repository(repository_id): + @blueprint.route("/repositories//disable", methods=["POST"]) + def disable_repository(provider, repository_id): """Disables one repository. Currently: @@ -230,21 +219,10 @@ def disable_repository(repository_id): DELETE /account/settings/github/hook """ try: - github = GitHubAPI(user_id=current_user.id) - - repos = github.account.extra_data.get("repos", {}) - - if str(repository_id) not in repos: - raise RepositoryNotFoundError( - repository_id, _("Failed to disable repository.") - ) + svc = VersionControlService(provider, current_user.id) + remove_success = svc.disable_repository(repository_id) - remove_success = False - if repos: - remove_success = github.remove_hook( - repository_id, repos[str(repository_id)]["full_name"] - ) - db.session.commit() + db.session.commit() if remove_success: return "", 204 else: diff --git a/tests/fixtures.py b/tests/fixtures.py index ad807e9b..1100b533 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -27,11 +27,11 @@ from six import BytesIO -from invenio_vcs.api import GitHubRelease +from invenio_vcs.api import VCSRelease from invenio_vcs.models import ReleaseStatus -class TestGithubRelease(GitHubRelease): 
+class TestGithubRelease(VCSRelease): """Implements GithubRelease with test methods.""" def publish(self): @@ -39,8 +39,8 @@ def publish(self): Does not create a "real" record, as this only used to test the API. """ - self.release_object.status = ReleaseStatus.PUBLISHED - self.release_object.record_id = "445aaacd-9de1-41ab-af52-25ab6cb93df7" + self.generic_release.status = ReleaseStatus.PUBLISHED + self.generic_release.record_id = "445aaacd-9de1-41ab-af52-25ab6cb93df7" return {} def process_release(self): diff --git a/tests/test_api.py b/tests/test_api.py index 4223bfc4..8b87d4bc 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -11,7 +11,7 @@ import pytest from invenio_webhooks.models import Event -from invenio_vcs.api import GitHubAPI, GitHubRelease +from invenio_vcs.api import GitHubAPI, VCSRelease from invenio_vcs.models import Release, ReleaseStatus from .fixtures import PAYLOAD as github_payload_fixture @@ -58,7 +58,7 @@ def test_release_api(app, test_user, github_api): status=ReleaseStatus.RECEIVED, ) # Idea is to test the public interface of GithubRelease - gh = GitHubRelease(release) + gh = VCSRelease(release) # Validate that public methods raise NotImplementedError with pytest.raises(NotImplementedError): @@ -109,7 +109,7 @@ def test_release_branch_tag_conflict(app, test_user, github_api): status=ReleaseStatus.RECEIVED, ) # Idea is to test the public interface of GithubRelease - rel_api = GitHubRelease(release) + rel_api = VCSRelease(release) resolved_url = rel_api.resolve_zipball_url() ref_tag_url = ( "https://github.com/auser/repo-2/zipball/refs/tags/v1.0-tag-and-branch" From c6731479dd3c3440eb040ed12186f4b702ff5334 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Wed, 13 Aug 2025 14:43:23 +0200 Subject: [PATCH 05/19] WIP: start working on webhook receiver, extensions --- .tx/config | 6 +- MANIFEST.in | 16 +- README.rst | 2 +- invenio_vcs/__init__.py | 6 +- invenio_vcs/api.py | 472 ------------------ .../{invenio_github => 
invenio_vcs}/index.js | 0 invenio_vcs/config.py | 2 +- invenio_vcs/contrib/github.py | 121 ++++- invenio_vcs/ext.py | 45 +- invenio_vcs/oauth/handlers.py | 131 ++--- invenio_vcs/oauth/remote_app.py | 20 - invenio_vcs/providers.py | 279 ++--------- invenio_vcs/proxies.py | 2 +- invenio_vcs/receivers.py | 35 +- invenio_vcs/service.py | 229 ++++++++- invenio_vcs/tasks.py | 46 +- .../{invenio_github => invenio_vcs}/base.html | 0 .../helpers.html | 2 +- .../settings/base.html | 0 .../settings/helpers.html | 2 +- .../settings/index.html | 2 +- .../settings/index_item.html | 2 +- .../settings/view.html | 4 +- invenio_vcs/views/vcs.py | 18 +- invenio_vcs/webpack.py | 2 +- setup.cfg | 6 +- tests/fixtures.py | 2 +- tests/test_api.py | 2 +- tests/test_invenio_github.py | 6 +- 29 files changed, 546 insertions(+), 914 deletions(-) delete mode 100644 invenio_vcs/api.py rename invenio_vcs/assets/semantic-ui/js/{invenio_github => invenio_vcs}/index.js (100%) delete mode 100644 invenio_vcs/oauth/remote_app.py rename invenio_vcs/templates/semantic-ui/{invenio_github => invenio_vcs}/base.html (100%) rename invenio_vcs/templates/semantic-ui/{invenio_github => invenio_vcs}/helpers.html (94%) rename invenio_vcs/templates/semantic-ui/{invenio_github => invenio_vcs}/settings/base.html (100%) rename invenio_vcs/templates/semantic-ui/{invenio_github => invenio_vcs}/settings/helpers.html (98%) rename invenio_vcs/templates/semantic-ui/{invenio_github => invenio_vcs}/settings/index.html (98%) rename invenio_vcs/templates/semantic-ui/{invenio_github => invenio_vcs}/settings/index_item.html (96%) rename invenio_vcs/templates/semantic-ui/{invenio_github => invenio_vcs}/settings/view.html (99%) diff --git a/.tx/config b/.tx/config index 80930907..4b0a93db 100644 --- a/.tx/config +++ b/.tx/config @@ -25,8 +25,8 @@ [main] host = https://app.transifex.com -[o:inveniosoftware:p:invenio:r:invenio-github-messages] -file_filter = invenio_github/translations//LC_MESSAGES/messages.po -source_file = 
invenio_github/translations/messages.pot +[o:inveniosoftware:p:invenio:r:invenio-vcs-messages] +file_filter = invenio_vcs/translations//LC_MESSAGES/messages.po +source_file = invenio_vcs/translations/messages.pot source_lang = en type = PO diff --git a/MANIFEST.in b/MANIFEST.in index d699aee6..b7bdb3d3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -47,12 +47,12 @@ recursive-include docs *.py recursive-include docs *.rst recursive-include docs *.txt recursive-include docs Makefile -recursive-include invenio_github *.html -recursive-include invenio_github *.js -recursive-include invenio_github *.json -recursive-include invenio_github *.less -recursive-include invenio_github *.mo -recursive-include invenio_github *.po -recursive-include invenio_github *.pot -recursive-include invenio_github *.py +recursive-include invenio_vcs *.html +recursive-include invenio_vcs *.js +recursive-include invenio_vcs *.json +recursive-include invenio_vcs *.less +recursive-include invenio_vcs *.mo +recursive-include invenio_vcs *.po +recursive-include invenio_vcs *.pot +recursive-include invenio_vcs *.py include .git-blame-ignore-revs diff --git a/README.rst b/README.rst index f993df5a..99eb1862 100644 --- a/README.rst +++ b/README.rst @@ -22,7 +22,7 @@ as an Intergovernmental Organization or submit itself to any jurisdiction. ================ - Invenio-GitHub + Invenio-VCS ================ .. 
image:: https://img.shields.io/travis/inveniosoftware/invenio-github.svg diff --git a/invenio_vcs/__init__.py b/invenio_vcs/__init__.py index 0157344c..aa52df5b 100644 --- a/invenio_vcs/__init__.py +++ b/invenio_vcs/__init__.py @@ -25,8 +25,8 @@ """Invenio module that adds GitHub integration to the platform.""" -from .ext import InvenioGitHub +from .ext import InvenioVCS -__version__ = "3.0.1" +__version__ = "4.0.0" -__all__ = ("__version__", "InvenioGitHub") +__all__ = ("__version__", "InvenioVCS") diff --git a/invenio_vcs/api.py b/invenio_vcs/api.py deleted file mode 100644 index 86eb8923..00000000 --- a/invenio_vcs/api.py +++ /dev/null @@ -1,472 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of Invenio. -# Copyright (C) 2023-2025 CERN. -# Copyright (C) 2024 KTH Royal Institute of Technology. -# -# Invenio is free software; you can redistribute it -# and/or modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 2 of the -# License, or (at your option) any later version. -# -# Invenio is distributed in the hope that it will be -# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Invenio; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, -# MA 02111-1307, USA. -# -# In applying this license, CERN does not -# waive the privileges and immunities granted to it by virtue of its status -# as an Intergovernmental Organization or submit itself to any jurisdiction. 
- -"""Invenio module that adds GitHub integration to the platform.""" - -import json -from abc import abstractmethod -from contextlib import contextmanager -from copy import deepcopy -from urllib.parse import urlparse - -import github3 -import requests -from flask import current_app -from invenio_access.permissions import authenticated_user -from invenio_access.utils import get_identity -from invenio_db import db -from invenio_i18n import gettext as _ -from invenio_oauth2server.models import Token as ProviderToken -from invenio_oauthclient.handlers import token_getter -from invenio_oauthclient.models import RemoteAccount, RemoteToken -from invenio_oauthclient.proxies import current_oauthclient -from sqlalchemy.orm.exc import NoResultFound -from werkzeug.local import LocalProxy -from werkzeug.utils import cached_property - -from invenio_vcs.models import Release, ReleaseStatus, Repository -from invenio_vcs.proxies import current_vcs -from invenio_vcs.tasks import sync_hooks as sync_hooks_task -from invenio_vcs.utils import iso_utcnow, parse_timestamp, utcnow - -from .errors import ( - ReleaseZipballFetchError, - RemoteAccountDataNotSet, - RemoteAccountNotFound, - RepositoryAccessError, - RepositoryNotFoundError, - UnexpectedProviderResponse, -) - - -class GitHubAPI(object): - """Wrapper for GitHub API.""" - - def __init__(self, remote, user_id=None): - """Create a GitHub API object.""" - self.remote = remote - self.user_id = user_id - - @cached_property - def api(self): - """Return an authenticated GitHub API.""" - return github3.login(token=self.access_token) - - @cached_property - def access_token(self): - """Return OAuth access token's value.""" - token = RemoteToken.get(self.user_id, self.remote.consumer_key) - if not token: - # The token is not yet in DB, it is retrieved from the request session. 
- return self.remote.get_request_token()[0] - return token.access_token - - @property - def session_token(self): - """Return OAuth session token.""" - session_token = None - if self.user_id is not None: - session_token = token_getter(self.remote) - if session_token: - token = RemoteToken.get( - self.user_id, self.remote.consumer_key, access_token=session_token[0] - ) - return token - return None - - """Return OAuth remote application.""" - - def check_repo_access_permissions(self, repo): - """Checks permissions from user on repo. - - Repo has access if any of the following is True: - - - user is the owner of the repo - - user has access to the repo in GitHub (stored in RemoteAccount.extra_data.repos) - """ - if self.user_id and repo and repo.user_id: - user_is_owner = repo.user_id == int(self.user_id) - if user_is_owner: - return True - - if self.account and self.account.extra_data: - user_has_remote_access = self.user_available_repositories.get( - str(repo.github_id) - ) - if user_has_remote_access: - return True - - raise RepositoryAccessError( - user=self.user_id, repo=repo.name, repo_id=repo.github_id - ) - - @cached_property - def account(self): - """Return remote account.""" - return RemoteAccount.get(self.user_id, self.remote.consumer_key) - - @cached_property - def webhook_url(self): - """Return the url to be used by a GitHub webhook.""" - if not self.account.extra_data.get("tokens", {}).get("webhook"): - raise RemoteAccountDataNotSet( - self.user_id, _("Webhook data not found for user tokens (remote data).") - ) - - webhook_token = ProviderToken.query.filter_by( - id=self.account.extra_data["tokens"]["webhook"] - ).first() - if webhook_token: - wh_url = current_app.config.get("GITHUB_WEBHOOK_RECEIVER_URL") - if wh_url: - return wh_url.format(token=webhook_token.access_token) - else: - raise RuntimeError(_("You must set GITHUB_WEBHOOK_RECEIVER_URL.")) - - def init_account(self): - """Setup a new GitHub account.""" - if not self.account: - raise 
RemoteAccountNotFound( - self.user_id, _("Remote account was not found for user.") - ) - - ghuser = self.api.me() - # Setup local access tokens to be used by the webhooks - hook_token = ProviderToken.create_personal( - "github-webhook", - self.user_id, - scopes=["webhooks:event"], - is_internal=True, - ) - # Initial structure of extra data - self.account.extra_data = dict( - id=ghuser.id, - login=ghuser.login, - name=ghuser.name, - tokens=dict( - webhook=hook_token.id, - ), - repos=dict(), - last_sync=iso_utcnow(), - ) - db.session.add(self.account) - - def sync(self, hooks=True, async_hooks=True): - """Synchronize user repositories. - - :param bool hooks: True for syncing hooks. - :param bool async_hooks: True for sending of an asynchronous task to - sync hooks. - - .. note:: - - Syncing happens from GitHub's direction only. This means that we - consider the information on GitHub as valid, and we overwrite our - own state based on this information. - """ - github_repos = {} - for repo in self.api.repositories(): - if repo.permissions["admin"]: - github_repos[repo.id] = { - "id": repo.id, - "full_name": repo.full_name, - "description": repo.description, - "default_branch": repo.default_branch, - } - - if hooks: - self._sync_hooks(list(github_repos.keys()), asynchronous=async_hooks) - - # Update changed names for repositories stored in DB - db_repos = Repository.query.filter( - Repository.user_id == self.user_id, - ) - - for repo in db_repos: - gh_repo = github_repos.get(repo.github_id) - if gh_repo and repo.name != gh_repo["full_name"]: - repo.name = gh_repo["full_name"] - db.session.add(repo) - - # Remove ownership from repositories that the user has no longer - # 'admin' permissions, or have been deleted. 
- Repository.query.filter( - Repository.user_id == self.user_id, - ~Repository.github_id.in_(github_repos.keys()), - ).update({"user_id": None, "hook": None}, synchronize_session=False) - - # Update repos and last sync - self.account.extra_data.update( - dict( - repos=github_repos, - last_sync=iso_utcnow(), - ) - ) - self.account.extra_data.changed() - db.session.add(self.account) - - def _sync_hooks(self, repos, asynchronous=True): - """Check if a hooks sync task needs to be started.""" - if not asynchronous: - for repo_id in repos: - try: - self.sync_repo_hook(repo_id) - except RepositoryAccessError: - current_app.logger.warning( - str(RepositoryAccessError), exc_info=True - ) - except NoResultFound: - pass # Repository not in DB yet - else: - # If hooks will run asynchronously, we need to commit any changes done so far - db.session.commit() - sync_hooks_task.delay(self.user_id, repos) - - def _valid_webhook(self, url): - """Check if webhook url is valid. - - The webhook url is valid if it has the same host as the configured webhook url. - - :param str url: The webhook url to be checked. - :returns: True if the webhook url is valid, False otherwise. - """ - if not url: - return False - configured_host = urlparse(self.webhook_url).netloc - url_host = urlparse(url).netloc - if not (configured_host and url_host): - return False - return configured_host == url_host - - def sync_repo_hook(self, repo_id): - """Sync a GitHub repo's hook with the locally stored repo.""" - # Get the hook that we may have set in the past - gh_repo = self.api.repository_with_id(repo_id) - hooks = ( - hook - for hook in gh_repo.hooks() - if self._valid_webhook(hook.config.get("url", "")) - ) - hook = next(hooks, None) - - # If hook on GitHub exists, get or create corresponding db object and - # enable the hook. Otherwise remove the old hook information. 
- repo = Repository.get(repo_id, gh_repo.full_name) - - if hook: - if not repo: - repo = Repository.create(self.user_id, repo_id, gh_repo.full_name) - if not repo.enabled: - self.enable_repo(repo, hook.id) - else: - if repo: - self.disable_repo(repo) - - def check_sync(self): - """Check if sync is required based on last sync date.""" - # If refresh interval is not specified, we should refresh every time. - expiration = utcnow() - refresh_td = current_app.config.get("GITHUB_REFRESH_TIMEDELTA") - if refresh_td: - expiration -= refresh_td - last_sync = parse_timestamp(self.account.extra_data["last_sync"]) - return last_sync < expiration - - def create_hook(self, repo_id, repo_name): - """Create repository hook.""" - # Create hook - hook_config = dict( - url=self.webhook_url, - content_type="json", - secret=current_app.config["GITHUB_SHARED_SECRET"], - insecure_ssl="1" if current_app.config["GITHUB_INSECURE_SSL"] else "0", - ) - - ghrepo = self.api.repository_with_id(repo_id) - if ghrepo: - hooks = ( - h - for h in ghrepo.hooks() - if h.config.get("url", "") == hook_config["url"] - ) - hook = next(hooks, None) - - # If hook does not exist, create one. 
- if not hook: - hook = ghrepo.create_hook( - "web", # GitHub identifier for webhook service - hook_config, - events=["release"], - ) - else: - hook.edit(config=hook_config, events=["release"]) - - if hook: - # Get or create the repo - repo = Repository.get(github_id=repo_id, name=repo_name) - if not repo: - repo = Repository.create(self.user_id, repo_id, repo_name) - - self.enable_repo(repo, hook.id) - return True - - return False - - def remove_hook(self, repo_id, name): - """Remove repository hook.""" - repo = Repository.get(github_id=repo_id, name=name) - - if not repo: - raise RepositoryNotFoundError(repo_id) - - ghrepo = self.api.repository_with_id(repo_id) - if ghrepo: - hooks = ( - h - for h in ghrepo.hooks() - if self._valid_webhook(h.config.get("url", "")) - ) - hook = next(hooks, None) - if not hook or hook.delete(): - self.disable_repo(repo) - return True - return False - - def repo_last_published_release(self, repo): - """Retrieves the repository last release.""" - release_instance = None - release_object = repo.latest_release(ReleaseStatus.PUBLISHED) - if release_object: - release_instance = current_vcs.release_api_class(release_object) - return release_instance - - def get_repository_releases(self, repo): - """Retrieve repository releases. 
Returns API release objects.""" - self.check_repo_access_permissions(repo) - - # Retrieve releases and sort them by creation date - release_instances = [] - for release_object in repo.releases.order_by(Release.created): - release_instance = current_vcs.release_api_class(release_object) - release_instances.append(release_instance) - - return release_instances - - def get_user_repositories(self): - """Retrieves user repositories, containing db repositories plus remote repositories.""" - repos = deepcopy(self.user_available_repositories) - if repos: - # 'Enhance' our repos dict, from our database model - db_repos = Repository.query.filter( - Repository.github_id.in_( - [int(k) for k in self.user_available_repositories.keys()] - ) - ) - for repo in db_repos: - if str(repo.github_id) in repos: - release_instance = current_vcs.release_api_class( - repo.latest_release() - ) - repos[str(repo.github_id)]["instance"] = repo - repos[str(repo.github_id)]["latest"] = release_instance - return repos - - @property - def user_enabled_repositories(self): - """Retrieve user repositories from the model.""" - return Repository.query.filter(Repository.user_id == self.user_id) - - @property - def user_available_repositories(self): - """Retrieve user repositories from user's remote data.""" - return self.account.extra_data.get("repos", {}) - - def disable_repo(self, repo): - """Disables an user repository if the user has permission to do so.""" - self.check_repo_access_permissions(repo) - - repo.hook = None - repo.user_id = None - - def enable_repo(self, repo, hook): - """Enables an user repository if the user has permission to do so.""" - self.check_repo_access_permissions(repo) - - repo.hook = hook - repo.user_id = self.user_id - - def get_last_sync_time(self): - """Retrieves the last sync delta time from github's client extra data. - - Time is computed as the delta between now and the last sync time. 
- """ - if not self.account.extra_data.get("last_sync"): - raise RemoteAccountDataNotSet( - self.user_id, _("Last sync data is not set for user (remote data).") - ) - - extra_data = self.account.extra_data - return extra_data["last_sync"] - - def get_repository(self, repo_name=None, repo_github_id=None): - """Retrieves one repository. - - Checks for access permission. - """ - repo = Repository.get(name=repo_name, github_id=repo_github_id) - if not repo: - raise RepositoryNotFoundError(repo_name) - - # Might raise a RepositoryAccessError - self.check_repo_access_permissions(repo) - - return repo - - @classmethod - def _dev_api(cls): - """Get a developer instance for GitHub API access.""" - gh = github3.GitHub() - gh.set_client_id(cls.remote.consumer_key, cls.remote.consumer_secret) - return gh - - @classmethod - def check_token(cls, token): - """Check if an access token is authorized.""" - gh_api = cls._dev_api() - client_id, client_secret = gh_api.session.retrieve_client_credentials() - url = gh_api._build_url("applications", str(client_id), "token") - with gh_api.session.temporary_basic_auth(client_id, client_secret): - response = gh_api._post(url, data={"access_token": token}) - return response.status_code == 200 - - @classmethod - def revoke_token(cls, token): - """Revoke an access token.""" - gh_api = cls._dev_api() - client_id, client_secret = gh_api.session.retrieve_client_credentials() - url = gh_api._build_url("applications", str(client_id), "token") - with gh_api.session.temporary_basic_auth(client_id, client_secret): - response = gh_api._delete(url, data=json.dumps({"access_token": token})) - return response diff --git a/invenio_vcs/assets/semantic-ui/js/invenio_github/index.js b/invenio_vcs/assets/semantic-ui/js/invenio_vcs/index.js similarity index 100% rename from invenio_vcs/assets/semantic-ui/js/invenio_github/index.js rename to invenio_vcs/assets/semantic-ui/js/invenio_vcs/index.js diff --git a/invenio_vcs/config.py b/invenio_vcs/config.py index 
2178f301..23e8c70d 100644 --- a/invenio_vcs/config.py +++ b/invenio_vcs/config.py @@ -87,5 +87,5 @@ GITHUB_CITATION_METADATA_SCHEMA = None """Citation metadata schema.""" -GITHUB_ZIPBALL_TIMEOUT = 300 +VCS_ZIPBALL_TIMEOUT = 300 """Timeout for the zipball download, in seconds.""" diff --git a/invenio_vcs/contrib/github.py b/invenio_vcs/contrib/github.py index 2d9428e0..0306e39e 100644 --- a/invenio_vcs/contrib/github.py +++ b/invenio_vcs/contrib/github.py @@ -1,15 +1,20 @@ +import json from collections import defaultdict +from datetime import datetime import github3 +import requests +from flask import current_app from github3.repos import ShortRepository from invenio_i18n import gettext as _ from invenio_oauthclient.contrib.github import GitHubOAuthSettingsHelper from werkzeug.utils import cached_property -from invenio_vcs.errors import UnexpectedProviderResponse +from invenio_vcs.errors import ReleaseZipballFetchError, UnexpectedProviderResponse from invenio_vcs.oauth.handlers import account_setup_handler, disconnect_handler from invenio_vcs.providers import ( GenericContributor, + GenericRelease, GenericRepository, GenericUser, GenericWebhook, @@ -47,8 +52,10 @@ def remote_config(self): base_url=self.config["base_url"], app_key=self.config["credentials_key"] ) github_app = helper.remote_app - github_app["disconnect_handler"] = disconnect_handler - github_app["signup_handler"]["setup"] = account_setup_handler + github_app["disconnect_handler"] = self.oauth_handlers.disconnect_handler + github_app["signup_handler"][ + "setup" + ] = self.oauth_handlers.account_setup_handler github_app["params"]["request_token_params"] = request_token_params return github_app @@ -77,6 +84,34 @@ def icon(self): def config(self): return self._config + def webhook_is_create_release_event(self, event_payload): + action = event_payload.get("action") + is_draft_release = event_payload.get("release", {}).get("draft") + + # Draft releases do not create releases on invenio + 
is_create_release_event = ( + action in ("published", "released", "created") and not is_draft_release + ) + return is_create_release_event + + def webhook_event_to_generic(self, event_payload): + release = GenericRelease( + str(event_payload["release"]["id"]), + event_payload["release"]["name"], + event_payload["release"]["tag_name"], + event_payload["release"]["tarball_url"], + event_payload["release"]["zipball_url"], + datetime.fromisoformat(event_payload["release"]["created_at"]), + ) + repo = GenericRepository( + str(event_payload["repository"]["id"]), + event_payload["repository"]["full_name"], + event_payload["repository"]["description"], + event_payload["repository"]["default_branch"], + ) + + return (release, repo) + class GitHubProvider(RepositoryServiceProvider): @cached_property @@ -155,7 +190,7 @@ def create_webhook(self, repository_id): return True - def delete_webhook(self, repository_id): + def delete_webhook(self, repository_id, hook_id=None): assert repository_id.isdigit() if self._gh is None: return False @@ -164,10 +199,16 @@ def delete_webhook(self, repository_id): if repo is None: return False - hooks = ( - h for h in repo.hooks() if self.is_valid_webhook(h.config.get("url", "")) - ) - hook = next(hooks, None) + if hook_id is not None: + hook = repo.hook(hook_id) + else: + hooks = ( + h + for h in repo.hooks() + if self.is_valid_webhook(h.config.get("url", "")) + ) + hook = next(hooks, None) + if not hook or hook.delete(): return True return False @@ -226,3 +267,67 @@ def get_repository_owner(self, repository_id): return None return GenericUser(repo.owner.id, repo.owner.login, repo.owner.full_name) + + def resolve_release_zipball_url(self, release_zipball_url): + if self._gh is None: + return None + + url = release_zipball_url + + # Execute a HEAD request to the zipball url to test if it is accessible. 
+ response = self._gh.session.head(url, allow_redirects=True) + + # In case where there is a tag and branch with the same name, we might get back + # a "300 Multiple Choices" response, which requires fetching an "alternate" + # link. + if response.status_code == 300: + alternate_url = response.links.get("alternate", {}).get("url") + if alternate_url: + url = alternate_url # Use the alternate URL + response = self._gh.session.head(url, allow_redirects=True) + + # Another edge-case, is when the access token we have does not have the + # scopes/permissions to access public links. In that rare case we fallback to a + # non-authenticated request. + if response.status_code == 404: + current_app.logger.warning( + "GitHub zipball URL {url} not found, trying unauthenticated request.", + extra={"url": response.url}, + ) + response = requests.head(url, allow_redirects=True) + # If this response is successful we want to use the finally resolved URL to + # fetch the ZIP from. + if response.status_code == 200: + return response.url + + if response.status_code != 200: + raise ReleaseZipballFetchError() + + return response.url + + def fetch_release_zipball(self, release_zipball_url, timeout): + with self._gh.session.get( + release_zipball_url, stream=True, timeout=timeout + ) as resp: + yield resp.raw + + def retrieve_remote_file(self, repository_id, tag_name, file_name): + assert repository_id.isdigit() + if self._gh is None: + return None + + try: + return self._gh.repository_with_id(repository_id).file_contents( + path=file_name, ref=tag_name + ) + except github3.exceptions.NotFoundError: + return None + + def revoke_token(self, access_token): + client_id, client_secret = self._gh.session.retrieve_client_credentials() + url = self._gh._build_url("applications", str(client_id), "token") + with self._gh.session.temporary_basic_auth(client_id, client_secret): + response = self._gh._delete( + url, data=json.dumps({"access_token": access_token}) + ) + return response diff --git 
a/invenio_vcs/ext.py b/invenio_vcs/ext.py index c6e6660b..c93cd11f 100644 --- a/invenio_vcs/ext.py +++ b/invenio_vcs/ext.py @@ -33,13 +33,15 @@ from six import string_types from werkzeug.utils import cached_property, import_string -from invenio_vcs.providers import VCSRelease, get_provider_list +from invenio_vcs.providers import get_provider_list +from invenio_vcs.receivers import VCSReceiver +from invenio_vcs.service import VCSRelease from invenio_vcs.utils import obj_or_import_string from . import config -class InvenioGitHub(object): +class InvenioVCS(object): """Invenio-GitHub extension.""" def __init__(self, app=None): @@ -84,23 +86,32 @@ def init_config(self, app): def finalize_app(app): """Finalize app.""" - init_menu(app) + if app.config.get("VCS_INTEGRATION_ENABLED", False): + init_menu(app) + init_webhooks(app) def init_menu(app): """Init menu.""" - if app.config.get("VCS_INTEGRATION_ENABLED", False): - for provider in get_provider_list(): - current_menu.submenu(f"settings.{provider.id}").register( - endpoint="invenio_vcs.get_repositories", - endpoint_arguments_constructor=lambda: {"provider": provider.id}, - text=_( - "%(icon)s $(provider)", - icon=LazyString( - lambda: f'' - ), - provider=LazyString(lambda: provider.name), + for provider in get_provider_list(app): + current_menu.submenu(f"settings.{provider.id}").register( + endpoint="invenio_vcs.get_repositories", + endpoint_arguments_constructor=lambda: {"provider": provider.id}, + text=_( + "%(icon)s $(provider)", + icon=LazyString( + lambda: f'' ), - order=10, - active_when=lambda: request.endpoint.startswith("invenio_vcs."), - ) + provider=LazyString(lambda: provider.name), + ), + order=10, + active_when=lambda: request.endpoint.startswith("invenio_vcs."), + ) + + +def init_webhooks(app): + for provider in get_provider_list(app): + # Procedurally register the webhook receivers instead of including them as an entry point, since + # they are defined in the VCS provider config list rather than in the 
instance's setup.cfg file. + # TODO: is this an okay thing to do? It reduces duplication and work for instance maintainers but is a little unusual + app.extensions["invenio-webhooks"].register(provider.id, VCSReceiver) diff --git a/invenio_vcs/oauth/handlers.py b/invenio_vcs/oauth/handlers.py index 483b865a..c9009ca6 100644 --- a/invenio_vcs/oauth/handlers.py +++ b/invenio_vcs/oauth/handlers.py @@ -22,66 +22,83 @@ """Implement OAuth client handler.""" +import typing + from flask import current_app, redirect, url_for from flask_login import current_user from invenio_db import db from invenio_oauth2server.models import Token as ProviderToken from invenio_oauthclient import oauth_unlink_external_id -from invenio_vcs.api import GitHubAPI -from invenio_vcs.tasks import disconnect_github - - -def account_setup_handler(remote, token, resp): - """Perform post initialization.""" - try: - gh = GitHubAPI(user_id=token.remote_account.user_id) - gh.init_account() - gh.sync() - db.session.commit() - except Exception as e: - current_app.logger.warning(str(e), exc_info=True) - - -def disconnect_handler(remote): - """Disconnect callback handler for GitHub.""" - # User must be authenticated - if not current_user.is_authenticated: - return current_app.login_manager.unauthorized() - - external_method = "github" - external_ids = [ - i.id for i in current_user.external_identifiers if i.method == external_method - ] - if external_ids: - oauth_unlink_external_id(dict(id=external_ids[0], method=external_method)) - - github = GitHubAPI(user_id=int(current_user.id)) - token = github.session_token - - if token: - extra_data = token.remote_account.extra_data - - # Delete the token that we issued for GitHub to deliver webhooks - webhook_token_id = extra_data.get("tokens", {}).get("webhook") - ProviderToken.query.filter_by(id=webhook_token_id).delete() - - # Disable every GitHub webhooks from our side - repos = github.user_enabled_repositories.all() - repos_with_hooks = [] - for repo in repos: - 
if repo.hook: - repos_with_hooks.append((repo.github_id, repo.hook)) - github.disable_repo(repo) - - # Commit any changes before running the ascynhronous task - db.session.commit() - - # Send Celery task for webhooks removal and token revocation - disconnect_github.delay(token.access_token, repos_with_hooks) - - # Delete the RemoteAccount (along with the associated RemoteToken) - token.remote_account.delete() - db.session.commit() - - return redirect(url_for("invenio_oauthclient_settings.index")) +from invenio_vcs.service import VCSService +from invenio_vcs.tasks import disconnect_provider + +if typing.TYPE_CHECKING: + from invenio_vcs.providers import RepositoryServiceProviderFactory + + +class OAuthHandlers: + def __init__(self, provider_factory: "RepositoryServiceProviderFactory") -> None: + self.provider_factory = provider_factory + + def account_setup_handler(self, remote, token, resp): + """Perform post initialization.""" + try: + svc = VCSService( + self.provider_factory.for_user(token.remote_account.user_id) + ) + svc.init_account() + svc.sync() + db.session.commit() + except Exception as e: + current_app.logger.warning(str(e), exc_info=True) + + def disconnect_handler(self, remote): + """Disconnect callback handler for GitHub.""" + # User must be authenticated + if not current_user.is_authenticated: + return current_app.login_manager.unauthorized() + + external_method = self.provider_factory.id + external_ids = [ + i.id + for i in current_user.external_identifiers + if i.method == external_method + ] + if external_ids: + oauth_unlink_external_id(dict(id=external_ids[0], method=external_method)) + + svc = VCSService(self.provider_factory.for_user(current_user.id)) + token = svc.provider.session_token + + if token: + extra_data = token.remote_account.extra_data + + # Delete the token that we issued for GitHub to deliver webhooks + webhook_token_id = extra_data.get("tokens", {}).get("webhook") + ProviderToken.query.filter_by(id=webhook_token_id).delete() + + 
# Disable every GitHub webhooks from our side + repos = svc.user_enabled_repositories.all() + repos_with_hooks = [] + for repo in repos: + if repo.hook: + repos_with_hooks.append((repo.provider_id, repo.hook)) + svc.disable_repository(repo.id) + + # Commit any changes before running the ascynhronous task + db.session.commit() + + # Send Celery task for webhooks removal and token revocation + disconnect_provider.delay( + self.provider_factory.id, + current_user.id, + token.access_token, + repos_with_hooks, + ) + + # Delete the RemoteAccount (along with the associated RemoteToken) + token.remote_account.delete() + db.session.commit() + + return redirect(url_for("invenio_oauthclient_settings.index")) diff --git a/invenio_vcs/oauth/remote_app.py b/invenio_vcs/oauth/remote_app.py deleted file mode 100644 index d30347b0..00000000 --- a/invenio_vcs/oauth/remote_app.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2023 CERN. -# -# Invenio-Github is free software; you can redistribute it and/or modify -# it under the terms of the MIT License; see LICENSE file for more details. 
-"""Github oauth app implementation for github integration.""" - - -from invenio_oauthclient.contrib.github import GitHubOAuthSettingsHelper - -from invenio_vcs.oauth.handlers import account_setup_handler, disconnect_handler - -request_token_params = {"scope": "read:user,user:email,admin:repo_hook,read:org"} - -helper = GitHubOAuthSettingsHelper() -github_app = helper.remote_app -github_app["disconnect_handler"] = disconnect_handler -github_app["signup_handler"]["setup"] = account_setup_handler -github_app["params"]["request_token_params"] = request_token_params diff --git a/invenio_vcs/providers.py b/invenio_vcs/providers.py index aba6f806..92c415a1 100644 --- a/invenio_vcs/providers.py +++ b/invenio_vcs/providers.py @@ -4,8 +4,6 @@ from urllib.parse import urlparse from flask import current_app -from invenio_access.permissions import authenticated_user -from invenio_access.utils import get_identity from invenio_i18n import gettext as _ from invenio_oauth2server.models import Token as ProviderToken from invenio_oauthclient import current_oauthclient @@ -15,7 +13,7 @@ from werkzeug.utils import cached_property from invenio_vcs.errors import RemoteAccountDataNotSet -from invenio_vcs.models import Repository +from invenio_vcs.oauth.handlers import OAuthHandlers @dataclass @@ -70,6 +68,10 @@ def __init__( def remote_config(self): raise NotImplementedError + @property + def oauth_handlers(self): + return OAuthHandlers(self) + @cached_property def remote(self): return LocalProxy(lambda: current_oauthclient.oauth.remote_apps[self.id]) @@ -104,28 +106,40 @@ def icon(self) -> str: def config(self) -> dict: raise NotImplementedError + @abstractmethod + def webhook_is_create_release_event(self, event_payload): + raise NotImplementedError + + @abstractmethod + def webhook_event_to_generic(self, event_payload): + raise NotImplementedError + def for_user(self, user_id: str): return self.provider(self, user_id) + def for_access_token(self, user_id: str, access_token: str): + 
return self.provider(self, user_id, access_token=access_token) + class RepositoryServiceProvider(ABC): - def __init__(self, factory: RepositoryServiceProviderFactory, user_id: str) -> None: + def __init__( + self, factory: RepositoryServiceProviderFactory, user_id: str, access_token=None + ) -> None: self.factory = factory self.user_id = user_id + self._access_token = access_token @cached_property def remote_account(self): """Return remote account.""" return RemoteAccount.get(self.user_id, self.factory.remote.consumer_key) - @cached_property - def user_available_repositories(self): - """Retrieve user repositories from user's remote data.""" - return self.remote_account.extra_data.get("repos", {}) - @cached_property def access_token(self): """Return OAuth access token's value.""" + if self._access_token is not None: + return self._access_token + token = RemoteToken.get(self.user_id, self.factory.remote.consumer_key) if not token: # The token is not yet in DB, it is retrieved from the request session. 
@@ -209,248 +223,37 @@ def create_webhook(self, repository_id): raise NotImplementedError @abstractmethod - def delete_webhook(self, repository_id): + def delete_webhook(self, repository_id, hook_id=None): raise NotImplementedError @abstractmethod def get_own_user(self): raise NotImplementedError - -def get_provider_list() -> list[RepositoryServiceProviderFactory]: - return current_app.config["VCS_PROVIDERS"] - - -def get_provider_by_id(id: str) -> RepositoryServiceProviderFactory: - providers = get_provider_list() - for provider in providers: - if id == provider.id: - return provider - raise Exception(f"VCS provider with ID {id} not registered") - - -class VCSRelease: - """A GitHub release.""" - - def __init__(self, release, provider: RepositoryServiceProvider): - """Constructor.""" - self.db_release = release - self.provider = provider - self._resolved_zipball_url = None - - @cached_property - def record(self): - """Release record.""" - return self.resolve_record() - - @cached_property - def event(self): - """Get release event.""" - return self.db_release.event - - @cached_property - def payload(self): - """Return event payload.""" - return self.event.payload - - @cached_property - def release_payload(self): - """Return release metadata.""" - return self.payload["release"] - - @cached_property - def repository_payload(self): - """Return repository metadata.""" - return self.payload["repository"] - - @cached_property - def repository_object(self): - """Return repository model from database.""" - if self.db_release.repository_id: - repository = self.db_release.repository - else: - repository = Repository.query.filter_by( - user_id=self.event.user_id, provider_id=self.provider.factory.id - ).one() - return repository - - @cached_property - def release_file_name(self): - """Returns release zipball file name.""" - tag_name = self.release_payload["tag_name"] - repo_name = self.repository_payload["full_name"] - filename = f"{repo_name}-{tag_name}.zip" - return filename 
- - @cached_property - def release_zipball_url(self): - """Returns the release zipball URL.""" - return self.release_payload["zipball_url"] - - @cached_property - def user_identity(self): - """Generates release owner's user identity.""" - identity = get_identity(self.repository_object.user) - identity.provides.add(authenticated_user) - identity.user = self.repository_object.user - return identity - - @cached_property - def contributors(self): - """Get list of contributors to a repository. - - The list of contributors is fetched from Github API, filtered for type "User" and sorted by contributions. - - :returns: a generator of objects that contains contributors information. - :raises UnexpectedGithubResponse: when Github API returns a status code other than 200. - """ - max_contributors = current_app.config.get("VCS_MAX_CONTRIBUTORS_NUMBER", 30) - return self.provider.list_repository_contributors( - self.repository_object.id, max=max_contributors - ) - - @cached_property - def owner(self): - """Get owner of repository as a creator.""" - try: - owner = self.gh.api.repository_with_id( - self.repository_object.github_id - ).owner - return owner - except Exception: - return None - - # Helper functions - - def is_first_release(self): - """Checks whether the current release is the first release of the repository.""" - latest_release = self.repository_object.latest_release(ReleaseStatus.PUBLISHED) - return True if not latest_release else False - - def test_zipball(self): - """Test if the zipball URL is accessible and return the resolved URL.""" - return self.resolve_zipball_url() - - def resolve_zipball_url(self, cache=True): - """Resolve the zipball URL. - - This method will try to resolve the zipball URL by making a HEAD request, - handling the following edge cases: - - - In the case of a 300 Multiple Choices response, which can happen when a tag - and branch have the same name, it will try to fetch an "alternate" link. 
- - If the access token does not have the required scopes/permissions to access - public links, it will fallback to a non-authenticated request. - """ - if self._resolved_zipball_url and cache: - return self._resolved_zipball_url - - url = self.release_zipball_url - - # Execute a HEAD request to the zipball url to test if it is accessible. - response = self.gh.api.session.head(url, allow_redirects=True) - - # In case where there is a tag and branch with the same name, we might get back - # a "300 Multiple Choices" response, which requires fetching an "alternate" - # link. - if response.status_code == 300: - alternate_url = response.links.get("alternate", {}).get("url") - if alternate_url: - url = alternate_url # Use the alternate URL - response = self.gh.api.session.head(url, allow_redirects=True) - - # Another edge-case, is when the access token we have does not have the - # scopes/permissions to access public links. In that rare case we fallback to a - # non-authenticated request. - if response.status_code == 404: - current_app.logger.warning( - "GitHub zipball URL {url} not found, trying unauthenticated request.", - extra={"url": response.url}, - ) - response = requests.head(url, allow_redirects=True) - # If this response is successful we want to use the finally resolved URL to - # fetch the ZIP from. 
- if response.status_code == 200: - return response.url - - if response.status_code != 200: - raise ReleaseZipballFetchError() - - if cache: - self._resolved_zipball_url = response.url - - return response.url - - # High level API - - def release_failed(self): - """Set release status to FAILED.""" - self.db_release.status = ReleaseStatus.FAILED - - def release_processing(self): - """Set release status to PROCESSING.""" - self.db_release.status = ReleaseStatus.PROCESSING - - def release_published(self): - """Set release status to PUBLISHED.""" - self.db_release.status = ReleaseStatus.PUBLISHED - - def retrieve_remote_file(self, file_name): - """Retrieves a file from the repository, for the current release, using the github client. - - :param file_name: the name of the file to be retrieved from the repository. - :returns: the file contents or None, if the file if not fetched. - """ - gh_repo_owner = self.repository_payload["owner"]["login"] - gh_repo_name = self.repository_payload["name"] - gh_tag_name = self.release_payload["tag_name"] - try: - content = self.gh.api.repository(gh_repo_owner, gh_repo_name).file_contents( - path=file_name, ref=gh_tag_name - ) - except github3.exceptions.NotFoundError: - # github3 raises a github3.exceptions.NotFoundError if the file is not found - return None - return content - - @contextmanager - def fetch_zipball_file(self): - """Fetch release zipball file using the current github session.""" - session = self.gh.api.session - timeout = current_app.config.get("GITHUB_ZIPBALL_TIMEOUT", 300) - zipball_url = self.resolve_zipball_url() - with session.get(zipball_url, stream=True, timeout=timeout) as resp: - yield resp.raw - - def publish(self): - """Publish a GitHub release.""" + @abstractmethod + def resolve_release_zipball_url(self, release_zipball_url): raise NotImplementedError - def process_release(self): - """Processes a github release.""" + @abstractmethod + def fetch_release_zipball(self, release_zipball_url, timeout): raise 
NotImplementedError - def resolve_record(self): - """Resolves a record from the release. To be implemented by the API class implementation.""" + @abstractmethod + def retrieve_remote_file(self, repository_id, tag_name, file_name): raise NotImplementedError - def serialize_record(self): - """Serializes the release record.""" + @abstractmethod + def revoke_token(self, access_token): raise NotImplementedError - @property - @abstractmethod - def badge_title(self): - """Stores a string to render in the record badge title (e.g. 'DOI').""" - return None - @property - @abstractmethod - def badge_value(self): - """Stores a string to render in the record badge value (e.g. '10.1234/invenio.1234').""" - raise NotImplementedError +def get_provider_list(app=current_app) -> list[RepositoryServiceProviderFactory]: + return app.config["VCS_PROVIDERS"] - @property - def record_url(self): - """Release self url (e.g. github HTML url).""" - raise NotImplementedError + +def get_provider_by_id(id: str) -> RepositoryServiceProviderFactory: + providers = get_provider_list() + for provider in providers: + if id == provider.id: + return provider + raise Exception(f"VCS provider with ID {id} not registered") diff --git a/invenio_vcs/proxies.py b/invenio_vcs/proxies.py index 6fe7d73c..7dc9f40d 100644 --- a/invenio_vcs/proxies.py +++ b/invenio_vcs/proxies.py @@ -27,4 +27,4 @@ from flask import current_app from werkzeug.local import LocalProxy -current_vcs = LocalProxy(lambda: current_app.extensions["invenio-github"]) +current_vcs = LocalProxy(lambda: current_app.extensions["invenio-vcs"]) diff --git a/invenio_vcs/receivers.py b/invenio_vcs/receivers.py index 4f0ab426..dd455031 100644 --- a/invenio_vcs/receivers.py +++ b/invenio_vcs/receivers.py @@ -26,6 +26,7 @@ from invenio_webhooks.models import Receiver from invenio_vcs.models import Release, ReleaseStatus, Repository +from invenio_vcs.providers import get_provider_by_id from invenio_vcs.tasks import process_release from .errors import ( @@ 
-37,9 +38,13 @@ ) -class GitHubReceiver(Receiver): +class VCSReceiver(Receiver): """Handle incoming notification from GitHub on a new release.""" + def __init__(self, receiver_id): + super().__init__(receiver_id) + self.provider_factory = get_provider_by_id(receiver_id) + def run(self, event): """Process an event. @@ -53,43 +58,37 @@ def run(self, event): def _handle_event(self, event): """Handles an incoming github event.""" - action = event.payload.get("action") - is_draft_release = event.payload.get("release", {}).get("draft") - - # Draft releases do not create releases on invenio - is_create_release_event = ( - action in ("published", "released", "created") and not is_draft_release + is_create_release_event = self.provider_factory.webhook_is_create_release_event( + event.payload ) if is_create_release_event: self._handle_create_release(event) - else: - pass def _handle_create_release(self, event): """Creates a release in invenio.""" try: - release_id = event.payload["release"]["id"] + generic_release, generic_repo = ( + self.provider_factory.webhook_event_to_generic(event.payload) + ) # Check if the release already exists existing_release = Release.query.filter_by( - release_id=release_id, + provider_id=generic_release.id, ).first() if existing_release: raise ReleaseAlreadyReceivedError(release=existing_release) # Create the Release - repo_id = event.payload["repository"]["id"] - repo_name = event.payload["repository"]["name"] - repo = Repository.get(repo_id, repo_name) + repo = Repository.get(generic_repo.id, generic_repo.full_name) if not repo: - raise RepositoryNotFoundError(repo_name) + raise RepositoryNotFoundError(generic_repo.full_name) if repo.enabled: release = Release( - release_id=release_id, - tag=event.payload["release"]["tag_name"], + provider_id=generic_release.id, + tag=generic_release.tag_name, repository=repo, event=event, status=ReleaseStatus.RECEIVED, @@ -101,7 +100,7 @@ def _handle_create_release(self, event): # Process the release # Since 
'process_release' is executed asynchronously, we commit the current state of session db.session.commit() - process_release.delay(release.release_id) + process_release.delay(release.provider_id) except (ReleaseAlreadyReceivedError, RepositoryDisabledError) as e: event.response_code = 409 diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py index 6825c092..c6ccecc2 100644 --- a/invenio_vcs/service.py +++ b/invenio_vcs/service.py @@ -1,6 +1,9 @@ +from contextlib import contextmanager from copy import deepcopy from flask import current_app +from invenio_access.permissions import authenticated_user +from invenio_access.utils import get_identity from invenio_db import db from invenio_i18n import gettext as _ from invenio_oauth2server.models import Token as ProviderToken @@ -14,23 +17,43 @@ RepositoryNotFoundError, ) from invenio_vcs.models import Release, ReleaseStatus, Repository -from invenio_vcs.providers import get_provider_by_id +from invenio_vcs.providers import RepositoryServiceProvider, get_provider_by_id from invenio_vcs.proxies import current_vcs from invenio_vcs.tasks import sync_hooks as sync_hooks_task from invenio_vcs.utils import iso_utcnow -class VersionControlService: - def __init__(self, provider: str, user_id: str) -> None: - self.provider = get_provider_by_id(provider).for_user(user_id) +class VCSService: + def __init__(self, provider: RepositoryServiceProvider) -> None: + self.provider = provider + + @staticmethod + def for_provider_and_user(provider_id: str, user_id: str): + return VCSService(get_provider_by_id(provider_id).for_user(user_id)) + + @staticmethod + def for_provider_and_token(provider_id: str, user_id: str, access_token: str): + return VCSService( + get_provider_by_id(provider_id).for_access_token(user_id, access_token) + ) @cached_property def is_authenticated(self): return self.provider.session_token is not None + @property + def user_available_repositories(self): + """Retrieve user repositories from user's remote data.""" + 
return self.provider.remote_account.extra_data.get("repos", {}) + + @property + def user_enabled_repositories(self): + """Retrieve user repositories from the model.""" + return Repository.query.filter(Repository.user_id == self.provider.user_id) + def list_repositories(self): """Retrieves user repositories, containing db repositories plus remote repositories.""" - vcs_repos = deepcopy(self.provider.user_available_repositories) + vcs_repos = deepcopy(self.user_available_repositories) if vcs_repos: # 'Enhance' our repos dict, from our database model db_repos = Repository.query.filter( @@ -67,10 +90,8 @@ def list_repo_releases(self, repo): return release_instances def get_repo_default_branch(self, repo_id): - return ( - self.provider.remote_account.extra_data.get("repos", {}) - .get(repo_id, None) - .get("default_branch", None) + return self.user_available_repositories.get(repo_id, None).get( + "default_branch", None ) def get_last_sync_time(self): @@ -115,7 +136,7 @@ def check_repo_access_permissions(self, repo): return True if self.provider.remote_account and self.provider.remote_account.extra_data: - user_has_remote_access = self.provider.user_available_repositories.get( + user_has_remote_access = self.user_available_repositories.get( repo.provider_id ) if user_has_remote_access: @@ -254,23 +275,193 @@ def init_account(self): db.session.add(self.provider.remote_account) def enable_repository(self, repository_id): - repos = self.provider.remote_account.extra_data.get("repos", {}) - if repository_id not in repos: + if repository_id not in self.user_available_repositories: raise RepositoryNotFoundError( repository_id, _("Failed to enable repository.") ) return self.provider.create_webhook(repository_id) - def disable_repository(self, repository_id): - repos = self.provider.remote_account.extra_data.get("repos", {}) - if repository_id not in repos: + def disable_repository(self, repository_id, hook_id=None): + if hook_id is None and repository_id not in 
self.user_available_repositories: raise RepositoryNotFoundError( repository_id, _("Failed to disable repository.") ) - remove_success = False - if repos: - remove_success = self.provider.delete_webhook(repository_id) + return self.provider.delete_webhook(repository_id, hook_id) + + +class VCSRelease: + """A GitHub release.""" + + def __init__(self, release, provider: RepositoryServiceProvider): + """Constructor.""" + self.db_release = release + self.provider = provider + self._resolved_zipball_url = None + + @cached_property + def record(self): + """Release record.""" + return self.resolve_record() + + @cached_property + def event(self): + """Get release event.""" + return self.db_release.event + + @cached_property + def payload(self): + """Return event payload.""" + return self.event.payload + + @cached_property + def generic_release(self): + """Return release metadata.""" + return self.provider.factory.webhook_event_to_generic(self.payload)[0] + + @cached_property + def generic_repo(self): + """Return repo metadata.""" + return self.provider.factory.webhook_event_to_generic(self.payload)[1] + + @cached_property + def db_repo(self): + """Return repository model from database.""" + if self.db_release.repository_id: + repository = self.db_release.repository + else: + repository = Repository.query.filter_by( + user_id=self.event.user_id, provider_id=self.provider.factory.id + ).one() + return repository + + @cached_property + def release_file_name(self): + """Returns release zipball file name.""" + tag_name = self.generic_release.tag_name + repo_name = self.generic_repo.full_name + filename = f"{repo_name}-{tag_name}.zip" + return filename + + @cached_property + def release_zipball_url(self): + """Returns the release zipball URL.""" + return self.generic_release["zipball_url"] + + @cached_property + def user_identity(self): + """Generates release owner's user identity.""" + identity = get_identity(self.db_repo.user) + identity.provides.add(authenticated_user) + 
identity.user = self.db_repo.user + return identity - return remove_success + @cached_property + def contributors(self): + """Get list of contributors to a repository. + + The list of contributors is fetched from Github API, filtered for type "User" and sorted by contributions. + + :returns: a generator of objects that contains contributors information. + :raises UnexpectedGithubResponse: when Github API returns a status code other than 200. + """ + max_contributors = current_app.config.get("VCS_MAX_CONTRIBUTORS_NUMBER", 30) + return self.provider.list_repository_contributors( + self.db_repo.id, max=max_contributors + ) + + @cached_property + def owner(self): + """Get owner of repository as a creator.""" + try: + return self.provider.get_repository_owner(self.db_repo.id) + except Exception: + return None + + # Helper functions + + def is_first_release(self): + """Checks whether the current release is the first release of the repository.""" + latest_release = self.db_repo.latest_release(ReleaseStatus.PUBLISHED) + return True if not latest_release else False + + def test_zipball(self): + """Test if the zipball URL is accessible and return the resolved URL.""" + return self.resolve_zipball_url() + + def resolve_zipball_url(self, cache=True): + """Resolve the zipball URL. + + This method will try to resolve the zipball URL by making a HEAD request, + handling the following edge cases: + + - In the case of a 300 Multiple Choices response, which can happen when a tag + and branch have the same name, it will try to fetch an "alternate" link. + - If the access token does not have the required scopes/permissions to access + public links, it will fallback to a non-authenticated request. 
+ """ + if self._resolved_zipball_url and cache: + return self._resolved_zipball_url + + url = self.release_zipball_url + url = self.provider.resolve_release_zipball_url(url) + + if cache: + self._resolved_zipball_url = url + + return url + + # High level API + + def release_failed(self): + """Set release status to FAILED.""" + self.db_release.status = ReleaseStatus.FAILED + + def release_processing(self): + """Set release status to PROCESSING.""" + self.db_release.status = ReleaseStatus.PROCESSING + + def release_published(self): + """Set release status to PUBLISHED.""" + self.db_release.status = ReleaseStatus.PUBLISHED + + @contextmanager + def fetch_zipball_file(self): + """Fetch release zipball file using the current github session.""" + timeout = current_app.config.get("VCS_ZIPBALL_TIMEOUT", 300) + zipball_url = self.resolve_zipball_url() + return self.provider.fetch_release_zipball(zipball_url, timeout) + + def publish(self): + """Publish a GitHub release.""" + raise NotImplementedError + + def process_release(self): + """Processes a github release.""" + raise NotImplementedError + + def resolve_record(self): + """Resolves a record from the release. To be implemented by the API class implementation.""" + raise NotImplementedError + + def serialize_record(self): + """Serializes the release record.""" + raise NotImplementedError + + @property + @abstractmethod + def badge_title(self): + """Stores a string to render in the record badge title (e.g. 'DOI').""" + return None + + @property + @abstractmethod + def badge_value(self): + """Stores a string to render in the record badge value (e.g. '10.1234/invenio.1234').""" + raise NotImplementedError + + @property + def record_url(self): + """Release self url (e.g. 
github HTML url).""" + raise NotImplementedError diff --git a/invenio_vcs/tasks.py b/invenio_vcs/tasks.py index 9accc249..e572ab11 100644 --- a/invenio_vcs/tasks.py +++ b/invenio_vcs/tasks.py @@ -25,7 +25,6 @@ import datetime -import github3 from celery import shared_task from flask import current_app, g from invenio_db import db @@ -35,6 +34,7 @@ from invenio_vcs.errors import CustomGitHubMetadataError, RepositoryAccessError from invenio_vcs.models import Release, ReleaseStatus +from invenio_vcs.providers import get_provider_by_id from invenio_vcs.proxies import current_vcs @@ -65,44 +65,43 @@ def release_default_exception_handler(release, ex): @shared_task(max_retries=6, default_retry_delay=10 * 60, rate_limit="100/m") -def disconnect_github(access_token, repo_hooks): +def disconnect_provider(provider_id, user_id, access_token, repo_hooks): """Uninstall webhooks.""" # Note at this point the remote account and all associated data have # already been deleted. The celery task is passed the access_token to make # some last cleanup and afterwards delete itself remotely. 
# Local import to avoid circular imports - from .api import GitHubAPI + from .service import VCSService try: # Create a nested transaction to make sure that hook deletion + token revoke is atomic with db.session.begin_nested(): - gh = github3.login(token=access_token) + svc = VCSService.for_provider_and_token(provider_id, user_id, access_token) + for repo_id, repo_hook in repo_hooks: - ghrepo = gh.repository_with_id(repo_id) - if ghrepo: - hook = ghrepo.hook(repo_hook) - if hook and hook.delete(): - current_app.logger.info( - _("Deleted hook from github repository."), - extra={"hook": hook.id, "repo": ghrepo.full_name}, - ) + if svc.disable_repository(repo_id, repo_hook): + current_app.logger.info( + _("Deleted hook from github repository."), + extra={"hook": repo_hook, "repo": repo_id}, + ) + # If we finished our clean-up successfully, we can revoke the token - GitHubAPI.revoke_token(access_token) + svc.provider.revoke_token(access_token) except Exception as exc: # Retry in case GitHub may be down... 
- disconnect_github.retry(exc=exc) + disconnect_provider.retry(exc=exc) @shared_task(max_retries=6, default_retry_delay=10 * 60, rate_limit="100/m") def sync_hooks(provider, user_id, repositories): """Sync repository hooks for a user.""" # Local import to avoid circular imports - from .service import VersionControlService + from .service import VCSService try: # Sync hooks - svc = VersionControlService(provider, user_id) + svc = VCSService.for_provider_and_user(provider, user_id) for repo_id in repositories: try: with db.session.begin_nested(): @@ -118,14 +117,17 @@ def sync_hooks(provider, user_id, repositories): @shared_task(ignore_result=True, max_retries=5, default_retry_delay=10 * 60) -def process_release(release_id): +def process_release(provider_id, release_id): """Process a received Release.""" release_model = Release.query.filter( - Release.release_id == release_id, + Release.provider_id == release_id, Release.status.in_([ReleaseStatus.RECEIVED, ReleaseStatus.FAILED]), ).one() - release = current_vcs.release_api_class(release_model) + provider = get_provider_by_id(provider_id).for_user( + release_model.repository.user_id + ) + release = current_vcs.release_api_class(release_model, provider) matched_error_cls = None matched_ex = None @@ -154,9 +156,9 @@ def refresh_accounts(expiration_threshold=None): :param expiration_threshold: Dictionary containing timedelta parameters referring to the maximum inactivity time. 
""" - expiration_date = datetime.datetime.utcnow() - datetime.timedelta( - **(expiration_threshold or {"days": 6 * 30}) - ) + expiration_date = datetime.datetime.now( + tz=datetime.timezone.utc + ) - datetime.timedelta(**(expiration_threshold or {"days": 6 * 30})) remote = current_oauthclient.oauth.remote_apps["github"] remote_accounts_to_be_updated = RemoteAccount.query.filter( diff --git a/invenio_vcs/templates/semantic-ui/invenio_github/base.html b/invenio_vcs/templates/semantic-ui/invenio_vcs/base.html similarity index 100% rename from invenio_vcs/templates/semantic-ui/invenio_github/base.html rename to invenio_vcs/templates/semantic-ui/invenio_vcs/base.html diff --git a/invenio_vcs/templates/semantic-ui/invenio_github/helpers.html b/invenio_vcs/templates/semantic-ui/invenio_vcs/helpers.html similarity index 94% rename from invenio_vcs/templates/semantic-ui/invenio_github/helpers.html rename to invenio_vcs/templates/semantic-ui/invenio_vcs/helpers.html index 024e049d..6b2f7535 100644 --- a/invenio_vcs/templates/semantic-ui/invenio_github/helpers.html +++ b/invenio_vcs/templates/semantic-ui/invenio_vcs/helpers.html @@ -10,7 +10,7 @@ {%- macro doi_badge(doi, doi_url, github_id) %} {%- block doi_badge scoped %} - {% set image_url = url_for('invenio_github_badge.index', repo_github_id=github_id, _external=True) %} + {% set image_url = url_for('invenio_vcs_badge.index', repo_github_id=github_id, _external=True) %} Date: Thu, 14 Aug 2025 14:34:39 +0200 Subject: [PATCH 06/19] WIP: start adapting jinja templates --- .../semantic-ui/js/invenio_vcs/index.js | 17 ++- invenio_vcs/config.py | 37 ++++-- invenio_vcs/contrib/github.py | 108 ++++++++++++++---- invenio_vcs/ext.py | 12 +- invenio_vcs/models.py | 18 +-- invenio_vcs/providers.py | 79 ++++++++----- invenio_vcs/receivers.py | 8 +- invenio_vcs/service.py | 74 +++++++----- invenio_vcs/tasks.py | 2 +- .../semantic-ui/invenio_vcs/helpers.html | 10 +- .../invenio_vcs/settings/helpers.html | 5 +- 
.../invenio_vcs/settings/index.html | 33 +++--- .../invenio_vcs/settings/index_item.html | 6 +- .../invenio_vcs/settings/view.html | 2 +- invenio_vcs/views/badge.py | 49 ++++---- invenio_vcs/views/vcs.py | 21 ++-- package-lock.json | 6 + setup.cfg | 2 +- 18 files changed, 310 insertions(+), 179 deletions(-) create mode 100644 package-lock.json diff --git a/invenio_vcs/assets/semantic-ui/js/invenio_vcs/index.js b/invenio_vcs/assets/semantic-ui/js/invenio_vcs/index.js index 4573c878..804c63ff 100644 --- a/invenio_vcs/assets/semantic-ui/js/invenio_vcs/index.js +++ b/invenio_vcs/assets/semantic-ui/js/invenio_vcs/index.js @@ -42,8 +42,9 @@ if (sync_button) { const buttonTextElem = document.getElementById("sync_repos_btn_text"); const buttonText = buttonTextElem.innerHTML; const loadingText = sync_button.dataset.loadingText; + const provider = sync_button.dataset.provider; - const url = "/api/user/github/repositories/sync"; + const url = `/api/user/vcs/${provider}/repositories/sync`; const request = new Request(url, { method: "POST", headers: REQUEST_HEADERS, @@ -129,18 +130,16 @@ if (repositories) { } function sendEnableDisableRequest(checked, repo) { - const repo_id = repo - .querySelector("input[data-repo-id]") - .getAttribute("data-repo-id"); + const input = repo.querySelector("input[data-repo-id]"); + const repo_id= input.getAttribute("data-repo-id"); + const provider = input.getAttribute("data-provider"); const switchMessage = repo.querySelector(".repo-switch-message"); let url; if (checked === true) { - url = "/api/user/github/repositories/" + repo_id + "/enable"; - } else { - if (checked === false) { - url = "/api/user/github/repositories/" + repo_id + "/disable"; - } + url = `/api/user/vcs/${provider}/repositories/${repo_id}/enable`; + } else if (checked === false) { + url = `/api/user/vcs/${provider}/repositories/${repo_id}/disable`; } const request = new Request(url, { diff --git a/invenio_vcs/config.py b/invenio_vcs/config.py index 23e8c70d..b5ef9493 100644 
--- a/invenio_vcs/config.py +++ b/invenio_vcs/config.py @@ -23,15 +23,16 @@ """Configuration for GitHub module.""" from datetime import timedelta +from typing import TYPE_CHECKING -from invenio_vcs.contrib.github import GitHubProvider +from flask import current_app -VCS_PROVIDERS = [] +if TYPE_CHECKING: + from invenio_vcs.providers import RepositoryServiceProviderFactory -GITHUB_WEBHOOK_RECEIVER_ID = "github" -"""Local name of webhook receiver.""" +VCS_PROVIDERS = [] -GITHUB_WEBHOOK_RECEIVER_URL = None +# GITHUB_WEBHOOK_RECEIVER_URL = None """URL format to be used when creating a webhook on GitHub. This configuration variable must be set explicitly. Example:: @@ -45,7 +46,7 @@ context, doesn't work as expected. """ -GITHUB_SHARED_SECRET = "CHANGEME" +# GITHUB_SHARED_SECRET = "CHANGEME" """Shared secret between you and GitHub. Used to make GitHub sign webhook requests with HMAC. @@ -53,7 +54,7 @@ See http://developer.github.com/v3/repos/hooks/#example """ -GITHUB_INSECURE_SSL = False +# GITHUB_INSECURE_SSL = False """Determine if the GitHub webhook request will check the SSL certificate. 
Never set to True in a production environment, but can be useful for @@ -63,13 +64,13 @@ GITHUB_REFRESH_TIMEDELTA = timedelta(days=1) """Time period after which a GitHub account sync should be initiated.""" -GITHUB_RELEASE_CLASS = "invenio_github.api:GitHubRelease" +VCS_RELEASE_CLASS = "invenio_vcs.service:VCSRelease" """GitHubRelease class to be used for release handling.""" -GITHUB_TEMPLATE_INDEX = "invenio_github/settings/index.html" +VCS_TEMPLATE_INDEX = "invenio_vcs/settings/index.html" """Repositories list template.""" -GITHUB_TEMPLATE_VIEW = "invenio_github/settings/view.html" +VCS_TEMPLATE_VIEW = "invenio_vcs/settings/view.html" """Repository detail view template.""" GITHUB_ERROR_HANDLERS = None @@ -81,11 +82,23 @@ VCS_INTEGRATION_ENABLED = False """Enables the github integration.""" -GITHUB_CITATION_FILE = None +VCS_CITATION_FILE = None """Citation file name.""" -GITHUB_CITATION_METADATA_SCHEMA = None +VCS_CITATION_METADATA_SCHEMA = None """Citation metadata schema.""" VCS_ZIPBALL_TIMEOUT = 300 """Timeout for the zipball download, in seconds.""" + + +def get_provider_list(app=current_app) -> list["RepositoryServiceProviderFactory"]: + return app.config["VCS_PROVIDERS"] + + +def get_provider_by_id(id: str) -> "RepositoryServiceProviderFactory": + providers = get_provider_list() + for provider in providers: + if id == provider.id: + return provider + raise Exception(f"VCS provider with ID {id} not registered") diff --git a/invenio_vcs/contrib/github.py b/invenio_vcs/contrib/github.py index 0306e39e..0f29dadf 100644 --- a/invenio_vcs/contrib/github.py +++ b/invenio_vcs/contrib/github.py @@ -11,7 +11,6 @@ from werkzeug.utils import cached_property from invenio_vcs.errors import ReleaseZipballFetchError, UnexpectedProviderResponse -from invenio_vcs.oauth.handlers import account_setup_handler, disconnect_handler from invenio_vcs.providers import ( GenericContributor, GenericRelease, @@ -26,21 +25,22 @@ class 
GitHubProviderFactory(RepositoryServiceProviderFactory): def __init__( self, + base_url, webhook_receiver_url, id="github", name="GitHub", config={}, ): - super().__init__(GitHubProvider, webhook_receiver_url) + super().__init__(GitHubProvider, base_url, webhook_receiver_url) self._id = id self._name = name - self._config = defaultdict( - config, - base_url="https://github.com", + self._config = dict() + self._config.update( credentials_key="GITHUB_APP_CREDENTIALS", shared_secret="", insecure_ssl=False, ) + self._config.update(config) @property def remote_config(self): @@ -49,7 +49,7 @@ def remote_config(self): } helper = GitHubOAuthSettingsHelper( - base_url=self.config["base_url"], app_key=self.config["credentials_key"] + base_url=self.base_url, app_key=self.config["credentials_key"] ) github_app = helper.remote_app github_app["disconnect_handler"] = self.oauth_handlers.disconnect_handler @@ -95,28 +95,65 @@ def webhook_is_create_release_event(self, event_payload): return is_create_release_event def webhook_event_to_generic(self, event_payload): + release_published_at = event_payload["release"].get("published_at") + if release_published_at is not None: + release_published_at = datetime.fromisoformat(release_published_at) + release = GenericRelease( - str(event_payload["release"]["id"]), - event_payload["release"]["name"], - event_payload["release"]["tag_name"], - event_payload["release"]["tarball_url"], - event_payload["release"]["zipball_url"], - datetime.fromisoformat(event_payload["release"]["created_at"]), + id=str(event_payload["release"]["id"]), + name=event_payload["release"].get("name"), + tag_name=event_payload["release"]["tag_name"], + tarball_url=event_payload["release"].get("tarball_url"), + zipball_url=event_payload["release"].get("zipball_url"), + body=event_payload["release"].get("body"), + created_at=datetime.fromisoformat(event_payload["release"]["created_at"]), + published_at=release_published_at, ) + + license_spdx = 
event_payload["repository"].get("license") + if license_spdx is not None: + license_spdx = filter_license_spdx(license_spdx["spdx_id"]) + repo = GenericRepository( - str(event_payload["repository"]["id"]), - event_payload["repository"]["full_name"], - event_payload["repository"]["description"], - event_payload["repository"]["default_branch"], + id=str(event_payload["repository"]["id"]), + full_name=event_payload["repository"]["full_name"], + html_url=event_payload["repository"]["html_url"], + description=event_payload["repository"].get("description"), + default_branch=event_payload["repository"]["default_branch"], + license_spdx=license_spdx, ) return (release, repo) + def url_for_tag(self, repository_name, tag_name): + return "{}/{}/tree/{}".format(self.base_url, repository_name, tag_name) + class GitHubProvider(RepositoryServiceProvider): @cached_property def _gh(self): - return github3.login(token=self.access_token(self.user_id)) + if self.factory.base_url == "https://github.com": + return github3.login(token=self.access_token) + else: + return github3.enterprise_login( + url=self.factory.base_url, token=self.access_token + ) + + @staticmethod + def _extract_license(repo): + # The GitHub API returns the `license` as a simple key of the ShortRepository. + # But for some reason github3py does not include a mapping for this. + # So the only way to access it without making an additional request is to convert + # the repo to a dict. 
+ repo_dict = repo.as_dict() + license_obj = repo_dict["license"] + if license_obj is not None: + spdx = license_obj["spdx_id"] + if spdx == "NOASSERTION": + # For 'other' type of licenses, Github sets the spdx_id to NOASSERTION + return None + return spdx + return None def list_repositories(self): if self._gh is None: @@ -128,10 +165,12 @@ def list_repositories(self): if repo.permissions["admin"]: repos[str(repo.id)] = GenericRepository( - str(repo.id), - repo.full_name, - repo.description, - repo.default_branch, + id=str(repo.id), + full_name=repo.full_name, + description=repo.description, + html_url=repo.html_url, + default_branch=repo.default_branch, + license_spdx=GitHubProvider._extract_license(repo), ) return repos @@ -147,7 +186,11 @@ def list_repository_webhooks(self, repository_id): hooks = [] for hook in repo.hooks(): hooks.append( - GenericWebhook(str(hook.id), repository_id, hook.config.get("url", "")) + GenericWebhook( + id=str(hook.id), + repository_id=repository_id, + url=hook.config.get("url"), + ) ) return hooks @@ -161,7 +204,12 @@ def get_repository(self, repository_id): return None return GenericRepository( - str(repo.id), repo.full_name, repo.description, repo.default_branch + id=str(repo.id), + full_name=repo.full_name, + description=repo.description, + html_url=repo.html_url, + default_branch=repo.default_branch, + license_spdx=GitHubProvider._extract_license(repo), ) def create_webhook(self, repository_id): @@ -245,7 +293,13 @@ def list_repository_contributors(self, repository_id, max): ) contributors = [ - GenericContributor(x.id, x.login, x.full_name, x.contributions_count) + GenericContributor( + id=x.id, + username=x.login, + display_name=x.full_name, + contributions_count=x.contributions_count, + company=x.refresh().company, + ) for x in sorted_contributors ] return contributors @@ -266,7 +320,11 @@ def get_repository_owner(self, repository_id): if repo is None: return None - return GenericUser(repo.owner.id, repo.owner.login, 
repo.owner.full_name) + return GenericUser( + id=repo.owner.id, + username=repo.owner.login, + display_name=repo.owner.full_name, + ) def resolve_release_zipball_url(self, release_zipball_url): if self._gh is None: diff --git a/invenio_vcs/ext.py b/invenio_vcs/ext.py index c93cd11f..b4d4a5a6 100644 --- a/invenio_vcs/ext.py +++ b/invenio_vcs/ext.py @@ -33,7 +33,7 @@ from six import string_types from werkzeug.utils import cached_property, import_string -from invenio_vcs.providers import get_provider_list +from invenio_vcs.config import get_provider_list from invenio_vcs.receivers import VCSReceiver from invenio_vcs.service import VCSRelease from invenio_vcs.utils import obj_or_import_string @@ -75,12 +75,12 @@ def init_app(self, app): def init_config(self, app): """Initialize configuration.""" app.config.setdefault( - "GITHUB_SETTINGS_TEMPLATE", - app.config.get("SETTINGS_TEMPLATE", "invenio_github/settings/base.html"), + "VCS_SETTINGS_TEMPLATE", + app.config.get("SETTINGS_TEMPLATE", "invenio_vcs/settings/base.html"), ) for k in dir(config): - if k.startswith("GITHUB_") or k.startswith("VCS_"): + if k.startswith("VCS_"): app.config.setdefault(k, getattr(config, k)) @@ -98,11 +98,11 @@ def init_menu(app): endpoint="invenio_vcs.get_repositories", endpoint_arguments_constructor=lambda: {"provider": provider.id}, text=_( - "%(icon)s $(provider)", + "%(icon)s %(provider)s", icon=LazyString( lambda: f'' ), - provider=LazyString(lambda: provider.name), + provider=provider.name, ), order=10, active_when=lambda: request.endpoint.startswith("invenio_vcs."), diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index acf2a777..bcd4956c 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -35,8 +35,6 @@ from sqlalchemy_utils.models import Timestamp from sqlalchemy_utils.types import ChoiceType, JSONType, UUIDType -from invenio_vcs.providers import GenericRelease - RELEASE_STATUS_TITLES = { "RECEIVED": _("Received"), "PROCESSING": _("Processing"), @@ -174,7 
+172,7 @@ def create(cls, user_id, provider, provider_id=None, name=None, **kwargs): return obj @classmethod - def get(cls, provider_id=None, name=None): + def get(cls, provider, provider_id=None, name=None): """Return a repository given its name or github id. :param integer github_id: GitHub repository identifier. @@ -188,9 +186,13 @@ def get(cls, provider_id=None, name=None): """ repo = None if provider_id: - repo = cls.query.filter(Repository.provider_id == provider_id).one_or_none() + repo = cls.query.filter( + Repository.provider_id == provider_id, Repository.provider == provider + ).one_or_none() if not repo and name is not None: - repo = cls.query.filter(Repository.name == name).one_or_none() + repo = cls.query.filter( + Repository.name == name, Repository.provider == provider + ).one_or_none() return repo @@ -210,13 +212,13 @@ def latest_release(self, status=None): def __repr__(self): """Get repository representation.""" - return "".format(self=self) + return "".format(self=self) class Release(db.Model, Timestamp): """Information about a GitHub release.""" - __tablename__ = "github_releases" + __tablename__ = "vcs_releases" id = db.Column( UUIDType, @@ -268,4 +270,4 @@ class Release(db.Model, Timestamp): def __repr__(self): """Get release representation.""" - return f"" + return f"" diff --git a/invenio_vcs/providers.py b/invenio_vcs/providers.py index 92c415a1..86d31299 100644 --- a/invenio_vcs/providers.py +++ b/invenio_vcs/providers.py @@ -1,9 +1,11 @@ +from __future__ import annotations + +import types from abc import ABC, abstractmethod from dataclasses import dataclass from datetime import datetime from urllib.parse import urlparse -from flask import current_app from invenio_i18n import gettext as _ from invenio_oauth2server.models import Token as ProviderToken from invenio_oauthclient import current_oauthclient @@ -20,47 +22,56 @@ class GenericWebhook: id: str repository_id: str - url: str + url: str | types.NoneType = None @dataclass class 
GenericRepository: id: str full_name: str - description: str default_branch: str + html_url: str + description: str | types.NoneType = None + license_spdx: str | types.NoneType = None @dataclass class GenericRelease: id: str - name: str tag_name: str - tarball_url: str - zipball_url: str created_at: datetime + name: str | types.NoneType = None + body: str | types.NoneType = None + tarball_url: str | types.NoneType = None + zipball_url: str | types.NoneType = None + published_at: datetime | types.NoneType = None @dataclass class GenericUser: id: str username: str - display_name: str + display_name: str | types.NoneType = None @dataclass class GenericContributor: id: str username: str - display_name: str + company: str | None contributions_count: int + display_name: str | types.NoneType = None class RepositoryServiceProviderFactory(ABC): def __init__( - self, provider: type["RepositoryServiceProvider"], webhook_receiver_url: str + self, + provider: type["RepositoryServiceProvider"], + base_url: str, + webhook_receiver_url: str, ): self.provider = provider + self.base_url = base_url self.webhook_receiver_url = webhook_receiver_url @property @@ -114,12 +125,26 @@ def webhook_is_create_release_event(self, event_payload): def webhook_event_to_generic(self, event_payload): raise NotImplementedError + @abstractmethod + def url_for_tag(self, repository_name, tag_name): + raise NotImplementedError + def for_user(self, user_id: str): return self.provider(self, user_id) def for_access_token(self, user_id: str, access_token: str): return self.provider(self, user_id, access_token=access_token) + @property + def vocabulary(self): + return { + "id": self.id, + "name": self.name, + "repository_name": self.repository_name, + "repository_name_plural": self.repository_name_plural, + "icon": self.icon, + } + class RepositoryServiceProvider(ABC): def __init__( @@ -192,46 +217,50 @@ def is_valid_webhook(self, url): return configured_host == url_host @abstractmethod - def 
list_repositories(self): + def list_repositories(self) -> dict[str, GenericRepository] | None: raise NotImplementedError @abstractmethod - def list_repository_webhooks(self, repository_id): + def list_repository_webhooks(self, repository_id) -> list[GenericWebhook] | None: raise NotImplementedError - def get_first_valid_webhook(self, repository_id): + def get_first_valid_webhook(self, repository_id) -> GenericWebhook | None: webhooks = self.list_repository_webhooks(repository_id) + if webhooks is None: + return None for hook in webhooks: if self.is_valid_webhook(hook.url): return hook return None @abstractmethod - def get_repository(self, repository_id): + def get_repository(self, repository_id) -> GenericRepository | None: raise NotImplementedError @abstractmethod - def list_repository_contributors(self, repository_id, max): + def list_repository_contributors( + self, repository_id, max + ) -> list[GenericContributor] | None: raise NotImplementedError @abstractmethod - def get_repository_owner(self, repository_id): + def get_repository_owner(self, repository_id) -> GenericUser | None: raise NotImplementedError @abstractmethod - def create_webhook(self, repository_id): + def create_webhook(self, repository_id) -> bool: raise NotImplementedError @abstractmethod - def delete_webhook(self, repository_id, hook_id=None): + def delete_webhook(self, repository_id, hook_id=None) -> bool: raise NotImplementedError @abstractmethod - def get_own_user(self): + def get_own_user(self) -> GenericUser | None: raise NotImplementedError @abstractmethod - def resolve_release_zipball_url(self, release_zipball_url): + def resolve_release_zipball_url(self, release_zipball_url) -> str | None: raise NotImplementedError @abstractmethod @@ -245,15 +274,3 @@ def retrieve_remote_file(self, repository_id, tag_name, file_name): @abstractmethod def revoke_token(self, access_token): raise NotImplementedError - - -def get_provider_list(app=current_app) -> list[RepositoryServiceProviderFactory]: - 
return app.config["VCS_PROVIDERS"] - - -def get_provider_by_id(id: str) -> RepositoryServiceProviderFactory: - providers = get_provider_list() - for provider in providers: - if id == provider.id: - return provider - raise Exception(f"VCS provider with ID {id} not registered") diff --git a/invenio_vcs/receivers.py b/invenio_vcs/receivers.py index dd455031..8c78e7ae 100644 --- a/invenio_vcs/receivers.py +++ b/invenio_vcs/receivers.py @@ -25,8 +25,8 @@ from invenio_db import db from invenio_webhooks.models import Receiver +from invenio_vcs.config import get_provider_by_id from invenio_vcs.models import Release, ReleaseStatus, Repository -from invenio_vcs.providers import get_provider_by_id from invenio_vcs.tasks import process_release from .errors import ( @@ -81,7 +81,11 @@ def _handle_create_release(self, event): raise ReleaseAlreadyReceivedError(release=existing_release) # Create the Release - repo = Repository.get(generic_repo.id, generic_repo.full_name) + repo = Repository.get( + self.provider_factory.id, + provider_id=generic_repo.id, + name=generic_repo.full_name, + ) if not repo: raise RepositoryNotFoundError(generic_repo.full_name) diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py index c6ccecc2..262bb62d 100644 --- a/invenio_vcs/service.py +++ b/invenio_vcs/service.py @@ -1,5 +1,8 @@ +from abc import abstractmethod from contextlib import contextmanager from copy import deepcopy +from dataclasses import asdict +from typing import TYPE_CHECKING from flask import current_app from invenio_access.permissions import authenticated_user @@ -10,6 +13,7 @@ from sqlalchemy.exc import NoResultFound from werkzeug.utils import cached_property +from invenio_vcs.config import get_provider_by_id from invenio_vcs.errors import ( RemoteAccountDataNotSet, RemoteAccountNotFound, @@ -17,14 +21,20 @@ RepositoryNotFoundError, ) from invenio_vcs.models import Release, ReleaseStatus, Repository -from invenio_vcs.providers import RepositoryServiceProvider, 
get_provider_by_id from invenio_vcs.proxies import current_vcs from invenio_vcs.tasks import sync_hooks as sync_hooks_task from invenio_vcs.utils import iso_utcnow +if TYPE_CHECKING: + from invenio_vcs.providers import ( + GenericRelease, + GenericRepository, + RepositoryServiceProvider, + ) + class VCSService: - def __init__(self, provider: RepositoryServiceProvider) -> None: + def __init__(self, provider: "RepositoryServiceProvider") -> None: self.provider = provider @staticmethod @@ -57,15 +67,15 @@ def list_repositories(self): if vcs_repos: # 'Enhance' our repos dict, from our database model db_repos = Repository.query.filter( - Repository.provider_id.in_([int(k) for k in vcs_repos.keys()]) + Repository.provider_id.in_([k for k in vcs_repos.keys()]) ) for db_repo in db_repos: - if str(db_repo.provider_id) in vcs_repos: + if db_repo.provider_id in vcs_repos: release_instance = current_vcs.release_api_class( db_repo.latest_release(), self.provider.factory.id ) - vcs_repos[str(db_repo.github_id)]["instance"] = db_repo - vcs_repos[str(db_repo.github_id)]["latest"] = release_instance + vcs_repos[db_repo.provider_id]["instance"] = db_repo + vcs_repos[db_repo.provider_id]["latest"] = release_instance return vcs_repos @@ -108,12 +118,14 @@ def get_last_sync_time(self): return extra_data["last_sync"] - def get_repository(self, repo_id): + def get_repository(self, repo_id=None, repo_name=None): """Retrieves one repository. Checks for access permission. """ - repo = Repository.get(provider_id=repo_id) + repo = Repository.get( + self.provider.factory.id, provider_id=repo_id, name=repo_name + ) if not repo: raise RepositoryNotFoundError(repo_id) @@ -160,6 +172,8 @@ def sync(self, hooks=True, async_hooks=True): own state based on this information. 
""" vcs_repos = self.provider.list_repositories() + if vcs_repos is None: + vcs_repos = {} if hooks: self._sync_hooks(vcs_repos.keys(), asynchronous=async_hooks) @@ -170,7 +184,7 @@ def sync(self, hooks=True, async_hooks=True): ) for repo in db_repos: - vcs_repo = vcs_repos.get(repo.github_id) + vcs_repo = vcs_repos.get(repo.provider_id) if vcs_repo and repo.name != vcs_repo.full_name: repo.name = vcs_repo.full_name db.session.add(repo) @@ -185,7 +199,7 @@ def sync(self, hooks=True, async_hooks=True): # Update repos and last sync self.provider.remote_account.extra_data.update( dict( - repos=vcs_repos, + repos={k: asdict(v) for k, v in vcs_repos.items()}, last_sync=iso_utcnow(), ) ) @@ -219,21 +233,21 @@ def sync_repo_hook(self, repo_id): # If hook on GitHub exists, get or create corresponding db object and # enable the hook. Otherwise remove the old hook information. - repo = Repository.get(repo_id) + db_repo = Repository.get(self.provider.factory.id, provider_id=repo_id) if hook: - if not repo: - repo = Repository.create( + if not db_repo: + db_repo = Repository.create( self.provider.user_id, self.provider.factory.id, repo_id, vcs_repo.full_name, ) - if not repo.enabled: - self.mark_repo_enabled(repo, hook.id) + if not db_repo.enabled: + self.mark_repo_enabled(db_repo, hook.id) else: - if repo: - self.mark_repo_disabled(repo) + if db_repo: + self.mark_repo_disabled(db_repo) def mark_repo_disabled(self, repo): """Disables an user repository.""" @@ -253,6 +267,10 @@ def init_account(self): ) user = self.provider.get_own_user() + if user is None: + # TODO: create a reasonable exception here + raise Exception("TODO") + # Setup local access tokens to be used by the webhooks hook_token = ProviderToken.create_personal( f"{self.provider.factory.id}-webhook", @@ -263,8 +281,8 @@ def init_account(self): # Initial structure of extra data self.provider.remote_account.extra_data = dict( id=user.id, - login=user.login, - name=user.name, + login=user.username, + 
name=user.display_name, tokens=dict( webhook=hook_token.id, ), @@ -294,7 +312,7 @@ def disable_repository(self, repository_id, hook_id=None): class VCSRelease: """A GitHub release.""" - def __init__(self, release, provider: RepositoryServiceProvider): + def __init__(self, release: Release, provider: "RepositoryServiceProvider"): """Constructor.""" self.db_release = release self.provider = provider @@ -316,17 +334,21 @@ def payload(self): return self.event.payload @cached_property - def generic_release(self): + def _generic_release_and_repo(self): + return self.provider.factory.webhook_event_to_generic(self.payload) + + @property + def generic_release(self) -> "GenericRelease": """Return release metadata.""" - return self.provider.factory.webhook_event_to_generic(self.payload)[0] + return self._generic_release_and_repo[0] @cached_property - def generic_repo(self): + def generic_repo(self) -> "GenericRepository": """Return repo metadata.""" - return self.provider.factory.webhook_event_to_generic(self.payload)[1] + return self._generic_release_and_repo[1] @cached_property - def db_repo(self): + def db_repo(self) -> Repository: """Return repository model from database.""" if self.db_release.repository_id: repository = self.db_release.repository @@ -347,7 +369,7 @@ def release_file_name(self): @cached_property def release_zipball_url(self): """Returns the release zipball URL.""" - return self.generic_release["zipball_url"] + return self.generic_release.zipball_url @cached_property def user_identity(self): diff --git a/invenio_vcs/tasks.py b/invenio_vcs/tasks.py index e572ab11..92437d3e 100644 --- a/invenio_vcs/tasks.py +++ b/invenio_vcs/tasks.py @@ -32,9 +32,9 @@ from invenio_oauthclient.models import RemoteAccount from invenio_oauthclient.proxies import current_oauthclient +from invenio_vcs.config import get_provider_by_id from invenio_vcs.errors import CustomGitHubMetadataError, RepositoryAccessError from invenio_vcs.models import Release, ReleaseStatus -from 
invenio_vcs.providers import get_provider_by_id from invenio_vcs.proxies import current_vcs diff --git a/invenio_vcs/templates/semantic-ui/invenio_vcs/helpers.html b/invenio_vcs/templates/semantic-ui/invenio_vcs/helpers.html index 6b2f7535..da997bd2 100644 --- a/invenio_vcs/templates/semantic-ui/invenio_vcs/helpers.html +++ b/invenio_vcs/templates/semantic-ui/invenio_vcs/helpers.html @@ -8,14 +8,14 @@ #} {% from "semantic-ui/invenio_formatter/macros/badges.html" import badges_formats_list %} -{%- macro doi_badge(doi, doi_url, github_id) %} +{%- macro doi_badge(doi, doi_url, provider_id, provider) %} {%- block doi_badge scoped %} - {% set image_url = url_for('invenio_vcs_badge.index', repo_github_id=github_id, _external=True) %} + {% set image_url = url_for('invenio_vcs_badge.index', provider=provider, repo_provider_id=provider_id, _external=True) %} @@ -59,8 +58,8 @@

helpers.panel_start( _('Releases'), btn_text=_('Create release'), - btn_icon='github icon', - btn_href=github_rel_url, + btn_icon=vocabulary["icon"] + ' icon', + btn_href=new_release_url, ) }}
@@ -70,9 +69,9 @@

{%- block enabled_repo_get_started scoped %} @@ -93,7 +92,7 @@

1 {{ _("Flip the switch") }}

{{ _("Toggle the switch below to turn on/off automatic preservation of your repository.") }}

- {{ helpers.repo_switch(repo, repo.github_id) }} + {{ helpers.repo_switch(repo, repo.provider_id) }}
@@ -185,13 +179,13 @@

2 {{ _("Create a release") }}

{%- block release_status scoped %}
- + - {{ release.release_object.status.title }} + {{ release.db_release.status.title }}

- {{ release.release_object.created|naturaltime }} + {{ release.db_release.created|naturaltime }}

{%- endblock release_status %} @@ -215,7 +209,7 @@

2 {{ _("Create a release") }}

{%- block metadata_tab scoped %} {%- endblock metadata_tab %} - {%- if release.release_object.errors %} + {%- if release.db_release.errors %} @@ -229,11 +223,10 @@

2 {{ _("Create a release") }}

{%- block releasetab_cff %} {% set repo_name = value %} - {% set citation_cff_create_link = 'https://github.com/{0}/new/{1}?filename=CITATION.cff'.format(repo.name, (default_branch or 'master')) %}

@@ -263,8 +256,7 @@

{{ _("Citation File") }}

{%- block releasetab_payload %} {%- if release.event %}
-

{{ _("GitHub Payload") }}

- +

{{ _("%(name)s Payload", name=vocabulary["name"]) }}

{{ _("Received") }} {{ release.event.created|datetimeformat }}.
@@ -282,7 +274,7 @@

{{ _("GitHub Payload") }}

{%- block releasetab_errors %} - {%- if release.release_object.errors %} + {%- if release.db_release.errors %}
@@ -292,7 +284,7 @@

{{ _("Errors") }}

-
{{ release.release_object.errors|tojson(indent=4) }}
+
{{ release.db_release.errors|tojson(indent=4) }}
diff --git a/invenio_vcs/views/badge.py b/invenio_vcs/views/badge.py index 63cdf213..7c8e7166 100644 --- a/invenio_vcs/views/badge.py +++ b/invenio_vcs/views/badge.py @@ -26,7 +26,9 @@ from __future__ import absolute_import from flask import Blueprint, abort, redirect, url_for +from flask_login import current_user +from invenio_vcs.config import get_provider_by_id from invenio_vcs.models import ReleaseStatus, Repository from invenio_vcs.proxies import current_vcs from invenio_vcs.service import VCSService @@ -53,7 +55,9 @@ def index(provider, repo_provider_id): if not latest_release: abort(404) + provider = get_provider_by_id(provider).for_user(current_user.id) release = current_vcs.release_api_class(latest_release, provider) + # release.badge_title points to "DOI" # release.badge_value points to the record "pids.doi.identifier" badge_url = url_for( @@ -79,7 +83,9 @@ def index_old(provider, user_id, repo_name): if not latest_release: abort(404) + provider = get_provider_by_id(provider).for_user(current_user.id) release = current_vcs.release_api_class(latest_release, provider) + # release.badge_title points to "DOI" # release.badge_value points to the record "pids.doi.identifier" badge_url = url_for( @@ -106,6 +112,7 @@ def latest_doi(provider, provider_id): if not latest_release: abort(404) + provider = get_provider_by_id(provider).for_user(current_user.id) release = current_vcs.release_api_class(latest_release, provider) # record.url points to DOI url or HTML url if Datacite is not enabled. 
diff --git a/invenio_vcs/views/vcs.py b/invenio_vcs/views/vcs.py index e0d8d075..72531d05 100644 --- a/invenio_vcs/views/vcs.py +++ b/invenio_vcs/views/vcs.py @@ -125,12 +125,21 @@ def get_repository(provider, repo_id): latest_release = svc.get_repo_latest_release(repo) default_branch = svc.get_repo_default_branch(repo_id) releases = svc.list_repo_releases(repo) + new_release_url = svc.provider.factory.url_for_new_release(repo.name) + new_citation_file_url = svc.provider.factory.url_for_new_file( + repo.name, default_branch or "main", "CITATION.cff" + ) + return render_template( current_app.config["VCS_TEMPLATE_VIEW"], latest_release=latest_release, + provider=provider, repo=repo, releases=releases, default_branch=default_branch, + new_release_url=new_release_url, + new_citation_file_url=new_citation_file_url, + vocabulary=svc.provider.factory.vocabulary, ) except RepositoryAccessError: abort(403) From 1354e805268736a9c7a56568aa9eb44f1c572268 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Thu, 21 Aug 2025 15:22:38 +0200 Subject: [PATCH 09/19] WIP: GitLab provider --- invenio_vcs/contrib/github.py | 94 ++++----- invenio_vcs/contrib/gitlab.py | 380 ++++++++++++++++++++++++++++++++++ invenio_vcs/ext.py | 15 +- invenio_vcs/providers.py | 103 +++++---- invenio_vcs/tasks.py | 14 +- setup.cfg | 1 + 6 files changed, 496 insertions(+), 111 deletions(-) create mode 100644 invenio_vcs/contrib/gitlab.py diff --git a/invenio_vcs/contrib/github.py b/invenio_vcs/contrib/github.py index 12197c80..1dc4796d 100644 --- a/invenio_vcs/contrib/github.py +++ b/invenio_vcs/contrib/github.py @@ -8,11 +8,13 @@ from github3.repos import ShortRepository from invenio_i18n import gettext as _ from invenio_oauthclient.contrib.github import GitHubOAuthSettingsHelper -from werkzeug.utils import cached_property +from werkzeug.utils import cached_property, import_string from invenio_vcs.errors import ReleaseZipballFetchError, UnexpectedProviderResponse from invenio_vcs.providers import ( 
GenericContributor, + GenericOwner, + GenericOwnerType, GenericRelease, GenericRepository, GenericUser, @@ -29,14 +31,25 @@ def __init__( webhook_receiver_url, id="github", name="GitHub", + description="Automatically archive your repositories", + credentials_key="GITHUB_APP_CREDENTIALS", config={}, ): - super().__init__(GitHubProvider, base_url, webhook_receiver_url) - self._id = id - self._name = name + super().__init__( + GitHubProvider, + base_url=base_url, + webhook_receiver_url=webhook_receiver_url, + id=id, + name=name, + description=description, + credentials_key=credentials_key, + icon="github", + repository_name="repository", + repository_name_plural="repositories", + ) + self._config = dict() self._config.update( - credentials_key="GITHUB_APP_CREDENTIALS", shared_secret="", insecure_ssl=False, ) @@ -49,7 +62,11 @@ def remote_config(self): } helper = GitHubOAuthSettingsHelper( - base_url=self.base_url, app_key=self.config["credentials_key"] + title=self.name, + icon="fa fa-{}".format(self.icon), + description=self.description, + base_url=self.base_url, + app_key=self.credentials_key, ) github_app = helper.remote_app github_app["disconnect_handler"] = self.oauth_handlers.disconnect_handler @@ -60,26 +77,6 @@ def remote_config(self): return github_app - @property - def id(self): - return self._id - - @property - def name(self): - return self._name - - @property - def repository_name(self): - return "repository" - - @property - def repository_name_plural(self): - return "repositories" - - @property - def icon(self): - return "github" - @property def config(self): return self._config @@ -156,17 +153,18 @@ def url_for_new_file(self, repository_name, branch_name, file_name): class GitHubProvider(RepositoryServiceProvider): @cached_property def _gh(self): + _gh = None if self.factory.base_url == "https://github.com": - return github3.login(token=self.access_token) + _gh = github3.login(token=self.access_token) else: - return github3.enterprise_login( + _gh = 
github3.enterprise_login( url=self.factory.base_url, token=self.access_token ) - def list_repositories(self): - if self._gh is None: - return None + assert _gh is not None + return _gh + def list_repositories(self): repos: dict[str, GenericRepository] = {} for repo in self._gh.repositories(): assert isinstance(repo, ShortRepository) @@ -185,8 +183,6 @@ def list_repositories(self): def list_repository_webhooks(self, repository_id): assert repository_id.isdigit() - if self._gh is None: - return None repo = self._gh.repository_with_id(int(repository_id)) if repo is None: return None @@ -204,8 +200,6 @@ def list_repository_webhooks(self, repository_id): def get_repository(self, repository_id): assert repository_id.isdigit() - if self._gh is None: - return None repo = self._gh.repository_with_id(int(repository_id)) if repo is None: @@ -222,8 +216,6 @@ def get_repository(self, repository_id): def create_webhook(self, repository_id): assert repository_id.isdigit() - if self._gh is None: - return None hook_config = dict( url=self.webhook_url, @@ -248,8 +240,6 @@ def create_webhook(self, repository_id): def delete_webhook(self, repository_id, hook_id=None): assert repository_id.isdigit() - if self._gh is None: - return False repo = self._gh.repository_with_id(int(repository_id)) if repo is None: @@ -270,9 +260,6 @@ def delete_webhook(self, repository_id, hook_id=None): return False def get_own_user(self): - if self._gh is None: - return None - user = self._gh.me() if user is not None: return GenericUser(user.id, user.login, user.name) @@ -281,8 +268,6 @@ def get_own_user(self): def list_repository_contributors(self, repository_id, max): assert repository_id.isdigit() - if self._gh is None: - return None repo = self._gh.repository_with_id(repository_id) if repo is None: @@ -325,23 +310,25 @@ def list_repository_contributors(self, repository_id, max): def get_repository_owner(self, repository_id): assert repository_id.isdigit() - if self._gh is None: - return None repo = 
self._gh.repository_with_id(repository_id) if repo is None: return None - return GenericUser( + owner_type = ( + GenericOwnerType.Person + if repo.owner.type == "User" + else GenericOwnerType.Organization + ) + + return GenericOwner( id=repo.owner.id, - username=repo.owner.login, + path_name=repo.owner.login, display_name=repo.owner.full_name, + type=owner_type, ) def resolve_release_zipball_url(self, release_zipball_url): - if self._gh is None: - return None - url = release_zipball_url # Execute a HEAD request to the zipball url to test if it is accessible. @@ -383,13 +370,12 @@ def fetch_release_zipball(self, release_zipball_url, timeout): def retrieve_remote_file(self, repository_id, tag_name, file_name): assert repository_id.isdigit() - if self._gh is None: - return None try: - return self._gh.repository_with_id(repository_id).file_contents( + resp = self._gh.repository_with_id(repository_id).file_contents( path=file_name, ref=tag_name ) + return resp.decoded except github3.exceptions.NotFoundError: return None diff --git a/invenio_vcs/contrib/gitlab.py b/invenio_vcs/contrib/gitlab.py new file mode 100644 index 00000000..420038a8 --- /dev/null +++ b/invenio_vcs/contrib/gitlab.py @@ -0,0 +1,380 @@ +from __future__ import annotations + +from typing import Any + +import dateutil +import gitlab +import gitlab.const +import requests +from flask import current_app +from invenio_oauthclient import current_oauthclient +from werkzeug.utils import cached_property + +from invenio_vcs.providers import ( + GenericContributor, + GenericOwner, + GenericOwnerType, + GenericRelease, + GenericRepository, + GenericUser, + GenericWebhook, + RepositoryServiceProvider, + RepositoryServiceProviderFactory, +) + + +def _gl_response_error_handler(f): + def inner_function(*args, **kwargs): + try: + return f(*args, **kwargs) + except gitlab.GitlabGetError as e: + if e.response_code == 404: + return None + else: + raise e + except gitlab.GitlabCreateError as e: + if e.response_code == 404: 
+ return None + else: + raise e + + return inner_function + + +class GitLabProviderFactory(RepositoryServiceProviderFactory): + def __init__( + self, + base_url: str, + webhook_receiver_url: str, + id="gitlab", + name="GitLab", + description="Automatically archive your repositories", + credentials_key="GITLAB_APP_CREDENTIALS", + config={}, + ): + super().__init__( + GitLabProvider, + base_url=base_url, + webhook_receiver_url=webhook_receiver_url, + id=id, + name=name, + description=description, + credentials_key=credentials_key, + icon="gitlab", + repository_name="project", + repository_name_plural="projects", + ) + self._config = dict() + self._config.update(shared_validation_token="") + self._config.update(config) + + def _account_info_handler(self, remote, resp: dict): + gl = gitlab.Gitlab( + self.base_url, + oauth_token=resp["access_token"], + ) + gl.auth() + user_attrs = gl.user.attributes + handlers = current_oauthclient.signup_handlers[remote.name] + return handlers["info_serializer"](resp, user_attrs) + + def _account_info_serializer(self, remote, resp, user_info, **kwargs): + return dict( + user=dict( + email=user_info["email"], + profile=dict( + username=user_info["username"], + full_name=user_info["name"], + ), + ), + external_id=str(user_info["id"]), + external_method="gitlab", + ) + + @property + def remote_config(self): + return dict( + title=self.name, + description=self.description, + icon="fa fa-{}".format(self.icon), + authorized_handler="invenio_oauthclient.handlers:authorized_signup_handler", + disconnect_handler=self.oauth_handlers.disconnect_handler, + signup_handler=dict( + info=self._account_info_handler, + info_serializer=self._account_info_serializer, + setup=self.oauth_handlers.account_setup_handler, + view="invenio_oauthclient.handlers:signup_handler", + ), + params=dict( + base_url="{}/api/v4/".format(self.base_url), + request_token_url=None, + access_token_url="{}/oauth/token".format(self.base_url), + access_token_method="POST", + 
authorize_url="{}/oauth/authorize".format(self.base_url), + app_key=self.credentials_key, + ), + ) + + @property + def config(self): + return self._config + + def url_for_tag(self, repository_name, tag_name) -> str: + return "{}/{}/-/tags/{}".format(self.base_url, repository_name, tag_name) + + def url_for_new_file(self, repository_name, branch_name, file_name) -> str: + return "{}/{}/-/new/{}/?file_name={}".format( + self.base_url, repository_name, branch_name, file_name + ) + + def url_for_new_release(self, repository_name) -> str: + return "{}/{}/-/releases/new".format(self.base_url, repository_name) + + def webhook_is_create_release_event(self, event_payload: dict[str, Any]): + # https://archives.docs.gitlab.com/17.11/user/project/integrations/webhook_events/#release-events + + # GitLab does not have unpublished/draft releases the way GitHub does. However, it does have + # "upcoming releases" (https://archives.docs.gitlab.com/17.11/api/releases/#upcoming-releases) + # meaning ones with a release date in the future. + # TODO: do we want to return False for upcoming releases? + + object_kind = event_payload.get("object_kind") + action = event_payload.get("action") + + # existing `invenio-gitlab` instead uses the `tag_push` event which is more general than the `release` + # event (https://codebase.helmholtz.cloud/rodare/invenio-gitlab/-/blob/d66181697b8a34383b333306b559d13cd6fa829a/invenio_gitlab/receivers.py#L41). + # TODO: I recommend using the `release` event as this is a more 'formal' manual action and better corresponds to the release event in GitHub. Is this okay? 
+ return object_kind == "release" and action == "create" + + def webhook_event_to_generic( + self, event_payload: dict[str, Any] + ) -> tuple[GenericRelease, GenericRepository]: + # https://archives.docs.gitlab.com/17.11/user/project/integrations/webhook_events/#release-events + + zipball_url: str | None = None + tarball_url: str | None = None + + for source in event_payload["sources"]: + format = source["format"] + url = source["url"] + if format == "zip": + zipball_url = url + elif format == "tar": + tarball_url = url + + release = GenericRelease( + # GitLab does not expose the in-database ID of releases through the webhook payload or the REST API + # It does exist internally but it's never sent to us + id=event_payload["tag"], + tag_name=event_payload["tag"], + html_url=event_payload["url"], + name=event_payload["name"], + body=event_payload["description"], + zipball_url=zipball_url, + tarball_url=tarball_url, + created_at=dateutil.parser.parse(event_payload["created_at"]), + published_at=dateutil.parser.parse(event_payload["released_at"]), + ) + + repo = GitLabProviderFactory._proj_to_generic(event_payload["project"]) + return (release, repo) + + @staticmethod + def _extract_license(proj_attrs: dict[str, Any]): + license_obj = proj_attrs.get("license") + if license_obj is not None: + return license_obj["key"].upper() + return None + + @staticmethod + def _proj_to_generic(proj_attrs: dict[str, Any]): + return GenericRepository( + id=str(proj_attrs["id"]), + full_name=proj_attrs["path_with_namespace"], + default_branch=proj_attrs["default_branch"], + html_url=proj_attrs["web_url"], + description=proj_attrs["description"], + license_spdx=GitLabProviderFactory._extract_license(proj_attrs), + ) + + +class GitLabProvider(RepositoryServiceProvider): + @cached_property + def _gl(self): + gl = gitlab.Gitlab(self.factory.base_url, oauth_token=self.access_token) + gl.auth() + return gl + + @_gl_response_error_handler + def list_repositories(self) -> dict[str, 
GenericRepository] | None: + repos: dict[str, GenericRepository] = {} + for project in self._gl.projects.list( + iterator=True, + simple=False, + min_access_level=gitlab.const.MAINTAINER_ACCESS, + ): + repos[str(project.id)] = GenericRepository( + id=str(project.id), + full_name=project.path_with_namespace, + default_branch=project.default_branch, + html_url=project.web_url, + description=project.description, + # TODO: license is not returned in the projects list (only when querying an individual project). + # This would be super slow. Do we really need license here? + license_spdx=None, + ) + return repos + + @_gl_response_error_handler + def get_repository(self, repository_id: str) -> GenericRepository | None: + assert repository_id.isdigit() + proj = self._gl.projects.get(int(repository_id)) + return GitLabProviderFactory._proj_to_generic(proj.asdict()) + + @_gl_response_error_handler + def list_repository_contributors( + self, repository_id: str, max: int + ) -> list[GenericContributor] | None: + assert repository_id.isdigit() + proj = self._gl.projects.get(int(repository_id), lazy=True) + + contribs: list[GenericContributor] = [] + for index, contrib in enumerate( + proj.repository_contributors(iterator=True, order_by="commits", sort="desc") + ): + email = contrib["email"] + contrib_count = contrib["commits"] + + # repository_contributors returns a very small amount of data (not even the username) + # See here https://archives.docs.gitlab.com/17.11/api/repositories/#contributors + # So we try to enrich the data by searching for the user with the matching email. + # We will fail to find it if a) the user doesn't exist (e.g. repos imported/forked from somewhere else) + # or b) if the user has not made their email address public. + # By default, email addresses on GitLab are private, so this is unlikely to succeed. 
+ matching_users = self._gl.users.list(search=email) + if len(matching_users) == 0: + contribs.append( + GenericContributor( + id=email, + username=email, + display_name=contrib["name"], + contributions_count=contrib_count, + ) + ) + else: + matching_user = matching_users[0] + contribs.append( + GenericContributor( + id=str(matching_user.id), + username=matching_user.username, + display_name=matching_user.name, + contributions_count=contrib_count, + ) + ) + + if index + 1 == max: + break + + return contribs + + @_gl_response_error_handler + def get_repository_owner(self, repository_id: str): + assert repository_id.isdigit() + proj = self._gl.projects.get(int(repository_id)) + return GenericOwner( + id=str(proj.namespace.id), + path_name=proj.namespace.path, + display_name=proj.namespace.name, + type=( + GenericOwnerType.Person + if proj.namespace.kind == "user" + else GenericOwnerType.Organization + ), + ) + + @_gl_response_error_handler + def list_repository_webhooks( + self, repository_id: str + ) -> list[GenericWebhook] | None: + assert repository_id.isdigit() + proj = self._gl.projects.get(int(repository_id), lazy=True) + hooks: list[GenericWebhook] = [] + for hook in proj.hooks.list(iterator=True): + hooks.append( + GenericWebhook( + id=str(hook.id), + repository_id=str(hook.project_id), + url=hook.url, + ) + ) + return hooks + + @_gl_response_error_handler + def create_webhook(self, repository_id: str) -> str | None: + assert repository_id.isdigit() + proj = self._gl.projects.get(int(repository_id), lazy=True) + + hook_data = { + "url": self.webhook_url, + "token": self.factory.config.get("shared_validation_token"), + "releases_events": True, + "description": "Managed by {}".format( + current_app.config.get("THEME_SITENAME", "Invenio") + ), + } + + resp = proj.hooks.create(hook_data) + return str(resp.id) + + @_gl_response_error_handler + def delete_webhook(self, repository_id: str, hook_id=None) -> bool: + assert repository_id.isdigit() + if hook_id is not 
None: + assert hook_id.isdigit() + + proj = self._gl.projects.get(int(repository_id), lazy=True) + if hook_id is None: + first_valid = self.get_first_valid_webhook(repository_id) + if first_valid is None: + return True + + proj.hooks.delete(int(first_valid.id)) + else: + proj.hooks.delete(int(hook_id)) + + return True + + @_gl_response_error_handler + def get_own_user(self) -> GenericUser | None: + user = self._gl.user + if user is None: + return None + return GenericUser( + id=str(user.id), + username=user.username, + display_name=user.name, + ) + + def resolve_release_zipball_url(self, release_zipball_url: str) -> str | None: + # No further resolution needs to be done for GitLab, so this is a no-op + return release_zipball_url + + @_gl_response_error_handler + def fetch_release_zipball(self, release_zipball_url: str, timeout: int): + resp = self._gl.http_get( + release_zipball_url, raw=True, streamed=True, timeout=timeout + ) + assert isinstance(resp, requests.Response) + with resp: + yield resp.raw + + @_gl_response_error_handler + def retrieve_remote_file(self, repository_id: str, tag_name: str, file_name: str): + assert repository_id.isdigit() + proj = self._gl.projects.get(repository_id, lazy=True) + file = proj.files.get(file_path=file_name, ref=tag_name) + return file.decode() + + def revoke_token(self, access_token: str): + # TODO: GitLab implements RFC7009 for OAuth Token Revocation. We might need to do this via OAuth instead of the GitLab API. 
+ pass diff --git a/invenio_vcs/ext.py b/invenio_vcs/ext.py index 08c2ba2d..53e6f0d7 100644 --- a/invenio_vcs/ext.py +++ b/invenio_vcs/ext.py @@ -30,7 +30,6 @@ from invenio_i18n import LazyString from invenio_i18n import gettext as _ from invenio_theme.proxies import current_theme_icons -from invenio_webhooks import current_webhooks from six import string_types from werkzeug.utils import cached_property, import_string @@ -101,9 +100,17 @@ def finalize_app_api(app): def init_menu(app): """Init menu.""" for provider in get_provider_list(app): - current_menu.submenu(f"settings.{provider.id}").register( + id = provider.id + + def is_active(): + return ( + request.endpoint.startswith("invenio_vcs.") + and request.view_args.get("provider", "") == id + ) + + current_menu.submenu(f"settings.vcs_{id}").register( endpoint="invenio_vcs.get_repositories", - endpoint_arguments_constructor=lambda: {"provider": provider.id}, + endpoint_arguments_constructor=lambda: {"provider": id}, text=_( "%(icon)s %(provider)s", icon=LazyString( @@ -112,7 +119,7 @@ def init_menu(app): provider=provider.name, ), order=10, - active_when=lambda: request.endpoint.startswith("invenio_vcs."), + active_when=is_active, ) diff --git a/invenio_vcs/providers.py b/invenio_vcs/providers.py index eb1c8931..64ba7fa6 100644 --- a/invenio_vcs/providers.py +++ b/invenio_vcs/providers.py @@ -4,6 +4,8 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from datetime import datetime +from enum import Enum +from typing import Any, Generator from urllib.parse import urlparse from invenio_i18n import gettext as _ @@ -11,6 +13,7 @@ from invenio_oauthclient import current_oauthclient from invenio_oauthclient.handlers import token_getter from invenio_oauthclient.models import RemoteAccount, RemoteToken +from urllib3 import HTTPResponse from werkzeug.local import LocalProxy from werkzeug.utils import cached_property @@ -22,7 +25,7 @@ class GenericWebhook: id: str repository_id: str - url: str | 
types.NoneType = None + url: str @dataclass @@ -55,12 +58,25 @@ class GenericUser: display_name: str | types.NoneType = None +class GenericOwnerType(Enum): + Person = 1 + Organization = 2 + + +@dataclass +class GenericOwner: + id: str + path_name: str + type: GenericOwnerType + display_name: str | types.NoneType = None + + @dataclass class GenericContributor: id: str username: str - company: str | None - contributions_count: int + company: str | None = None + contributions_count: int | None = None display_name: str | types.NoneType = None @@ -70,10 +86,24 @@ def __init__( provider: type["RepositoryServiceProvider"], base_url: str, webhook_receiver_url: str, + id: str, + name: str, + description: str, + icon: str, + credentials_key: str, + repository_name: str, + repository_name_plural: str, ): self.provider = provider self.base_url = base_url self.webhook_receiver_url = webhook_receiver_url + self.id = id + self.name = name + self.description = description + self.icon = icon + self.credentials_key = credentials_key + self.repository_name = repository_name + self.repository_name_plural = repository_name_plural @property @abstractmethod @@ -90,56 +120,31 @@ def remote(self): @property @abstractmethod - def id(self) -> str: - raise NotImplementedError - - @property - @abstractmethod - def name(self) -> str: + def config(self) -> dict: raise NotImplementedError - @property @abstractmethod - def repository_name(self) -> str: + def url_for_tag(self, repository_name, tag_name) -> str: raise NotImplementedError - @property @abstractmethod - def repository_name_plural(self) -> str: - raise NotImplementedError - - @property - @abstractmethod - def icon(self) -> str: + def url_for_new_release(self, repository_name) -> str: raise NotImplementedError - @property @abstractmethod - def config(self) -> dict: + def url_for_new_file(self, repository_name, branch_name, file_name) -> str: raise NotImplementedError @abstractmethod - def webhook_is_create_release_event(self, 
event_payload): + def webhook_is_create_release_event(self, event_payload: dict[str, Any]): raise NotImplementedError @abstractmethod def webhook_event_to_generic( - self, event_payload + self, event_payload: dict[str, Any] ) -> tuple[GenericRelease, GenericRepository]: raise NotImplementedError - @abstractmethod - def url_for_tag(self, repository_name, tag_name) -> str: - raise NotImplementedError - - @abstractmethod - def url_for_new_release(self, repository_name) -> str: - raise NotImplementedError - - @abstractmethod - def url_for_new_file(self, repository_name, branch_name, file_name) -> str: - raise NotImplementedError - def for_user(self, user_id: str): return self.provider(self, user_id) @@ -211,7 +216,7 @@ def webhook_url(self): token=webhook_token.access_token ) - def is_valid_webhook(self, url): + def is_valid_webhook(self, url: str | None): """Check if webhook url is valid. The webhook url is valid if it has the same host as the configured webhook url. @@ -232,10 +237,12 @@ def list_repositories(self) -> dict[str, GenericRepository] | None: raise NotImplementedError @abstractmethod - def list_repository_webhooks(self, repository_id) -> list[GenericWebhook] | None: + def list_repository_webhooks( + self, repository_id: str + ) -> list[GenericWebhook] | None: raise NotImplementedError - def get_first_valid_webhook(self, repository_id) -> GenericWebhook | None: + def get_first_valid_webhook(self, repository_id: str) -> GenericWebhook | None: webhooks = self.list_repository_webhooks(repository_id) if webhooks is None: return None @@ -245,25 +252,25 @@ def get_first_valid_webhook(self, repository_id) -> GenericWebhook | None: return None @abstractmethod - def get_repository(self, repository_id) -> GenericRepository | None: + def get_repository(self, repository_id: str) -> GenericRepository | None: raise NotImplementedError @abstractmethod def list_repository_contributors( - self, repository_id, max + self, repository_id: str, max: int ) -> 
list[GenericContributor] | None: raise NotImplementedError @abstractmethod - def get_repository_owner(self, repository_id) -> GenericUser | None: + def get_repository_owner(self, repository_id: str) -> GenericOwner | None: raise NotImplementedError @abstractmethod - def create_webhook(self, repository_id) -> str | None: + def create_webhook(self, repository_id: str) -> str | None: raise NotImplementedError @abstractmethod - def delete_webhook(self, repository_id, hook_id=None) -> bool: + def delete_webhook(self, repository_id: str, hook_id: str | None = None) -> bool: raise NotImplementedError @abstractmethod @@ -271,17 +278,21 @@ def get_own_user(self) -> GenericUser | None: raise NotImplementedError @abstractmethod - def resolve_release_zipball_url(self, release_zipball_url) -> str | None: + def resolve_release_zipball_url(self, release_zipball_url: str) -> str | None: raise NotImplementedError @abstractmethod - def fetch_release_zipball(self, release_zipball_url, timeout): + def fetch_release_zipball( + self, release_zipball_url: str, timeout: int + ) -> Generator[HTTPResponse]: raise NotImplementedError @abstractmethod - def retrieve_remote_file(self, repository_id, tag_name, file_name): + def retrieve_remote_file( + self, repository_id: str, tag_name: str, file_name: str + ) -> bytes | None: raise NotImplementedError @abstractmethod - def revoke_token(self, access_token): + def revoke_token(self, access_token: str): raise NotImplementedError diff --git a/invenio_vcs/tasks.py b/invenio_vcs/tasks.py index e3a97423..c521df19 100644 --- a/invenio_vcs/tasks.py +++ b/invenio_vcs/tasks.py @@ -152,7 +152,7 @@ def process_release(provider, release_id): @shared_task(ignore_result=True) -def refresh_accounts(expiration_threshold=None): +def refresh_accounts(provider, expiration_threshold=None): """Refresh stale accounts, avoiding token expiration. 
:param expiration_threshold: Dictionary containing timedelta parameters @@ -162,22 +162,22 @@ def refresh_accounts(expiration_threshold=None): tz=datetime.timezone.utc ) - datetime.timedelta(**(expiration_threshold or {"days": 6 * 30})) - remote = current_oauthclient.oauth.remote_apps["github"] + remote = current_oauthclient.oauth.remote_apps[provider] remote_accounts_to_be_updated = RemoteAccount.query.filter( RemoteAccount.updated < expiration_date, RemoteAccount.client_id == remote.consumer_key, ) for remote_account in remote_accounts_to_be_updated: - sync_account.delay(remote_account.user_id) + sync_account.delay(provider, remote_account.user_id) @shared_task(ignore_result=True) -def sync_account(user_id): +def sync_account(provider, user_id): """Sync a user account.""" # Local import to avoid circular imports - from .api import GitHubAPI + from .service import VCSService # Start a nested transaction so every data writing inside sync is executed atomically with db.session.begin_nested(): - gh = GitHubAPI(user_id=user_id) - gh.sync(hooks=False, async_hooks=False) + svc = VCSService.for_provider_and_user(provider, user_id) + svc.sync(hooks=False, async_hooks=False) diff --git a/setup.cfg b/setup.cfg index 1451bbf9..5f534355 100644 --- a/setup.cfg +++ b/setup.cfg @@ -47,6 +47,7 @@ install_requires = PyYAML>=5.4.1 email-validator>=1.0.5 github3.py>=4.0.1,<5.0.0 + python-gitlab>=6.2.0,<7.0.0 humanize>=0.5.1 python-dateutil>=2.9.0,<3.0.0 invenio-assets>=4.0.0,<5.0.0 From 5089072d78b6fdf974bbf3443eaed107d9b1c982 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Fri, 22 Aug 2025 11:45:28 +0200 Subject: [PATCH 10/19] WIP: bug fixes --- invenio_vcs/errors.py | 33 ++++++++++++++++++++------------- invenio_vcs/oauth/handlers.py | 23 ++++------------------- invenio_vcs/service.py | 6 +++--- 3 files changed, 27 insertions(+), 35 deletions(-) diff --git a/invenio_vcs/errors.py b/invenio_vcs/errors.py index ec9fb400..e746f9c0 100644 --- a/invenio_vcs/errors.py +++ 
b/invenio_vcs/errors.py @@ -28,11 +28,11 @@ from invenio_i18n import gettext as _ -class GitHubError(Exception): +class VCSError(Exception): """General GitHub error.""" -class RepositoryAccessError(GitHubError): +class RepositoryAccessError(VCSError): """Repository access permissions error.""" message = _("The user cannot access the github repository") @@ -46,7 +46,7 @@ def __init__(self, user=None, repo=None, repo_id=None, message=None): self.repo_id = repo_id -class RepositoryDisabledError(GitHubError): +class RepositoryDisabledError(VCSError): """Repository access permissions error.""" message = _("This repository is not enabled for webhooks.") @@ -57,7 +57,7 @@ def __init__(self, repo=None, message=None): self.repo = repo -class RepositoryNotFoundError(GitHubError): +class RepositoryNotFoundError(VCSError): """Repository not found error.""" message = _("The repository does not exist.") @@ -68,7 +68,7 @@ def __init__(self, repo=None, message=None): self.repo = repo -class InvalidSenderError(GitHubError): +class InvalidSenderError(VCSError): """Invalid release sender error.""" message = _("Invalid sender for event") @@ -80,7 +80,7 @@ def __init__(self, event=None, user=None, message=None): self.user = user -class ReleaseAlreadyReceivedError(GitHubError): +class ReleaseAlreadyReceivedError(VCSError): """Invalid release sender error.""" message = _("The release has already been received.") @@ -91,7 +91,7 @@ def __init__(self, release=None, message=None): self.release = release -class CustomGitHubMetadataError(GitHubError): +class CustomGitHubMetadataError(VCSError): """Invalid Custom GitHub Metadata file.""" message = _("The metadata file is not valid JSON.") @@ -102,7 +102,7 @@ def __init__(self, file=None, message=None): self.file = file -class GithubTokenNotFound(GitHubError): +class GithubTokenNotFound(VCSError): """Oauth session token was not found.""" message = _("The oauth session token was not found.") @@ -113,7 +113,7 @@ def __init__(self, user=None, 
message=None): self.user = user -class RemoteAccountNotFound(GitHubError): +class RemoteAccountNotFound(VCSError): """Remote account for the user is not setup.""" message = _("RemoteAccount not found for user") @@ -124,7 +124,7 @@ def __init__(self, user=None, message=None): self.user = user -class RemoteAccountDataNotSet(GitHubError): +class RemoteAccountDataNotSet(VCSError): """Remote account extra data for the user is not set.""" message = _("RemoteAccount extra data not set for user.") @@ -135,7 +135,7 @@ def __init__(self, user=None, message=None): self.user = user -class ReleaseNotFound(GitHubError): +class ReleaseNotFound(VCSError): """Release does not exist.""" message = _("Release does not exist.") @@ -145,7 +145,7 @@ def __init__(self, message=None): super().__init__(message or self.message) -class UnexpectedProviderResponse(GitHubError): +class UnexpectedProviderResponse(VCSError): """Request to Github API returned an unexpected error.""" message = _("Provider API returned an unexpected error.") @@ -155,7 +155,7 @@ def __init__(self, message=None): super().__init__(message or self.message) -class ReleaseZipballFetchError(GitHubError): +class ReleaseZipballFetchError(VCSError): """Error fetching release zipball file.""" message = _("Error fetching release zipball file.") @@ -163,3 +163,10 @@ class ReleaseZipballFetchError(GitHubError): def __init__(self, message=None): """Constructor.""" super().__init__(message or self.message) + + +class UserInfoNoneError(VCSError): + message = _("Provider did not return user profile information.") + + def __init__(self, message=None) -> None: + super().__init__(message or self.message) diff --git a/invenio_vcs/oauth/handlers.py b/invenio_vcs/oauth/handlers.py index c9009ca6..3f5f505f 100644 --- a/invenio_vcs/oauth/handlers.py +++ b/invenio_vcs/oauth/handlers.py @@ -1,24 +1,9 @@ # -*- coding: utf-8 -*- -# # This file is part of Invenio. -# Copyright (C) 2023 CERN. 
-# -# Invenio is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Invenio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Invenio. If not, see . +# Copyright (C) 2025 CERN. # -# In applying this licence, CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization -# or submit itself to any jurisdiction. +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. """Implement OAuth client handler.""" @@ -84,7 +69,7 @@ def disconnect_handler(self, remote): for repo in repos: if repo.hook: repos_with_hooks.append((repo.provider_id, repo.hook)) - svc.disable_repository(repo.id) + svc.disable_repository(repo.provider_id) # Commit any changes before running the ascynhronous task db.session.commit() diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py index 29be89ef..72eec5d5 100644 --- a/invenio_vcs/service.py +++ b/invenio_vcs/service.py @@ -20,6 +20,7 @@ RepositoryAccessError, RepositoryDisabledError, RepositoryNotFoundError, + UserInfoNoneError, ) from invenio_vcs.models import Release, ReleaseStatus, Repository from invenio_vcs.proxies import current_vcs @@ -226,7 +227,7 @@ def _sync_hooks(self, repo_ids, asynchronous=True): # If hooks will run asynchronously, we need to commit any changes done so far db.session.commit() sync_hooks_task.delay( - self.provider.factory.id, self.provider.user_id, repo_ids + self.provider.factory.id, self.provider.user_id, 
list(repo_ids) ) def sync_repo_hook(self, repo_id): @@ -272,8 +273,7 @@ def init_account(self): user = self.provider.get_own_user() if user is None: - # TODO: create a reasonable exception here - raise Exception("TODO") + raise UserInfoNoneError # Setup local access tokens to be used by the webhooks hook_token = ProviderToken.create_personal( From a112f6979d6cb441a0c809a9091e285c50f16a8f Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Wed, 27 Aug 2025 10:28:15 +0200 Subject: [PATCH 11/19] WIP: move repo data to table instead of `extra_data` --- ...54318294_switch_to_generic_git_services.py | 76 +++++++++- invenio_vcs/contrib/github.py | 14 +- invenio_vcs/contrib/gitlab.py | 11 +- invenio_vcs/generic_models.py | 77 ++++++++++ invenio_vcs/models.py | 60 +++++++- invenio_vcs/providers.py | 69 +-------- invenio_vcs/service.py | 132 ++++++++++-------- .../invenio_vcs/settings/index_item.html | 6 +- 8 files changed, 309 insertions(+), 136 deletions(-) create mode 100644 invenio_vcs/generic_models.py diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index 5f3bd44d..2e24e92a 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -13,6 +13,9 @@ # revision identifiers, used by Alembic. revision = "1754318294" down_revision = "b0eaee37b545" +# You cannot rename an Alembic branch. So we will have to keep +# the branch label `invenio-github` despite changing the module +# to `invenio-vcs`. 
branch_labels = () depends_on = None @@ -38,7 +41,37 @@ def upgrade(): existing_nullable=True, ) op.add_column( - "vcs_repositories", sa.Column("provider", sa.String(255), nullable=False) + "vcs_repositories", + sa.Column("provider", sa.String(255), nullable=False), + ) + op.add_column( + "vcs_repositories", + sa.Column("default_branch", sa.String(255), nullable=False, default="master"), + ) + op.add_column( + "vcs_repositories", sa.Column("description", sa.String(10000), nullable=True) + ) + op.add_column( + "vcs_repositories", sa.Column("html_url", sa.String(10000), nullable=False) + ) + op.add_column( + "vcs_repositories", sa.Column("license_spdx", sa.String(255), nullable=True) + ) + op.drop_index("ix_github_repositories_name") + op.drop_index("ix_github_repositories_github_id") + + # Because they rely on the `provider` column, these are automatically + # deleted when downgrading so we don't need a separate drop command + # for them. + op.create_unique_constraint( + constraint_name=op.f("uq_vcs_repositories_provider_provider_id"), + table_name="vcs_repositories", + columns=["provider", "provider_id"], + ) + op.create_unique_constraint( + constraint_name=op.f("uq_vcs_repositories_provider_provider_id_name"), + table_name="vcs_repositories", + columns=["provider", "provider_id", "name"], ) op.rename_table("github_releases", "vcs_releases") @@ -51,6 +84,24 @@ def upgrade(): existing_type=sa.Integer(), existing_nullable=True, ) + op.add_column("vcs_releases", sa.Column("provider", sa.String(255), nullable=False)) + + op.drop_constraint( + op.f("uq_github_releases_release_id"), table_name="vcs_releases", type_="unique" + ) + # A given provider cannot have duplicate repository IDs. 
+ # These constraints are also inherently deleted when the `provider` column is dropped + op.create_unique_constraint( + constraint_name=op.f("uq_vcs_releases_provider_id_provider"), + table_name="vcs_releases", + columns=["provider_id", "provider"], + ) + # A specific repository from a given provider cannot have multiple releases of the same tag + op.create_unique_constraint( + constraint_name=op.f("uq_vcs_releases_provider_id_provider_tag"), + table_name="vcs_releases", + columns=["provider_id", "provider", "tag"], + ) # ### end Alembic commands ### @@ -74,8 +125,25 @@ def downgrade(): nullable=True, existing_type=sa.String(length=255), existing_nullable=True, + postgresql_using="hook::integer", ) op.drop_column("github_repositories", "provider") + op.drop_column("github_repositories", "description") + op.drop_column("github_repositories", "html_url") + op.drop_column("github_repositories", "license_spdx") + op.drop_column("github_repositories", "default_branch") + op.create_index( + op.f("ix_github_repositories_github_id"), + "github_repositories", + ["github_id"], + unique=True, + ) + op.create_index( + op.f("ix_github_repositories_name"), + "github_repositories", + ["name"], + unique=True, + ) op.rename_table("vcs_releases", "github_releases") op.alter_column( @@ -88,4 +156,10 @@ def downgrade(): existing_nullable=False, postgresql_using="provider_id::integer", ) + op.drop_column("github_releases", "provider") + op.create_unique_constraint( + op.f("uq_github_releases_release_id"), + table_name="github_releases", + columns=["release_id"], + ) # ### end Alembic commands ### diff --git a/invenio_vcs/contrib/github.py b/invenio_vcs/contrib/github.py index 1dc4796d..50c4654a 100644 --- a/invenio_vcs/contrib/github.py +++ b/invenio_vcs/contrib/github.py @@ -1,5 +1,11 @@ +# -*- coding: utf-8 -*- +# This file is part of Invenio. +# Copyright (C) 2025 CERN. 
+# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + import json -from datetime import datetime import dateutil import github3 @@ -8,10 +14,10 @@ from github3.repos import ShortRepository from invenio_i18n import gettext as _ from invenio_oauthclient.contrib.github import GitHubOAuthSettingsHelper -from werkzeug.utils import cached_property, import_string +from werkzeug.utils import cached_property from invenio_vcs.errors import ReleaseZipballFetchError, UnexpectedProviderResponse -from invenio_vcs.providers import ( +from invenio_vcs.generic_models import ( GenericContributor, GenericOwner, GenericOwnerType, @@ -19,6 +25,8 @@ GenericRepository, GenericUser, GenericWebhook, +) +from invenio_vcs.providers import ( RepositoryServiceProvider, RepositoryServiceProviderFactory, ) diff --git a/invenio_vcs/contrib/gitlab.py b/invenio_vcs/contrib/gitlab.py index 420038a8..cbcd2e76 100644 --- a/invenio_vcs/contrib/gitlab.py +++ b/invenio_vcs/contrib/gitlab.py @@ -1,3 +1,10 @@ +# -*- coding: utf-8 -*- +# This file is part of Invenio. +# Copyright (C) 2025 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. 
+ from __future__ import annotations from typing import Any @@ -10,7 +17,7 @@ from invenio_oauthclient import current_oauthclient from werkzeug.utils import cached_property -from invenio_vcs.providers import ( +from invenio_vcs.generic_models import ( GenericContributor, GenericOwner, GenericOwnerType, @@ -18,6 +25,8 @@ GenericRepository, GenericUser, GenericWebhook, +) +from invenio_vcs.providers import ( RepositoryServiceProvider, RepositoryServiceProviderFactory, ) diff --git a/invenio_vcs/generic_models.py b/invenio_vcs/generic_models.py new file mode 100644 index 00000000..d941af60 --- /dev/null +++ b/invenio_vcs/generic_models.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime +from enum import Enum + +from invenio_vcs.models import Repository + + +@dataclass +class GenericWebhook: + id: str + repository_id: str + url: str + + +@dataclass +class GenericRepository: + id: str + full_name: str + default_branch: str + html_url: str + description: str | None = None + license_spdx: str | None = None + + @staticmethod + def from_model(model: Repository): + return GenericRepository( + id=model.provider_id, + full_name=model.name, + default_branch=model.default_branch, + html_url=model.html_url, + description=model.description, + license_spdx=model.license_spdx, + ) + + +@dataclass +class GenericRelease: + id: str + tag_name: str + created_at: datetime + html_url: str + name: str | None = None + body: str | None = None + tarball_url: str | None = None + zipball_url: str | None = None + published_at: datetime | None = None + + +@dataclass +class GenericUser: + id: str + username: str + display_name: str | None = None + + +class GenericOwnerType(Enum): + Person = 1 + Organization = 2 + + +@dataclass +class GenericOwner: + id: str + path_name: str + type: GenericOwnerType + display_name: str | None = None + + +@dataclass +class GenericContributor: + id: str + username: str + company: str | None = 
None + contributions_count: int | None = None + display_name: str | None = None diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index bcd4956c..7f60df56 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -31,6 +31,7 @@ from invenio_db import db from invenio_i18n import lazy_gettext as _ from invenio_webhooks.models import Event +from sqlalchemy import Index, UniqueConstraint from sqlalchemy.dialects import postgresql from sqlalchemy_utils.models import Timestamp from sqlalchemy_utils.types import ChoiceType, JSONType, UUIDType @@ -112,6 +113,21 @@ class Repository(db.Model, Timestamp): __tablename__ = "vcs_repositories" + __table_args__ = ( + UniqueConstraint( + "provider", + "provider_id", + "name", + name="uq_vcs_repositories_provider_provider_id_name", + ), + UniqueConstraint( + "provider", + "provider_id", + name="uq_vcs_repositories_provider_provider_id", + ), + # Index("ix_vcs_repositories_provider_provider_id", "provider", "provider_id"), + ) + id = db.Column( UUIDType, primary_key=True, @@ -121,7 +137,6 @@ class Repository(db.Model, Timestamp): provider_id = db.Column( db.String(255), - index=True, nullable=False, ) """Unique GitHub identifier for a repository. 
@@ -144,13 +159,18 @@ class Repository(db.Model, Timestamp): provider = db.Column(db.String(255), nullable=False) """Which VCS provider the repository is hosted by (and therefore the context in which to consider the provider_id)""" - name = db.Column(db.String(255), unique=True, index=True, nullable=False) + description = db.Column(db.String(10000), nullable=True) + html_url = db.Column(db.String(10000), nullable=False) + license_spdx = db.Column(db.String(255), nullable=True) + default_branch = db.Column(db.String(255), nullable=False) + + name = db.Column(db.String(255), nullable=False) """Fully qualified name of the repository including user/organization.""" user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) """Reference user that can manage this repository.""" - hook = db.Column(db.String(255)) + hook = db.Column(db.String(255), nullable=True) """Hook identifier.""" # @@ -159,13 +179,28 @@ class Repository(db.Model, Timestamp): user = db.relationship(User) @classmethod - def create(cls, user_id, provider, provider_id=None, name=None, **kwargs): + def create( + cls, + user_id, + provider, + provider_id, + html_url, + default_branch, + name=None, + description=None, + license_spdx=None, + **kwargs, + ): """Create the repository.""" obj = cls( user_id=user_id, provider=provider, provider_id=provider_id, name=name, + html_url=html_url, + default_branch=default_branch, + description=description, + license_spdx=license_spdx, **kwargs, ) db.session.add(obj) @@ -220,6 +255,20 @@ class Release(db.Model, Timestamp): __tablename__ = "vcs_releases" + __table_args__ = ( + UniqueConstraint( + "provider", + "provider_id", + name="uq_vcs_releases_provider_id_provider", + ), + UniqueConstraint( + "provider_id", + "provider", + "tag", + name="uq_vcs_releases_provider_id_provider_tag", + ), + ) + id = db.Column( UUIDType, primary_key=True, @@ -230,6 +279,9 @@ class Release(db.Model, Timestamp): provider_id = db.Column(db.String(255), nullable=True) """Unique 
GitHub release identifier.""" + provider = db.Column(db.String(255), nullable=False) + """Which VCS provider the release is hosted by (and therefore the context in which to consider the provider_id)""" + tag = db.Column(db.String(255)) """Release tag.""" diff --git a/invenio_vcs/providers.py b/invenio_vcs/providers.py index 64ba7fa6..3fb06da1 100644 --- a/invenio_vcs/providers.py +++ b/invenio_vcs/providers.py @@ -1,10 +1,6 @@ from __future__ import annotations -import types from abc import ABC, abstractmethod -from dataclasses import dataclass -from datetime import datetime -from enum import Enum from typing import Any, Generator from urllib.parse import urlparse @@ -21,65 +17,6 @@ from invenio_vcs.oauth.handlers import OAuthHandlers -@dataclass -class GenericWebhook: - id: str - repository_id: str - url: str - - -@dataclass -class GenericRepository: - id: str - full_name: str - default_branch: str - html_url: str - description: str | types.NoneType = None - license_spdx: str | types.NoneType = None - - -@dataclass -class GenericRelease: - id: str - tag_name: str - created_at: datetime - html_url: str - name: str | types.NoneType = None - body: str | types.NoneType = None - tarball_url: str | types.NoneType = None - zipball_url: str | types.NoneType = None - published_at: datetime | types.NoneType = None - - -@dataclass -class GenericUser: - id: str - username: str - display_name: str | types.NoneType = None - - -class GenericOwnerType(Enum): - Person = 1 - Organization = 2 - - -@dataclass -class GenericOwner: - id: str - path_name: str - type: GenericOwnerType - display_name: str | types.NoneType = None - - -@dataclass -class GenericContributor: - id: str - username: str - company: str | None = None - contributions_count: int | None = None - display_name: str | types.NoneType = None - - class RepositoryServiceProviderFactory(ABC): def __init__( self, @@ -145,10 +82,10 @@ def webhook_event_to_generic( ) -> tuple[GenericRelease, GenericRepository]: raise 
NotImplementedError - def for_user(self, user_id: str): + def for_user(self, user_id: int): return self.provider(self, user_id) - def for_access_token(self, user_id: str, access_token: str): + def for_access_token(self, user_id: int, access_token: str): return self.provider(self, user_id, access_token=access_token) @property @@ -164,7 +101,7 @@ def vocabulary(self): class RepositoryServiceProvider(ABC): def __init__( - self, factory: RepositoryServiceProviderFactory, user_id: str, access_token=None + self, factory: RepositoryServiceProviderFactory, user_id: int, access_token=None ) -> None: self.factory = factory self.user_id = user_id diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py index 72eec5d5..1034b91f 100644 --- a/invenio_vcs/service.py +++ b/invenio_vcs/service.py @@ -1,6 +1,5 @@ from abc import abstractmethod from contextlib import contextmanager -from copy import deepcopy from dataclasses import asdict from typing import TYPE_CHECKING @@ -22,6 +21,7 @@ RepositoryNotFoundError, UserInfoNoneError, ) +from invenio_vcs.generic_models import GenericRelease, GenericRepository from invenio_vcs.models import Release, ReleaseStatus, Repository from invenio_vcs.proxies import current_vcs from invenio_vcs.tasks import sync_hooks as sync_hooks_task @@ -29,8 +29,6 @@ if TYPE_CHECKING: from invenio_vcs.providers import ( - GenericRelease, - GenericRepository, RepositoryServiceProvider, ) @@ -40,11 +38,11 @@ def __init__(self, provider: "RepositoryServiceProvider") -> None: self.provider = provider @staticmethod - def for_provider_and_user(provider_id: str, user_id: str): + def for_provider_and_user(provider_id: str, user_id: int): return VCSService(get_provider_by_id(provider_id).for_user(user_id)) @staticmethod - def for_provider_and_token(provider_id: str, user_id: str, access_token: str): + def for_provider_and_token(provider_id: str, user_id: int, access_token: str): return VCSService( get_provider_by_id(provider_id).for_access_token(user_id, 
access_token) ) @@ -56,7 +54,10 @@ def is_authenticated(self): @property def user_available_repositories(self): """Retrieve user repositories from user's remote data.""" - return self.provider.remote_account.extra_data.get("repos", {}) + return Repository.query.filter( + Repository.user_id == self.provider.user_id, + Repository.provider == self.provider.factory.id, + ) @property def user_enabled_repositories(self): @@ -64,25 +65,21 @@ def user_enabled_repositories(self): return Repository.query.filter( Repository.user_id == self.provider.user_id, Repository.provider == self.provider.factory.id, + Repository.hook != None, ) def list_repositories(self): """Retrieves user repositories, containing db repositories plus remote repositories.""" - vcs_repos = deepcopy(self.user_available_repositories) - if vcs_repos: - # 'Enhance' our repos dict, from our database model - db_repos = Repository.query.filter( - Repository.provider_id.in_([k for k in vcs_repos.keys()]) + repos = {} + for db_repo in self.user_available_repositories: + repos[db_repo.provider_id] = asdict(GenericRepository.from_model(db_repo)) + release_instance = current_vcs.release_api_class( + db_repo.latest_release(), self.provider ) - for db_repo in db_repos: - if db_repo.provider_id in vcs_repos: - release_instance = current_vcs.release_api_class( - db_repo.latest_release(), self.provider - ) - vcs_repos[db_repo.provider_id]["instance"] = db_repo - vcs_repos[db_repo.provider_id]["latest"] = release_instance + repos[db_repo.provider_id]["instance"] = db_repo + repos[db_repo.provider_id]["latest"] = release_instance - return vcs_repos + return repos def get_repo_latest_release(self, repo): """Retrieves the repository last release.""" @@ -105,9 +102,14 @@ def list_repo_releases(self, repo): return release_instances def get_repo_default_branch(self, repo_id): - return self.user_available_repositories.get(repo_id, None).get( - "default_branch", None - ) + db_repo = self.user_available_repositories.filter( + 
Repository.provider_id == repo_id + ).first() + + if db_repo is None: + return None + + return db_repo.default_branch def get_last_sync_time(self): """Retrieves the last sync delta time from github's client extra data. @@ -139,7 +141,7 @@ def get_repository(self, repo_id=None, repo_name=None): return repo - def check_repo_access_permissions(self, repo): + def check_repo_access_permissions(self, repo: Repository): """Checks permissions from user on repo. Repo has access if any of the following is True: @@ -153,10 +155,10 @@ def check_repo_access_permissions(self, repo): return True if self.provider.remote_account and self.provider.remote_account.extra_data: - user_has_remote_access = self.user_available_repositories.get( - repo.provider_id - ) - if user_has_remote_access: + user_has_remote_access_count = self.user_available_repositories.filter( + Repository.provider_id == repo.provider_id + ).count() + if user_has_remote_access_count == 1: return True raise RepositoryAccessError( @@ -186,25 +188,43 @@ def sync(self, hooks=True, async_hooks=True): # Update changed names for repositories stored in DB db_repos = Repository.query.filter( Repository.user_id == self.provider.user_id, - ) + Repository.provider == self.provider.factory.id, + ).all() - for repo in db_repos: - vcs_repo = vcs_repos.get(repo.provider_id) - if vcs_repo and repo.name != vcs_repo.full_name: - repo.name = vcs_repo.full_name - db.session.add(repo) + for db_repo in db_repos: + vcs_repo = vcs_repos.get(db_repo.provider_id) + if vcs_repo and db_repo.name != vcs_repo.full_name: + db_repo.name = vcs_repo.full_name + db.session.add(db_repo) # Remove ownership from repositories that the user has no longer # 'admin' permissions, or have been deleted. 
Repository.query.filter( Repository.user_id == self.provider.user_id, + Repository.provider == self.provider.factory.id, ~Repository.provider_id.in_(vcs_repos.keys()), ).update({"user_id": None, "hook": None}, synchronize_session=False) - # Update repos and last sync + # Add new repos from VCS to the DB (without the hook activated) + for _, vcs_repo in vcs_repos.items(): + if any(r.provider_id == vcs_repo.id for r in db_repos): + # We have already added this to our DB + continue + + Repository.create( + user_id=self.provider.user_id, + provider=self.provider.factory.id, + provider_id=vcs_repo.id, + html_url=vcs_repo.html_url, + default_branch=vcs_repo.default_branch, + name=vcs_repo.full_name, + description=vcs_repo.description, + license_spdx=vcs_repo.license_spdx, + ) + + # Update last sync self.provider.remote_account.extra_data.update( dict( - repos={k: asdict(v) for k, v in vcs_repos.items()}, last_sync=iso_utcnow(), ) ) @@ -235,6 +255,7 @@ def sync_repo_hook(self, repo_id): # Get the hook that we may have set in the past hook = self.provider.get_first_valid_webhook(repo_id) vcs_repo = self.provider.get_repository(repo_id) + assert vcs_repo is not None # If hook on GitHub exists, get or create corresponding db object and # enable the hook. Otherwise remove the old hook information. 
@@ -243,10 +264,14 @@ def sync_repo_hook(self, repo_id): if hook: if not db_repo: db_repo = Repository.create( - self.provider.user_id, - self.provider.factory.id, - repo_id, - vcs_repo.full_name, + user_id=self.provider.user_id, + provider=self.provider.factory.id, + provider_id=repo_id, + html_url=vcs_repo.html_url, + default_branch=vcs_repo.default_branch, + name=vcs_repo.full_name, + description=vcs_repo.description, + license_spdx=vcs_repo.license_spdx, ) if not db_repo.enabled: self.mark_repo_enabled(db_repo, hook.id) @@ -257,7 +282,6 @@ def sync_repo_hook(self, repo_id): def mark_repo_disabled(self, repo): """Disables an user repository.""" repo.hook = None - repo.user_id = None def mark_repo_enabled(self, repo, hook): """Enables an user repository.""" @@ -290,15 +314,16 @@ def init_account(self): tokens=dict( webhook=hook_token.id, ), - repos=dict(), last_sync=iso_utcnow(), ) db.session.add(self.provider.remote_account) def enable_repository(self, repository_id): - vcs_repo = self.user_available_repositories.get(repository_id) - if vcs_repo is None: + db_repo = self.user_available_repositories.filter( + Repository.provider_id == repository_id + ).first() + if db_repo is None: raise RepositoryNotFoundError( repository_id, _("Failed to enable repository.") ) @@ -307,29 +332,20 @@ def enable_repository(self, repository_id): if hook_id is None: return False - db_repo = Repository.get( - provider=self.provider.factory.id, provider_id=repository_id - ) - if not db_repo: - db_repo = Repository.create( - provider=self.provider.factory.id, - user_id=self.provider.user_id, - provider_id=repository_id, - name=vcs_repo["full_name"], - ) self.mark_repo_enabled(db_repo, hook_id) return True def disable_repository(self, repository_id, hook_id=None): - if hook_id is None and repository_id not in self.user_available_repositories: + db_repo = self.user_available_repositories.filter( + Repository.provider_id == repository_id + ).first() + + if db_repo is None: raise 
RepositoryNotFoundError( repository_id, _("Failed to disable repository.") ) - db_repo = Repository.get( - provider=self.provider.factory.id, provider_id=repository_id - ) - if db_repo is None: + if not db_repo.enabled: raise RepositoryDisabledError(repository_id) if not self.provider.delete_webhook(repository_id, hook_id): diff --git a/invenio_vcs/templates/semantic-ui/invenio_vcs/settings/index_item.html b/invenio_vcs/templates/semantic-ui/invenio_vcs/settings/index_item.html index 007108b5..f95dc1d4 100644 --- a/invenio_vcs/templates/semantic-ui/invenio_vcs/settings/index_item.html +++ b/invenio_vcs/templates/semantic-ui/invenio_vcs/settings/index_item.html @@ -17,11 +17,11 @@
- + Date: Wed, 27 Aug 2025 15:39:41 +0200 Subject: [PATCH 12/19] WIP: bug fixes --- ...1754318294_switch_to_generic_git_services.py | 4 ++-- invenio_vcs/contrib/gitlab.py | 4 +++- invenio_vcs/generic_models.py | 17 +++++++++++++++-- invenio_vcs/models.py | 17 ++++++++--------- invenio_vcs/providers.py | 8 ++++++++ invenio_vcs/receivers.py | 3 ++- invenio_vcs/service.py | 15 +++++++++------ .../semantic-ui/invenio_vcs/settings/view.html | 6 +++--- invenio_vcs/views/badge.py | 2 +- invenio_vcs/views/vcs.py | 4 ++-- tests/test_webhook.py | 4 ++-- 11 files changed, 55 insertions(+), 29 deletions(-) diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index 2e24e92a..0c143983 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -69,9 +69,9 @@ def upgrade(): columns=["provider", "provider_id"], ) op.create_unique_constraint( - constraint_name=op.f("uq_vcs_repositories_provider_provider_id_name"), + constraint_name=op.f("uq_vcs_repositories_provider_name"), table_name="vcs_repositories", - columns=["provider", "provider_id", "name"], + columns=["provider", "name"], ) op.rename_table("github_releases", "vcs_releases") diff --git a/invenio_vcs/contrib/gitlab.py b/invenio_vcs/contrib/gitlab.py index cbcd2e76..4817c6bf 100644 --- a/invenio_vcs/contrib/gitlab.py +++ b/invenio_vcs/contrib/gitlab.py @@ -158,12 +158,14 @@ def webhook_is_create_release_event(self, event_payload: dict[str, Any]): def webhook_event_to_generic( self, event_payload: dict[str, Any] ) -> tuple[GenericRelease, GenericRepository]: + # https://archives.docs.gitlab.com/18.0/user/project/integrations/webhook_events/#release-events # https://archives.docs.gitlab.com/17.11/user/project/integrations/webhook_events/#release-events + # 
https://archives.docs.gitlab.com/16.11/ee/user/project/integrations/webhook_events.html#release-events zipball_url: str | None = None tarball_url: str | None = None - for source in event_payload["sources"]: + for source in event_payload["assets"]["sources"]: format = source["format"] url = source["url"] if format == "zip": diff --git a/invenio_vcs/generic_models.py b/invenio_vcs/generic_models.py index d941af60..d468a777 100644 --- a/invenio_vcs/generic_models.py +++ b/invenio_vcs/generic_models.py @@ -1,6 +1,6 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import asdict, dataclass from datetime import datetime from enum import Enum @@ -27,13 +27,26 @@ class GenericRepository: def from_model(model: Repository): return GenericRepository( id=model.provider_id, - full_name=model.name, + full_name=model.full_name, default_branch=model.default_branch, html_url=model.html_url, description=model.description, license_spdx=model.license_spdx, ) + def to_model(self, model: Repository): + changed = False + for key, value in asdict(self).items(): + if key in ["id"]: + continue + + db_value = getattr(model, key) + if db_value != value: + changed = True + setattr(model, key, value) + + return changed + @dataclass class GenericRelease: diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index 7f60df56..562a6602 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -116,9 +116,8 @@ class Repository(db.Model, Timestamp): __table_args__ = ( UniqueConstraint( "provider", - "provider_id", "name", - name="uq_vcs_repositories_provider_provider_id_name", + name="uq_vcs_repositories_provider_name", ), UniqueConstraint( "provider", @@ -164,7 +163,7 @@ class Repository(db.Model, Timestamp): license_spdx = db.Column(db.String(255), nullable=True) default_branch = db.Column(db.String(255), nullable=False) - name = db.Column(db.String(255), nullable=False) + full_name = db.Column("name", db.String(255), nullable=False) """Fully 
qualified name of the repository including user/organization.""" user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) @@ -186,7 +185,7 @@ def create( provider_id, html_url, default_branch, - name=None, + full_name=None, description=None, license_spdx=None, **kwargs, @@ -196,7 +195,7 @@ def create( user_id=user_id, provider=provider, provider_id=provider_id, - name=name, + full_name=full_name, html_url=html_url, default_branch=default_branch, description=description, @@ -207,7 +206,7 @@ def create( return obj @classmethod - def get(cls, provider, provider_id=None, name=None): + def get(cls, provider, provider_id=None, full_name=None): """Return a repository given its name or github id. :param integer github_id: GitHub repository identifier. @@ -224,9 +223,9 @@ def get(cls, provider, provider_id=None, name=None): repo = cls.query.filter( Repository.provider_id == provider_id, Repository.provider == provider ).one_or_none() - if not repo and name is not None: + if not repo and full_name is not None: repo = cls.query.filter( - Repository.name == name, Repository.provider == provider + Repository.full_name == full_name, Repository.provider == provider ).one_or_none() return repo @@ -247,7 +246,7 @@ def latest_release(self, status=None): def __repr__(self): """Get repository representation.""" - return "".format(self=self) + return "".format(self=self) class Release(db.Model, Timestamp): diff --git a/invenio_vcs/providers.py b/invenio_vcs/providers.py index 3fb06da1..11cc79c6 100644 --- a/invenio_vcs/providers.py +++ b/invenio_vcs/providers.py @@ -14,6 +14,14 @@ from werkzeug.utils import cached_property from invenio_vcs.errors import RemoteAccountDataNotSet +from invenio_vcs.generic_models import ( + GenericContributor, + GenericOwner, + GenericRelease, + GenericRepository, + GenericUser, + GenericWebhook, +) from invenio_vcs.oauth.handlers import OAuthHandlers diff --git a/invenio_vcs/receivers.py b/invenio_vcs/receivers.py index 1e9c2c0b..0220e387 
100644 --- a/invenio_vcs/receivers.py +++ b/invenio_vcs/receivers.py @@ -84,7 +84,7 @@ def _handle_create_release(self, event): repo = Repository.get( self.provider_factory.id, provider_id=generic_repo.id, - name=generic_repo.full_name, + full_name=generic_repo.full_name, ) if not repo: raise RepositoryNotFoundError(generic_repo.full_name) @@ -92,6 +92,7 @@ def _handle_create_release(self, event): if repo.enabled: release = Release( provider_id=generic_release.id, + provider=self.provider_factory.id, tag=generic_release.tag_name, repository=repo, event=event, diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py index 1034b91f..025fd799 100644 --- a/invenio_vcs/service.py +++ b/invenio_vcs/service.py @@ -131,7 +131,7 @@ def get_repository(self, repo_id=None, repo_name=None): Checks for access permission. """ repo = Repository.get( - self.provider.factory.id, provider_id=repo_id, name=repo_name + self.provider.factory.id, provider_id=repo_id, full_name=repo_name ) if not repo: raise RepositoryNotFoundError(repo_id) @@ -162,7 +162,7 @@ def check_repo_access_permissions(self, repo: Repository): return True raise RepositoryAccessError( - user=self.provider.user_id, repo=repo.name, repo_id=repo.provider_id + user=self.provider.user_id, repo=repo.full_name, repo_id=repo.provider_id ) def sync(self, hooks=True, async_hooks=True): @@ -193,8 +193,11 @@ def sync(self, hooks=True, async_hooks=True): for db_repo in db_repos: vcs_repo = vcs_repos.get(db_repo.provider_id) - if vcs_repo and db_repo.name != vcs_repo.full_name: - db_repo.name = vcs_repo.full_name + if not vcs_repo: + continue + + changed = vcs_repo.to_model(db_repo) + if changed: db.session.add(db_repo) # Remove ownership from repositories that the user has no longer @@ -217,7 +220,7 @@ def sync(self, hooks=True, async_hooks=True): provider_id=vcs_repo.id, html_url=vcs_repo.html_url, default_branch=vcs_repo.default_branch, - name=vcs_repo.full_name, + full_name=vcs_repo.full_name, 
description=vcs_repo.description, license_spdx=vcs_repo.license_spdx, ) @@ -269,7 +272,7 @@ def sync_repo_hook(self, repo_id): provider_id=repo_id, html_url=vcs_repo.html_url, default_branch=vcs_repo.default_branch, - name=vcs_repo.full_name, + full_name=vcs_repo.full_name, description=vcs_repo.description, license_spdx=vcs_repo.license_spdx, ) diff --git a/invenio_vcs/templates/semantic-ui/invenio_vcs/settings/view.html b/invenio_vcs/templates/semantic-ui/invenio_vcs/settings/view.html index 86781f62..ec9bdd61 100644 --- a/invenio_vcs/templates/semantic-ui/invenio_vcs/settings/view.html +++ b/invenio_vcs/templates/semantic-ui/invenio_vcs/settings/view.html @@ -33,7 +33,7 @@

- {{ repo.name }} + {{ repo.full_name }}

@@ -72,7 +72,7 @@

{{ _("Get started!") }}

{{ _("Go to %(name)s and make your first release.", name=vocabulary["name"]) }}

- {{ repo.name }} + {{ repo.full_name }}
{%- endblock enabled_repo_get_started %} @@ -113,7 +113,7 @@

2 {{ _("Create a release") }}

diff --git a/invenio_vcs/views/badge.py b/invenio_vcs/views/badge.py index 7c8e7166..ee9db80e 100644 --- a/invenio_vcs/views/badge.py +++ b/invenio_vcs/views/badge.py @@ -74,7 +74,7 @@ def index(provider, repo_provider_id): def index_old(provider, user_id, repo_name): """Generate a badge for a specific GitHub repository (by name).""" repo = Repository.query.filter( - Repository.name == repo_name, Repository.provider == provider + Repository.full_name == repo_name, Repository.provider == provider ).one_or_none() if not repo: abort(404) diff --git a/invenio_vcs/views/vcs.py b/invenio_vcs/views/vcs.py index 72531d05..16f1a7d4 100644 --- a/invenio_vcs/views/vcs.py +++ b/invenio_vcs/views/vcs.py @@ -125,9 +125,9 @@ def get_repository(provider, repo_id): latest_release = svc.get_repo_latest_release(repo) default_branch = svc.get_repo_default_branch(repo_id) releases = svc.list_repo_releases(repo) - new_release_url = svc.provider.factory.url_for_new_release(repo.name) + new_release_url = svc.provider.factory.url_for_new_release(repo.full_name) new_citation_file_url = svc.provider.factory.url_for_new_file( - repo.name, default_branch or "main", "CITATION.cff" + repo.full_name, default_branch or "main", "CITATION.cff" ) return render_template( diff --git a/tests/test_webhook.py b/tests/test_webhook.py index a04212aa..2fa404b3 100644 --- a/tests/test_webhook.py +++ b/tests/test_webhook.py @@ -40,7 +40,7 @@ def test_webhook_post(app, db, tester_id, remote_token, github_api): hook = 1234 tag = "v1.0" - repo = Repository.get(github_id=repo_id, name=repo_name) + repo = Repository.get(github_id=repo_id, full_name=repo_name) if not repo: repo = Repository.create(tester_id, repo_id, repo_name) @@ -79,7 +79,7 @@ def test_webhook_post_fail(app, tester_id, remote_token, github_api): hook = 1234 # Create a repository - repo = Repository.get(github_id=repo_id, name=repo_name) + repo = Repository.get(github_id=repo_id, full_name=repo_name) if not repo: repo = Repository.create(tester_id, 
repo_id, repo_name) From d91bb928085bdd9b75bc47a94bf79e2c9e78560f Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Fri, 29 Aug 2025 16:22:26 +0100 Subject: [PATCH 13/19] WIP: add documentation comments --- invenio_vcs/generic_models.py | 16 ++++ invenio_vcs/providers.py | 144 +++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 4 deletions(-) diff --git a/invenio_vcs/generic_models.py b/invenio_vcs/generic_models.py index d468a777..f7975a85 100644 --- a/invenio_vcs/generic_models.py +++ b/invenio_vcs/generic_models.py @@ -1,3 +1,19 @@ +# -*- coding: utf-8 -*- +# This file is part of Invenio. +# Copyright (C) 2025 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +""" +Generic dataclass models to represent the bare minimum necessary data +from VCS providers. These are essentially the "lowest common factor" of +the otherwise large, complex, and heterogenous responses returned by APIs. + +These are used by higher-level calls to have a common set of data to +operate on. Provider implementations are responsible for converting API +responses into these generic classes. +""" + from __future__ import annotations from dataclasses import asdict, dataclass diff --git a/invenio_vcs/providers.py b/invenio_vcs/providers.py index 11cc79c6..1cb2c575 100644 --- a/invenio_vcs/providers.py +++ b/invenio_vcs/providers.py @@ -26,6 +26,14 @@ class RepositoryServiceProviderFactory(ABC): + """ + A factory to create user-specific VCS providers. This class is instantiated once per instance, + usually in the `invenio.cfg` file. It contains general settings and methods that are impossible + to generalise and must be specified on a provider-specific level. + + All methods within this class (except the constructor) should be pure functions. 
+ """ + def __init__( self, provider: type["RepositoryServiceProvider"], @@ -52,7 +60,12 @@ def __init__( @property @abstractmethod - def remote_config(self): + def remote_config(self) -> dict[str, Any]: + """ + Returns a dictionary as the config of the OAuth remote app for this provider. + The config of the app is usually based on the config variables provided + in the constructor. + """ raise NotImplementedError @property @@ -66,28 +79,54 @@ def remote(self): @property @abstractmethod def config(self) -> dict: + """ + Returns a configuration dictionary with options that are specific to a given provider. + """ raise NotImplementedError @abstractmethod def url_for_tag(self, repository_name, tag_name) -> str: + """ + Generates the URL for the UI page showing the file tree for the latest commit with a + given named tag. This is not the page showing the details of a corresponding release. + """ raise NotImplementedError @abstractmethod def url_for_new_release(self, repository_name) -> str: + """ + Generates the URL for the UI page through which the user can create a new release + for a specific repository. + """ raise NotImplementedError @abstractmethod def url_for_new_file(self, repository_name, branch_name, file_name) -> str: + """ + Generates the URL for the UI pages through which a new file with a specific name + on a specific branch in a specific repository can be created. Usually, + this allows the user to type the file contents directly or upload an existing + file. + """ raise NotImplementedError @abstractmethod def webhook_is_create_release_event(self, event_payload: dict[str, Any]): + """ + Returns whether the raw JSON payload of a webhook event is an event corresponding + to the publication of a release. Returning False will end further processing of the + event.
+ """ raise NotImplementedError @abstractmethod def webhook_event_to_generic( self, event_payload: dict[str, Any] ) -> tuple[GenericRelease, GenericRepository]: + """ + Returns the data of the release and repository as extracted from the raw JSON payload + of a webhook event, in generic form. + """ raise NotImplementedError def for_user(self, user_id: int): @@ -108,6 +147,17 @@ def vocabulary(self): class RepositoryServiceProvider(ABC): + """ + The methods to interact with the API of a VCS provider. This class is user-specific + and is always created from a `RepositoryServiceProviderFactory`. + + While some of the default method implementations (such as `access_token`) make access to + the DB, overrides of the unimplemented methods should avoid doing so to minimise + unexpected behaviour. Interaction should be solely with the API of the VCS provider. + + Providers must currently support all of these operations. + """ + def __init__( self, factory: RepositoryServiceProviderFactory, user_id: int, access_token=None ) -> None: @@ -117,7 +167,10 @@ def __init__( @cached_property def remote_account(self): - """Return remote account.""" + """ + Returns the OAuth Remote Account corresponding to the user's authentication + with the provider + """ return RemoteAccount.get(self.user_id, self.factory.remote.consumer_key) @cached_property @@ -147,7 +200,11 @@ def session_token(self): @cached_property def webhook_url(self): - """Return the url to be used by a GitHub webhook.""" + """ + Returns a formatted version of the webhook receiver URL specified in the provider + factory. The `{token}` variable in this URL string is replaced with the user-specific + webhook token. 
+ """ if not self.remote_account.extra_data.get("tokens", {}).get("webhook"): raise RemoteAccountDataNotSet( self.user_id, _("Webhook data not found for user tokens (remote data).") @@ -179,12 +236,26 @@ def is_valid_webhook(self, url: str | None): @abstractmethod def list_repositories(self) -> dict[str, GenericRepository] | None: + """ + Returns a dictionary of {repository_id: GenericRepository} for the current + user. This should return _all_ repositories for which the user has permission + to create and delete webhooks. + + This means this function could return extremely large dictionaries in some cases, + but it will only be called during irregular sync events and stored in the DB. + """ raise NotImplementedError @abstractmethod def list_repository_webhooks( self, repository_id: str ) -> list[GenericWebhook] | None: + """ + Returns an arbitrarily ordered list of the current webhooks of a repository. + This list should only include active webhooks which generate events for which + the corresponding `RepositoryServiceProviderFactory.webhook_is_create_release_event` + would return True. + """ raise NotImplementedError def get_first_valid_webhook(self, repository_id: str) -> GenericWebhook | None: @@ -198,44 +269,109 @@ def get_first_valid_webhook(self, repository_id: str) -> GenericWebhook | None: @abstractmethod def get_repository(self, repository_id: str) -> GenericRepository | None: + """ + Returns the details of a specific repository by ID, or None if the + repository does not exist or the user has no permission to view it. + """ raise NotImplementedError @abstractmethod def list_repository_contributors( self, repository_id: str, max: int ) -> list[GenericContributor] | None: + """ + Returns the list of entities that have contributed to a given repository. + This list may contain entities that are not currently or never have been + registered users of the VCS provider (e.g. in the case of repos imported + from a remote source). 
+ + Returns None if the repository does not exist or the user has no permission + to view it or its contributors. + """ + raise NotImplementedError + + @abstractmethod + def list_repository_user_ids(self, repository_id: str) -> list[str] | None: + """ + Returns a list of the IDs of valid users registered with the VCS provider + that have sufficient permission to create/delete webhooks on the given + repository. This list should contain all users for which the corresponding + repo would be included in a `list_repositories` call. + + Returns None if the repository does not exist or the user has no permission + to view it or its member users. + """ raise NotImplementedError @abstractmethod def get_repository_owner(self, repository_id: str) -> GenericOwner | None: + """ + Returns the 'owner' of a repository, which is either a user or a group/organization. + Returns None if the repository does not exist or the user does not have permission + to find out its owner. + """ raise NotImplementedError @abstractmethod def create_webhook(self, repository_id: str) -> str | None: + """ + Creates a new webhook for a given repository, triggered by a "create release" event. + The URL destination is specified by `RepositoryServiceProvider.webhook_url`. + Events must be delivered via an HTTP POST request with a JSON payload. + + Returns the ID of the new webhook as returned by the provider, or None if the + creation failed due to the repository not existing or the user not having permission + to create a webhook. + """ raise NotImplementedError @abstractmethod def delete_webhook(self, repository_id: str, hook_id: str | None = None) -> bool: + """ + Deletes a webhook from the specified repository. + If `hook_id` is specified, the webhook with that ID must be deleted. + Otherwise, all webhooks with URLs for which `is_valid_webhook` would return + True should be deleted.
+ + Returns True if the deletion was successful, and False if it failed due to + the repository not existing or the user not having permission to delete its + webhooks. + """ raise NotImplementedError @abstractmethod def get_own_user(self) -> GenericUser | None: + """ + Returns information about the user for which this class has been instantiated, + or None if the user does not exist (e.g. if the user ID is incorrectly specified). + """ raise NotImplementedError @abstractmethod def resolve_release_zipball_url(self, release_zipball_url: str) -> str | None: + # TODO: why do we have this raise NotImplementedError @abstractmethod def fetch_release_zipball( self, release_zipball_url: str, timeout: int ) -> Generator[HTTPResponse]: + """ + Returns the HTTP response for downloading the contents of a zipball from a given release. + This is provider-specific functionality as it will require attaching an auth token + to the request for private repos (and even public repos to avoid rate limits sometimes). + """ raise NotImplementedError @abstractmethod def retrieve_remote_file( - self, repository_id: str, tag_name: str, file_name: str + self, repository_id: str, ref_name: str, file_name: str ) -> bytes | None: + """ + Downloads the contents of a specific file in a repo for a given ref (which could be + a tag, a commit ref, a branch name, etc). Returns the raw bytes, or None if the + repo/file does not exist or the user doesn't have permission to view it. 
+ """ raise NotImplementedError @abstractmethod From 9271aa78a16b0e47cffc45b021b652f4b18cf4d4 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Wed, 3 Sep 2025 12:12:03 +0200 Subject: [PATCH 14/19] WIP: store all repos in vcs_repositories, add user-repo association table --- ...54318294_switch_to_generic_git_services.py | 25 +++++ invenio_vcs/contrib/github.py | 19 +++- invenio_vcs/contrib/gitlab.py | 9 ++ invenio_vcs/ext.py | 3 +- invenio_vcs/models.py | 39 +++++-- invenio_vcs/service.py | 100 +++++++++++++----- 6 files changed, 161 insertions(+), 34 deletions(-) diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index 0c143983..faf61fe7 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -9,6 +9,7 @@ import sqlalchemy as sa from alembic import op +from sqlalchemy_utils import UUIDType # revision identifiers, used by Alembic. 
revision = "1754318294" @@ -102,11 +103,35 @@ def upgrade(): table_name="vcs_releases", columns=["provider_id", "provider", "tag"], ) + + op.create_table( + "vcs_repository_users", + sa.Column("repository_id", UUIDType(), primary_key=True), + sa.Column("user_id", sa.Integer(), primary_key=True), + sa.ForeignKeyConstraint( + ["repository_id"], + ["vcs_repositories.id"], + name=op.f("fk_vcs_repository_users_repository_id_vcs_repositories"), + ), + sa.ForeignKeyConstraint( + ["user_id"], + ["accounts_user.id"], + name=op.f("fk_vcs_repository_users_user_id_accounts_user"), + ), + ) + op.alter_column("vcs_repositories", "user_id", new_column_name="enabled_by_id") # ### end Alembic commands ### def downgrade(): """Downgrade database.""" + op.alter_column( + "vcs_repositories", + "enabled_by_id", + new_column_name="user_id", + ) + op.drop_table("vcs_repository_users") + op.rename_table("vcs_repositories", "github_repositories") op.alter_column( "github_repositories", diff --git a/invenio_vcs/contrib/github.py b/invenio_vcs/contrib/github.py index 50c4654a..ba45e86a 100644 --- a/invenio_vcs/contrib/github.py +++ b/invenio_vcs/contrib/github.py @@ -66,7 +66,9 @@ def __init__( @property def remote_config(self): request_token_params = { - "scope": "read:user,user:email,admin:repo_hook,read:org" + # General `repo` scope is required for reading collaborators + # https://docs.github.com/en/apps/oauth-apps/building-oauth-apps/scopes-for-oauth-apps + "scope": "read:user,user:email,admin:repo_hook,read:org,repo" } helper = GitHubOAuthSettingsHelper( @@ -206,6 +208,21 @@ def list_repository_webhooks(self, repository_id): ) return hooks + def list_repository_user_ids(self, repository_id: str): + assert repository_id.isdigit() + repo = self._gh.repository_with_id(int(repository_id)) + if repo is None: + return None + + user_ids: list[str] = [] + for collaborator in repo.collaborators(): + if not collaborator.permissions["admin"]: + continue + + user_ids.append(str(collaborator.id)) + 
+ return user_ids + def get_repository(self, repository_id): assert repository_id.isdigit() diff --git a/invenio_vcs/contrib/gitlab.py b/invenio_vcs/contrib/gitlab.py index 4817c6bf..9eb278c4 100644 --- a/invenio_vcs/contrib/gitlab.py +++ b/invenio_vcs/contrib/gitlab.py @@ -320,6 +320,15 @@ def list_repository_webhooks( ) return hooks + def list_repository_user_ids(self, repository_id: str) -> list[str] | None: + # https://docs.gitlab.com/api/members/#list-all-members-of-a-group-or-project-including-inherited-and-invited-members + user_ids: list[str] = [] + for member in self._gl.projects.get(repository_id, lazy=True).members_all.list( + iterator=True + ): + user_ids.append(str(member.id)) + return user_ids + @_gl_response_error_handler def create_webhook(self, repository_id: str) -> str | None: assert repository_id.isdigit() diff --git a/invenio_vcs/ext.py b/invenio_vcs/ext.py index 53e6f0d7..1a9d4b8d 100644 --- a/invenio_vcs/ext.py +++ b/invenio_vcs/ext.py @@ -129,4 +129,5 @@ def init_webhooks(app): for provider in get_provider_list(app): # Procedurally register the webhook receivers instead of including them as an entry point, since # they are defined in the VCS provider config list rather than in the instance's setup.cfg file. 
- state.register(provider.id, VCSReceiver) + if provider.id not in state.receivers: + state.register(provider.id, VCSReceiver) diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index 562a6602..d1869855 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -26,13 +26,15 @@ import uuid from enum import Enum +from typing import List from invenio_accounts.models import User from invenio_db import db from invenio_i18n import lazy_gettext as _ from invenio_webhooks.models import Event -from sqlalchemy import Index, UniqueConstraint +from sqlalchemy import UniqueConstraint from sqlalchemy.dialects import postgresql +from sqlalchemy.orm import Mapped from sqlalchemy_utils.models import Timestamp from sqlalchemy_utils.types import ChoiceType, JSONType, UUIDType @@ -108,6 +110,21 @@ def color(self): return RELEASE_STATUS_COLOR[self.name] +repository_user_association = db.Table( + "vcs_repository_users", + db.Model.metadata, + db.Column( + "repository_id", + UUIDType, + db.ForeignKey("vcs_repositories.id"), + primary_key=True, + ), + db.Column( + "user_id", db.Integer, db.ForeignKey("accounts_user.id"), primary_key=True + ), +) + + class Repository(db.Model, Timestamp): """Information about a GitHub repository.""" @@ -166,21 +183,20 @@ class Repository(db.Model, Timestamp): full_name = db.Column("name", db.String(255), nullable=False) """Fully qualified name of the repository including user/organization.""" - user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) - """Reference user that can manage this repository.""" - hook = db.Column(db.String(255), nullable=True) """Hook identifier.""" + enabled_by_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) + # # Relationships # - user = db.relationship(User) + users = db.relationship(User, secondary=repository_user_association) + enabled_by_user = db.relationship(User, foreign_keys=[enabled_by_id]) @classmethod def create( cls, - user_id, provider, provider_id, html_url, @@ 
-192,7 +208,6 @@ def create( ): """Create the repository.""" obj = cls( - user_id=user_id, provider=provider, provider_id=provider_id, full_name=full_name, @@ -205,6 +220,16 @@ def create( db.session.add(obj) return obj + def add_user(self, user_id: int): + user = User(id=user_id) + user = db.session.merge(user) + self.users.append(user) + + def delete_user(self, user_id: int): + user = User(id=user_id) + user = db.session.merge(user) + self.users.remove(user) + @classmethod def get(cls, provider, provider_id=None, full_name=None): """Return a repository given its name or github id. diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py index 025fd799..190fc1c6 100644 --- a/invenio_vcs/service.py +++ b/invenio_vcs/service.py @@ -6,9 +6,13 @@ from flask import current_app from invenio_access.permissions import authenticated_user from invenio_access.utils import get_identity +from invenio_accounts.models import User, UserIdentity from invenio_db import db from invenio_i18n import gettext as _ from invenio_oauth2server.models import Token as ProviderToken +from invenio_oauthclient import oauth_link_external_id +from invenio_oauthclient.models import RemoteAccount +from sqlalchemy import delete, select from sqlalchemy.exc import NoResultFound from werkzeug.utils import cached_property @@ -22,7 +26,12 @@ UserInfoNoneError, ) from invenio_vcs.generic_models import GenericRelease, GenericRepository -from invenio_vcs.models import Release, ReleaseStatus, Repository +from invenio_vcs.models import ( + Release, + ReleaseStatus, + Repository, + repository_user_association, +) from invenio_vcs.proxies import current_vcs from invenio_vcs.tasks import sync_hooks as sync_hooks_task from invenio_vcs.utils import iso_utcnow @@ -54,16 +63,16 @@ def is_authenticated(self): @property def user_available_repositories(self): """Retrieve user repositories from user's remote data.""" - return Repository.query.filter( - Repository.user_id == self.provider.user_id, + return 
Repository.query.join(repository_user_association).filter( + repository_user_association.c.user_id == self.provider.user_id, Repository.provider == self.provider.factory.id, ) @property def user_enabled_repositories(self): """Retrieve user repositories from the model.""" - return Repository.query.filter( - Repository.user_id == self.provider.user_id, + return Repository.query.join(repository_user_association).filter( + repository_user_association.c.user_id == self.provider.user_id, Repository.provider == self.provider.factory.id, Repository.hook != None, ) @@ -149,9 +158,11 @@ def check_repo_access_permissions(self, repo: Repository): - user is the owner of the repo - user has access to the repo in GitHub (stored in RemoteAccount.extra_data.repos) """ - if self.provider.user_id and repo and repo.user_id: - user_is_owner = repo.user_id == int(self.provider.user_id) - if user_is_owner: + if self.provider.user_id and repo: + user_is_collaborator = any( + user.id == self.provider.user_id for user in repo.users + ) + if user_is_collaborator: return True if self.provider.remote_account and self.provider.remote_account.extra_data: @@ -165,6 +176,32 @@ def check_repo_access_permissions(self, repo: Repository): user=self.provider.user_id, repo=repo.full_name, repo_id=repo.provider_id ) + def sync_repo_users(self, db_repo: Repository): + vcs_users = self.provider.list_repository_user_ids(db_repo.provider_id) + if vcs_users is None: + return + + is_changed = False + for extern_user_id in vcs_users: + user_identity = UserIdentity.query.filter_by( + method=self.provider.factory.id, + id=extern_user_id, + ).first() + + if user_identity is None: + continue + + if not db.session.scalar( + select(repository_user_association) + .filter_by(user_id=user_identity.id_user, repository_id=db_repo.id) + .limit(1) + ): + db_repo.add_user(user_identity.id_user) + is_changed = True + + return is_changed + # TODO: delete access for users who are no longer in vcs_users + def sync(self, 
hooks=True, async_hooks=True): """Synchronize user repositories. @@ -186,27 +223,34 @@ def sync(self, hooks=True, async_hooks=True): self._sync_hooks(vcs_repos.keys(), asynchronous=async_hooks) # Update changed names for repositories stored in DB - db_repos = Repository.query.filter( - Repository.user_id == self.provider.user_id, - Repository.provider == self.provider.factory.id, - ).all() + db_repos = ( + Repository.query.join(repository_user_association) + .filter( + repository_user_association.c.user_id == self.provider.user_id, + Repository.provider == self.provider.factory.id, + ) + .all() + ) for db_repo in db_repos: vcs_repo = vcs_repos.get(db_repo.provider_id) if not vcs_repo: continue - changed = vcs_repo.to_model(db_repo) - if changed: + changed_users = self.sync_repo_users(db_repo) + changed_model = vcs_repo.to_model(db_repo) + if changed_users or changed_model: db.session.add(db_repo) # Remove ownership from repositories that the user has no longer # 'admin' permissions, or have been deleted. 
- Repository.query.filter( - Repository.user_id == self.provider.user_id, + delete_stmt = delete(repository_user_association).where( + repository_user_association.c.user_id == self.provider.user_id, Repository.provider == self.provider.factory.id, ~Repository.provider_id.in_(vcs_repos.keys()), - ).update({"user_id": None, "hook": None}, synchronize_session=False) + repository_user_association.c.repository_id == Repository.id, + ) + db.session.execute(delete_stmt) # Add new repos from VCS to the DB (without the hook activated) for _, vcs_repo in vcs_repos.items(): @@ -214,8 +258,7 @@ def sync(self, hooks=True, async_hooks=True): # We have already added this to our DB continue - Repository.create( - user_id=self.provider.user_id, + new_db_repo = Repository.create( provider=self.provider.factory.id, provider_id=vcs_repo.id, html_url=vcs_repo.html_url, @@ -224,6 +267,7 @@ def sync(self, hooks=True, async_hooks=True): description=vcs_repo.description, license_spdx=vcs_repo.license_spdx, ) + self.sync_repo_users(new_db_repo) # Update last sync self.provider.remote_account.extra_data.update( @@ -267,7 +311,6 @@ def sync_repo_hook(self, repo_id): if hook: if not db_repo: db_repo = Repository.create( - user_id=self.provider.user_id, provider=self.provider.factory.id, provider_id=repo_id, html_url=vcs_repo.html_url, @@ -276,20 +319,22 @@ def sync_repo_hook(self, repo_id): description=vcs_repo.description, license_spdx=vcs_repo.license_spdx, ) + self.sync_repo_users(db_repo) if not db_repo.enabled: self.mark_repo_enabled(db_repo, hook.id) else: if db_repo: self.mark_repo_disabled(db_repo) - def mark_repo_disabled(self, repo): + def mark_repo_disabled(self, db_repo: Repository): """Disables an user repository.""" - repo.hook = None + db_repo.hook = None + db_repo.enabled_by_id = None - def mark_repo_enabled(self, repo, hook): + def mark_repo_enabled(self, db_repo: Repository, hook_id: str): """Enables an user repository.""" - repo.hook = hook - repo.user_id = 
self.provider.user_id + db_repo.hook = hook_id + db_repo.enabled_by_id = self.provider.user_id def init_account(self): """Setup a new GitHub account.""" @@ -320,6 +365,11 @@ def init_account(self): last_sync=iso_utcnow(), ) + oauth_link_external_id( + User(id=self.provider.user_id), + dict(id=user.id, method=self.provider.factory.id), + ) + db.session.add(self.provider.remote_account) def enable_repository(self, repository_id): From 8dfe75f4e3e6fafbfd70842c66af8455970236db Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Fri, 5 Sep 2025 13:19:10 +0200 Subject: [PATCH 15/19] WIP: fix UI menu bug * Fix a global variable capture issue making the menu fail to differentiate between provider items --- invenio_vcs/ext.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/invenio_vcs/ext.py b/invenio_vcs/ext.py index 1a9d4b8d..24cb4124 100644 --- a/invenio_vcs/ext.py +++ b/invenio_vcs/ext.py @@ -100,17 +100,16 @@ def finalize_app_api(app): def init_menu(app): """Init menu.""" for provider in get_provider_list(app): - id = provider.id - def is_active(): + def is_active(current_node): return ( request.endpoint.startswith("invenio_vcs.") - and request.view_args.get("provider", "") == id + and request.view_args.get("provider", "") == current_node.name ) - current_menu.submenu(f"settings.vcs_{id}").register( + current_menu.submenu(f"settings.{provider.id}").register( endpoint="invenio_vcs.get_repositories", - endpoint_arguments_constructor=lambda: {"provider": id}, + endpoint_arguments_constructor=lambda id=provider.id: {"provider": id}, text=_( "%(icon)s %(provider)s", icon=LazyString( From 77d40575bb6873d469874e31bb722b1cfd08873c Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Wed, 17 Sep 2025 12:13:02 +0200 Subject: [PATCH 16/19] WIP: full repo-user sync and dedupe support --- invenio_vcs/models.py | 2 +- invenio_vcs/service.py | 73 +++++++++++++++++++++++++++++------------- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git 
a/invenio_vcs/models.py b/invenio_vcs/models.py index d1869855..7731de29 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -225,7 +225,7 @@ def add_user(self, user_id: int): user = db.session.merge(user) self.users.append(user) - def delete_user(self, user_id: int): + def remove_user(self, user_id: int): user = User(id=user_id) user = db.session.merge(user) self.users.remove(user) diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py index 190fc1c6..dd9d3316 100644 --- a/invenio_vcs/service.py +++ b/invenio_vcs/service.py @@ -177,12 +177,24 @@ def check_repo_access_permissions(self, repo: Repository): ) def sync_repo_users(self, db_repo: Repository): - vcs_users = self.provider.list_repository_user_ids(db_repo.provider_id) - if vcs_users is None: + """ + Synchronises the member users of the repository. + This retrieves a list of the IDs of users from the VCS who have sufficient access to the + repository (i.e. being able to access all details and create/manage webhooks). + The user IDs are compared locally to find Invenio users who have connected their VCS account. + This is then propagated to the database: Invenio users who have access to the repo are added to + the `repository_user_association` table, and ones who no longer have access are removed. 
+ + :return: boolean of whether any changes were made to the DB + """ + + vcs_user_ids = self.provider.list_repository_user_ids(db_repo.provider_id) + if vcs_user_ids is None: return - is_changed = False - for extern_user_id in vcs_users: + vcs_user_identities: list[UserIdentity] = [] + # Find local users who have connected their VCS accounts with the IDs from the repo members + for extern_user_id in vcs_user_ids: user_identity = UserIdentity.query.filter_by( method=self.provider.factory.id, id=extern_user_id, @@ -191,16 +203,23 @@ def sync_repo_users(self, db_repo: Repository): if user_identity is None: continue - if not db.session.scalar( - select(repository_user_association) - .filter_by(user_id=user_identity.id_user, repository_id=db_repo.id) - .limit(1) - ): + vcs_user_identities.append(user_identity) + + is_changed = False + + # Create user associations that exist in the VCS but not in the DB + for user_identity in vcs_user_identities: + if not any(db_user.id == user_identity.id_user for db_user in db_repo.users): db_repo.add_user(user_identity.id_user) is_changed = True + # Remove user associations that exist in the DB but not in the VCS + for db_user in db_repo.users: + if not any(user_identity.id_user == db_user.id for user_identity in vcs_user_identities): + db_repo.remove_user(db_user.id) + is_changed = True + return is_changed - # TODO: delete access for users who are no longer in vcs_users def sync(self, hooks=True, async_hooks=True): """Synchronize user repositories. @@ -254,20 +273,28 @@ def sync(self, hooks=True, async_hooks=True): # Add new repos from VCS to the DB (without the hook activated) for _, vcs_repo in vcs_repos.items(): - if any(r.provider_id == vcs_repo.id for r in db_repos): - # We have already added this to our DB - continue + # We cannot just check the repo from the existing `db_repos` list as this only includes the repos to which the user + # already has access. E.g. 
a repo from the VCS might already exist in our DB but the user doesn't yet have access to it. + corresponding_db_repo = Repository.query.filter( + Repository.provider_id == vcs_repo.id, + Repository.provider == self.provider.factory.id, + ).first() - new_db_repo = Repository.create( - provider=self.provider.factory.id, - provider_id=vcs_repo.id, - html_url=vcs_repo.html_url, - default_branch=vcs_repo.default_branch, - full_name=vcs_repo.full_name, - description=vcs_repo.description, - license_spdx=vcs_repo.license_spdx, - ) - self.sync_repo_users(new_db_repo) + if corresponding_db_repo is None: + # We do not yet have this repo registered for any user at all in our DB, so we need to create it. + corresponding_db_repo = Repository.create( + provider=self.provider.factory.id, + provider_id=vcs_repo.id, + html_url=vcs_repo.html_url, + default_branch=vcs_repo.default_branch, + full_name=vcs_repo.full_name, + description=vcs_repo.description, + license_spdx=vcs_repo.license_spdx, + ) + + # In any case (even if we already have the repo) we need to sync its member users + # E.g. 
maybe the repo is in our DB but the user for which this sync has been triggered isn't registered as a member + self.sync_repo_users(corresponding_db_repo) # Update last sync self.provider.remote_account.extra_data.update( From 8ae328ac112ae04818f459127a85a2e13e71f732 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Thu, 18 Sep 2025 15:43:16 +0200 Subject: [PATCH 17/19] WIP: migrate existing repo data during alembic migration --- ...54318294_switch_to_generic_git_services.py | 141 +++++++++++++++++- 1 file changed, 134 insertions(+), 7 deletions(-) diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index faf61fe7..ad87ea79 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -1,15 +1,19 @@ # # This file is part of Invenio. -# Copyright (C) 2016-2018 CERN. +# Copyright (C) 2025 CERN. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. """Switch to generic git services""" +import uuid +from datetime import datetime, timezone + import sqlalchemy as sa from alembic import op -from sqlalchemy_utils import UUIDType +from sqlalchemy.ext.mutable import MutableDict +from sqlalchemy_utils import JSONType, UUIDType # revision identifiers, used by Alembic. 
revision = "1754318294" @@ -43,21 +47,27 @@ def upgrade(): ) op.add_column( "vcs_repositories", - sa.Column("provider", sa.String(255), nullable=False), + # We use the provider name "github" by default as this is what we're already using across the codebase + sa.Column("provider", sa.String(255), nullable=False, server_default="github"), ) op.add_column( "vcs_repositories", - sa.Column("default_branch", sa.String(255), nullable=False, default="master"), + sa.Column( + "default_branch", sa.String(255), nullable=False, server_default="master" + ), ) op.add_column( "vcs_repositories", sa.Column("description", sa.String(10000), nullable=True) ) op.add_column( - "vcs_repositories", sa.Column("html_url", sa.String(10000), nullable=False) + # Nullable for now (see below) + "vcs_repositories", + sa.Column("html_url", sa.String(10000), nullable=True), ) op.add_column( "vcs_repositories", sa.Column("license_spdx", sa.String(255), nullable=True) ) + op.alter_column("vcs_repositories", "user_id", new_column_name="enabled_by_id") op.drop_index("ix_github_repositories_name") op.drop_index("ix_github_repositories_github_id") @@ -75,6 +85,116 @@ def upgrade(): columns=["provider", "name"], ) + # Migrate data from the OAuth remote `extra_data` field to the repositories table + # where we will now store everything directly. + # + # We need to recreate the SQLAlchemy models for `RemoteAccount` and `Repository` here but + # in a much more lightweight way. We cannot simply import the models because (a) they depend + # on the full Invenio app being initialised and all extensions available and (b) we need + # to work with the models as they stand precisely at this point in the migration chain + # rather than the model file itself which may be at a later commit. + # + # We only include here the columns, constraints, and relations that we actually need to + # perform the migration, therefore keeping these models as lightweight as possible. 
+ remote_account_table = sa.table( + "oauthclient_remoteaccount", + sa.Column("id", sa.Integer, primary_key=True), + sa.Column("user_id", sa.Integer, sa.ForeignKey("account_user.id")), + sa.Column("client_id", sa.String(255)), + sa.Column("extra_data", MutableDict.as_mutable(JSONType)), + ) + vcs_repositories_table = sa.table( + "vcs_repositories", + sa.Column("id", UUIDType, primary_key=True), + sa.Column("provider_id", sa.String(255), nullable=True), + sa.Column("provider", sa.String(255), nullable=True), + sa.Column("description", sa.String(10000), nullable=True), + sa.Column("html_url", sa.String(10000), nullable=False), + sa.Column("license_spdx", sa.String(255), nullable=True), + sa.Column("default_branch", sa.String(255), nullable=False), + sa.Column("name", sa.String(255), nullable=False), + sa.Column("hook", sa.String(255), nullable=True), + sa.Column( + "enabled_by_id", sa.Integer, sa.ForeignKey("account_user.id"), nullable=True + ), + sa.Column("created", sa.DateTime, nullable=False), + sa.Column("updated", sa.DateTime, nullable=False), + ) + + # This is the recommended way to run SQLAlchemy operations in a migration, see https://alembic.sqlalchemy.org/en/latest/ops.html#alembic.operations.Operations.execute + session = op.get_bind() + + # We don't know the client ID as this is a config variable. + # So to find the RemoteAccounts that correspond to GitHub, we need to check for the existence + # of the `repos` key in the `extra_data` JSON. We cannot make this very efficient sadly, because + # (a) in Postgres we are using JSON not JSONB so there is no efficient JSON querying and (b) the + # instance might be using MySQL/SQLite where we store it as `TEXT`. 
+ + remote_accounts = session.execute(sa.select(remote_account_table)) + for remote_account in remote_accounts.mappings(): + if "repos" not in remote_account["extra_data"]: + continue + + repos = remote_account["extra_data"]["repos"] + + for id, github_repo in repos.items(): + # `id` (the dict key) is a string because JSON keys must be strings + + matching_db_repo_id = session.scalar( + sa.select(vcs_repositories_table).filter_by(provider_id=id) + ) + + if matching_db_repo_id is None: + # We are now storing _all_ repositories (even non-enabled ones) in the DB. + # The repo-user association will be created on the first sync after this migration, we need to download + # the list of users with access to the repo from the GitHub API. + session.execute( + vcs_repositories_table.insert().values( + id=uuid.uuid4(), + provider_id=id, + provider="github", + description=github_repo["description"], + name=github_repo["full_name"], + default_branch=github_repo["default_branch"], + # So far we have only supported github.com so we can safely assume the URL + html_url=f'https://github.com/{github_repo["full_name"]}', + # We have never stored this, it is queried at runtime right now. When the first + # sync happens after this migration, we will download all the license IDs from the VCS. 
+ license_spdx=None, + # This repo wasn't enabled + hook=None, + enabled_by_id=None, + created=datetime.now(tz=timezone.utc), + updated=datetime.now(tz=timezone.utc), + ) + ) + else: + session.execute( + vcs_repositories_table.update() + .filter_by(id=matching_db_repo_id) + .values( + description=github_repo["description"], + name=github_repo["full_name"], + default_branch=github_repo["default_branch"], + html_url=f'https://github.com/{github_repo["full_name"]}', + updated=datetime.now(tz=timezone.utc), + ) + ) + + # Remove `repos` from the existing `extra_data`, leaving only the last sync timestamp + session.execute( + remote_account_table.update() + .filter_by(id=remote_account["id"]) + .values(extra_data={"last_sync": remote_account["extra_data"]["last_sync"]}) + ) + + # We initially set this to nullable=True so we can create the column without an error + # (it would be null for existing records) but after the SQLAlchemy operations above we + # have populated it so we can mark it non-nullable. + op.alter_column( + "vcs_repositories", "html_url", nullable=False, existing_nullable=True + ) + op.rename_table("github_releases", "vcs_releases") op.alter_column( "vcs_releases", @@ -85,7 +205,10 @@ def upgrade(): existing_type=sa.Integer(), existing_nullable=True, ) - op.add_column("vcs_releases", sa.Column("provider", sa.String(255), nullable=False)) + op.add_column( + "vcs_releases", + sa.Column("provider", sa.String(255), nullable=False, server_default="github"), + ) op.drop_constraint( op.f("uq_github_releases_release_id"), table_name="vcs_releases", type_="unique" @@ -119,12 +242,16 @@ def upgrade(): name=op.f("fk_vcs_repository_users_user_id_accounts_user"), ), ) - op.alter_column("vcs_repositories", "user_id", new_column_name="enabled_by_id") # ### end Alembic commands ### def downgrade(): """Downgrade database.""" + + # Currently, the downgrade can only be performed **without data**. The tables are transformed but + # data will not be successfully migrated. 
The upgrade migration has a large amount of custom logic + # for migrating the data into the new format, and this is not replicated/reversed for downgrading. + op.alter_column( "vcs_repositories", "enabled_by_id", From 3bdb77df60fcd40228a154e2e00a06cc0947e03c Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Wed, 24 Sep 2025 14:20:33 +0200 Subject: [PATCH 18/19] WIP: user that enabled a repo is the owner of records --- .../1754318294_switch_to_generic_git_services.py | 14 ++++++++++++++ invenio_vcs/models.py | 2 -- invenio_vcs/service.py | 13 +++++++++---- invenio_vcs/tasks.py | 4 +++- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index ad87ea79..9a58c6dc 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -209,6 +209,13 @@ def upgrade(): "vcs_releases", sa.Column("provider", sa.String(255), nullable=False, server_default="github"), ) + if op.get_context().dialect.name == "postgresql": + op.alter_column( + "vcs_releases", + "errors", + type_=sa.dialects.postgresql.JSONB, + postgresql_using="errors::text::jsonb", + ) op.drop_constraint( op.f("uq_github_releases_release_id"), table_name="vcs_releases", type_="unique" @@ -309,6 +316,13 @@ def downgrade(): postgresql_using="provider_id::integer", ) op.drop_column("github_releases", "provider") + if op.get_context().dialect.name == "postgresql": + op.alter_column( + "github_releases", + "errors", + type_=sa.dialects.postgresql.JSON, + postgresql_using="errors::text::json", + ) op.create_unique_constraint( op.f("uq_github_releases_release_id"), table_name="github_releases", diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index 7731de29..c45f1c89 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -26,7 +26,6 @@ import uuid from enum import Enum -from 
typing import List from invenio_accounts.models import User from invenio_db import db @@ -34,7 +33,6 @@ from invenio_webhooks.models import Event from sqlalchemy import UniqueConstraint from sqlalchemy.dialects import postgresql -from sqlalchemy.orm import Mapped from sqlalchemy_utils.models import Timestamp from sqlalchemy_utils.types import ChoiceType, JSONType, UUIDType diff --git a/invenio_vcs/service.py b/invenio_vcs/service.py index dd9d3316..aaabe7cc 100644 --- a/invenio_vcs/service.py +++ b/invenio_vcs/service.py @@ -209,13 +209,18 @@ def sync_repo_users(self, db_repo: Repository): # Create user associations that exist in the VCS but not in the DB for user_identity in vcs_user_identities: - if not any(db_user.id == user_identity.id_user for db_user in db_repo.users): + if not any( + db_user.id == user_identity.id_user for db_user in db_repo.users + ): db_repo.add_user(user_identity.id_user) is_changed = True # Remove user associations that exist in the DB but not in the VCS for db_user in db_repo.users: - if not any(user_identity.id_user == db_user.id for user_identity in vcs_user_identities): + if not any( + user_identity.id_user == db_user.id + for user_identity in vcs_user_identities + ): db_repo.remove_user(db_user.id) is_changed = True @@ -500,9 +505,9 @@ def release_zipball_url(self): @cached_property def user_identity(self): """Generates release owner's user identity.""" - identity = get_identity(self.db_repo.user) + identity = get_identity(self.db_repo.enabled_by_user) identity.provides.add(authenticated_user) - identity.user = self.db_repo.user + identity.user = self.db_repo.enabled_by_user return identity @cached_property diff --git a/invenio_vcs/tasks.py b/invenio_vcs/tasks.py index c521df19..cb8fd34f 100644 --- a/invenio_vcs/tasks.py +++ b/invenio_vcs/tasks.py @@ -128,7 +128,9 @@ def process_release(provider, release_id): Release.status.in_([ReleaseStatus.RECEIVED, ReleaseStatus.FAILED]), ).one() - provider = 
get_provider_by_id(provider).for_user(release_model.repository.user_id) + provider = get_provider_by_id(provider).for_user( + release_model.repository.enabled_by_id + ) release = current_vcs.release_api_class(release_model, provider) matched_error_cls = None From 53c9201f0f1bf679b1e0c7b62ca03bdd68780545 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Wed, 24 Sep 2025 17:06:55 +0200 Subject: [PATCH 19/19] WIP: start upgrade docs --- CHANGES.rst | 15 ++ RELEASE-NOTES.rst | 36 ----- docs/Makefile | 8 +- docs/api.rst | 22 +-- docs/authors.rst | 23 +--- docs/changes.rst | 23 +--- docs/conf.py | 28 ++-- docs/contributing.rst | 23 +--- docs/index.rst | 25 +--- docs/installation.rst | 23 +--- docs/license.rst | 27 ++-- docs/make.bat | 4 +- docs/upgrading.rst | 129 ++++++++++++++++++ docs/usage.rst | 25 +--- ...54318294_switch_to_generic_git_services.py | 3 + 15 files changed, 202 insertions(+), 212 deletions(-) delete mode 100644 RELEASE-NOTES.rst create mode 100644 docs/upgrading.rst diff --git a/CHANGES.rst b/CHANGES.rst index 18c8819e..79e16784 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -26,6 +26,21 @@ Changes ======= +Version v4.0.0 (released 2025-XX-XX) + +- global!: renamed package to `invenio-vcs` to reflect the extended functionality. +- models!: renamed tables to `vcs_` instead of `github_`, and added columns to identify the provider (e.g. `github`) + - Created an Alembic migration to automate this upgrade. Please note that on large instances (i.e. more than ~50k repos) this migration is likely to lead to severe stability issues and a full lock on a number of tables for several minutes. Instead of the automated migration, please refer to the upgrade guide in this module's documentation. +- models!: moved the user's full list of repositories from the `extra_data` column of `oauthclient_remoteaccount` to `vcs_repositories`, even for non-activated repos. Activated repos are instead signified by non-null `hook` and `enabled_by_id` values. 
User access to repos is stored in the new `vcs_repository_users` table with the access rights propagated during the sync. + - The Alembic migration also moves the JSON data into the table automatically, but this is a slow operation that scales linearly with the size of the `oauthclient_remoteaccount`. If it is too slow, please refer to the upgrade guide for a manual faster method. +- refactor!: support for multiple VCS providers through a refactored module architecture. Contrib providers can now be created by implementing abstract classes `RepositoryServiceProviderFactory` and `RepositoryServiceProvider`. Other than the contrib provider implementations, this module is now fully provider-agnostic. + - New provider implementations may be created either in this module or anywhere else in the codebase by implementing the relevant abstract classes. +- feat: support for GitHub and GitLab via provider implementations. +- config!: moved provider-specific config options into the constructor arguments of the relevant class. Providers can now be declared as a list of class instances. + - Please see the documentation for details on the new configuration format. + +BREAKING CHANGE: various major updates will require changes to the database tables and the config. Please see the upgrade guide for more details. + Version v3.0.1 (released 2025-07-30) - api: fix set alternate zipball URL when tag and branch having same name diff --git a/RELEASE-NOTES.rst b/RELEASE-NOTES.rst deleted file mode 100644 index 614423b9..00000000 --- a/RELEASE-NOTES.rst +++ /dev/null @@ -1,36 +0,0 @@ -========================== - Invenio-GitHub v1.0.0a28 -========================== - -Invenio-GitHub v1.0.0a28 was released on October 24, 2022. - -About ------ - -Invenio module that adds GitHub integration to the platform. - -*This is an experimental developer preview release.* - -What's new ----------- - -- Initial public release. 
- -Installation ------------- - - $ pip install invenio-github==v1.0.0a28 - -Documentation -------------- - - http://invenio-github.readthedocs.io/ - -Happy hacking and thanks for flying Invenio-GitHub. - -| Invenio Development Team -| Email: info@inveniosoftware.org -| IRC: #invenio on irc.freenode.net -| Twitter: http://twitter.com/inveniosoftware -| GitHub: https://github.com/inveniosoftware/invenio-github -| URL: http://inveniosoftware.org diff --git a/docs/Makefile b/docs/Makefile index 7bec3421..3e435a89 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -87,9 +87,9 @@ qthelp: @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Invenio-GitHub.qhcp" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Invenio-VCS.qhcp" @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Invenio-GitHub.qhc" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Invenio-VCS.qhc" applehelp: $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp @@ -104,8 +104,8 @@ devhelp: @echo @echo "Build finished." @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/Invenio-GitHub" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Invenio-GitHub" + @echo "# mkdir -p $$HOME/.local/share/devhelp/Invenio-VCS" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Invenio-VCS" @echo "# devhelp" epub: diff --git a/docs/api.rst b/docs/api.rst index 8bdd0d1c..8b48daaf 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,25 +1,9 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. + Copyright (C) 2025 CERN. - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. API Docs diff --git a/docs/authors.rst b/docs/authors.rst index 2fe04ec2..87232286 100644 --- a/docs/authors.rst +++ b/docs/authors.rst @@ -1,25 +1,8 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. - - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Copyright (C) 2025 CERN. 
+ Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. .. include:: ../AUTHORS.rst diff --git a/docs/changes.rst b/docs/changes.rst index ff5c15b1..e6c39d2c 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -1,25 +1,8 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. - - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Copyright (C) 2025 CERN. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. .. include:: ../CHANGES.rst diff --git a/docs/conf.py b/docs/conf.py index 3e73247e..4ab11b51 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,8 +61,8 @@ master_doc = "index" # General information about the project. 
-project = "Invenio-GitHub" -copyright = "2016, CERN" +project = "Invenio-VCS" +copyright = "2025, CERN" author = "CERN" # The version info for the project you're documenting, acts as replacement for @@ -123,15 +123,15 @@ html_theme = "alabaster" html_theme_options = { - "description": "Invenio module that adds GitHub integration to the platform.", + "description": "Invenio module that adds VCS integration to the platform.", "github_user": "inveniosoftware", - "github_repo": "invenio-github", + "github_repo": "invenio-vcs", "github_button": False, "github_banner": True, "show_powered_by": False, "extra_nav_links": { - "invenio-github@GitHub": "https://github.com/inveniosoftware/invenio-github", - "invenio-github@PyPI": "https://pypi.python.org/pypi/invenio-github/", + "invenio-vcs@GitHub": "https://github.com/inveniosoftware/invenio-github", + "invenio-vcs@PyPI": "https://pypi.python.org/pypi/invenio-github/", }, } @@ -236,7 +236,7 @@ # html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = "invenio-github_namedoc" +htmlhelp_basename = "invenio-vcs_namedoc" # -- Options for LaTeX output --------------------------------------------- @@ -257,8 +257,8 @@ latex_documents = [ ( master_doc, - "invenio-github.tex", - "invenio-github Documentation", + "invenio-vcs.tex", + "invenio-vcs Documentation", "CERN", "manual", ), @@ -289,9 +289,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, "invenio-github", "invenio-github Documentation", [author], 1) -] +man_pages = [(master_doc, "invenio-vcs", "invenio-vcs Documentation", [author], 1)] # If true, show URL addresses after external links. 
# man_show_urls = False @@ -305,10 +303,10 @@ texinfo_documents = [ ( master_doc, - "invenio-github", - "Invenio-GitHub Documentation", + "invenio-vcs", + "Invenio-VCS Documentation", author, - "invenio-github", + "invenio-vcs", "Invenio module that adds GitHub integration to the platform.", "Miscellaneous", ), diff --git a/docs/contributing.rst b/docs/contributing.rst index a28a49d0..8ad3dcf9 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -1,25 +1,8 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. - - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Copyright (C) 2025 CERN. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. .. include:: ../CONTRIBUTING.rst diff --git a/docs/index.rst b/docs/index.rst index 2cdfd71e..b1938f2a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,25 +1,9 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. + Copyright (C) 2025 CERN. 
- Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. .. include:: ../README.rst @@ -28,13 +12,14 @@ User's Guide ------------ This part of the documentation will show you how to get started in using -Invenio-GitHub. +Invenio-VCS. .. toctree:: :maxdepth: 2 installation usage + upgrading API Reference diff --git a/docs/installation.rst b/docs/installation.rst index cc3059e3..dc0fd0bd 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -1,25 +1,8 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. - - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Copyright (C) 2025 CERN. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. .. include:: ../INSTALL.rst diff --git a/docs/license.rst b/docs/license.rst index 2fbc8ee6..9c7323a6 100644 --- a/docs/license.rst +++ b/docs/license.rst @@ -1,19 +1,16 @@ +.. + This file is part of Invenio. + Copyright (C) 2025 CERN. + + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + License ======= -Invenio is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. - -Invenio is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. -You should have received a copy of the GNU General Public License -along with Invenio; if not, write to the Free Software Foundation, Inc., -59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +.. include:: ../LICENSE -In applying this license, CERN does not waive the privileges and immunities -granted to it by virtue of its status as an Intergovernmental Organization or -submit itself to any jurisdiction. +.. 
note:: + In applying this license, CERN does not waive the privileges and immunities + granted to it by virtue of its status as an Intergovernmental Organization or + submit itself to any jurisdiction. diff --git a/docs/make.bat b/docs/make.bat index 2cd0325a..5fed4a51 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -127,9 +127,9 @@ if "%1" == "qthelp" ( echo. echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: - echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Invenio-GitHub.qhcp + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Invenio-VCS.qhcp echo.To view the help file: - echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Invenio-GitHub.ghc + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Invenio-VCS.ghc goto end ) diff --git a/docs/upgrading.rst b/docs/upgrading.rst new file mode 100644 index 00000000..7882faac --- /dev/null +++ b/docs/upgrading.rst @@ -0,0 +1,129 @@ +.. + This file is part of Invenio. + Copyright (C) 2025 CERN. + + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + +Upgrading +========= + +====== +v4.0.0 +====== + +This version consists of a major refactor of the module and a full rename to ``invenio-vcs`` (from ``invenio-github``). +The new version has now been made generic and can support any VCS provider by implementing the relevant abstract classes. + +Contrib implementations are provided for GitHub and GitLab. +GitHub is supported with the exact same set of features as before, meaning this module can continue to be used for the original +purpose of ``invenio-github`` with just some migrations and configuration changes required. + +Please follow this guide if: + +- you are **not** using InvenioRDM; or +- you would like to try out ``invenio-vcs`` before InvenioRDM v14 is released. + + - This is not officially supported but should work for the most part. 
+ +RDM-specific instructions can instead be found in the `InvenioRDM upgrade guide `_. + +-------------------------- +1. Update the dependencies +-------------------------- + +In your ``Pipfile`` (or any similar file you are using to manage dependencies), replace the ``invenio-github`` package with ``invenio-vcs`` and update its version constraint. +Additionally, you will need to ensure some other dependencies are up to date for compatibility with the new changes. + +.. code-block:: toml + + [packages] + # ... + invenio-vcs = ">=4.0.0,<5.0.0" + invenio-rdm-records = "TODO" + invenio-app-rdm = "TODO" + invenio-oauthclient = "TODO" + +.. note:: + + ``invenio-vcs`` is no longer packaged by default with InvenioRDM, as was the case with ``invenio-github``. + You must declare it as an explicit dependency on the instance level. + +Next, run the install operation and make sure the old module is no longer installed. +Having both installed simultaneously will lead to numerous conflicts, especially with Alembic migrations. + +.. code-block:: bash + + invenio-cli install + pip uninstall invenio-github + +---------------------------------- +2. Perform the database migrations +---------------------------------- + +Depending on the size of your instance, the migrations can be performed either automatically by running an Alembic script, or manually by +carefully following the instructions in this guide. + +If your instance meets one of these criteria, please use the manual method to avoid database stability issues: + +- An ``oauthclient_remoteaccount`` table with more than 50k rows +- A ``github_repositories`` table with more than 100k rows + +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +2a. Automated Alembic script +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Run the upgrade command: + +.. code-block:: bash + + pipenv run invenio alembic upgrade + +^^^^^^^^^^^^^^^^^ +2b. Manual method +^^^^^^^^^^^^^^^^^ + +.. 
code-block:: sql + + BEGIN; + ALTER TABLE github_repositories RENAME TO vcs_repositories; + ALTER TABLE vcs_repositories ALTER COLUMN github_id TYPE VARCHAR(255); + ALTER TABLE vcs_repositories ALTER COLUMN github_id SET NOT NULL; + ALTER TABLE vcs_repositories RENAME github_id TO provider_id; + ALTER TABLE vcs_repositories ALTER COLUMN hook TYPE VARCHAR(255); + ALTER TABLE vcs_repositories ALTER COLUMN hook DROP NOT NULL; + ALTER TABLE vcs_repositories ADD COLUMN provider VARCHAR(255) DEFAULT 'github' NOT NULL; + ALTER TABLE vcs_repositories ADD COLUMN default_branch VARCHAR(255) DEFAULT 'master' NOT NULL; + ALTER TABLE vcs_repositories ADD COLUMN description VARCHAR(10000); + ALTER TABLE vcs_repositories ADD COLUMN html_url VARCHAR(10000); + ALTER TABLE vcs_repositories ADD COLUMN license_spdx VARCHAR(255); + ALTER TABLE vcs_repositories RENAME user_id TO enabled_by_id; + DROP INDEX ix_github_repositories_name; + DROP INDEX ix_github_repositories_github_id; + ALTER TABLE vcs_repositories ADD CONSTRAINT uq_vcs_repositories_provider_provider_id UNIQUE (provider, provider_id); + ALTER TABLE vcs_repositories ADD CONSTRAINT uq_vcs_repositories_provider_name UNIQUE (provider, name); + COMMIT; + +Next, copy the repository metadata previously stored in the ``extra_data`` field of the OAuth remote accounts into the new columns of ``vcs_repositories``; in particular, ``html_url`` must be populated for every row before you run the second transaction below, which marks it as ``NOT NULL``. + +.. 
code-block:: sql + + BEGIN; + ALTER TABLE vcs_repositories ALTER COLUMN html_url SET NOT NULL; + ALTER TABLE github_releases RENAME TO vcs_releases; + ALTER TABLE vcs_releases ALTER COLUMN release_id TYPE VARCHAR(255); + ALTER TABLE vcs_releases ALTER COLUMN release_id SET NOT NULL; + ALTER TABLE vcs_releases RENAME release_id TO provider_id; + ALTER TABLE vcs_releases ADD COLUMN provider VARCHAR(255) DEFAULT 'github' NOT NULL; + ALTER TABLE vcs_releases ALTER COLUMN errors TYPE JSONB USING errors::text::jsonb; + ALTER TABLE vcs_releases DROP CONSTRAINT uq_github_releases_release_id; + ALTER TABLE vcs_releases ADD CONSTRAINT uq_vcs_releases_provider_id_provider UNIQUE (provider_id, provider); + ALTER TABLE vcs_releases ADD CONSTRAINT uq_vcs_releases_provider_id_provider_tag UNIQUE (provider_id, provider, tag); + CREATE TABLE vcs_repository_users ( + repository_id UUID NOT NULL, + user_id INTEGER NOT NULL, + PRIMARY KEY (repository_id, user_id), + CONSTRAINT fk_vcs_repository_users_repository_id_vcs_repositories FOREIGN KEY(repository_id) REFERENCES vcs_repositories (id), + CONSTRAINT fk_vcs_repository_users_user_id_accounts_user FOREIGN KEY(user_id) REFERENCES accounts_user (id) + ); + COMMIT; diff --git a/docs/usage.rst b/docs/usage.rst index e8ebe1ac..d54954cf 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -1,28 +1,11 @@ .. This file is part of Invenio. - Copyright (C) 2016 CERN. - - Invenio is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - Invenio is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with Invenio; if not, write to the - Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307, USA. - - In applying this license, CERN does not - waive the privileges and immunities granted to it by virtue of its status - as an Intergovernmental Organization or submit itself to any jurisdiction. + Copyright (C) 2025 CERN. + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. Usage ===== -.. automodule:: invenio_github +.. automodule:: invenio_vcs diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index 9a58c6dc..a7ad75d2 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -85,6 +85,7 @@ def upgrade(): columns=["provider", "name"], ) + """ # Migrate data from the OAuth remote `extra_data` field to the repositories table # where we will now store everything directly. # @@ -188,6 +189,8 @@ def upgrade(): .values(extra_data={"last_sync": remote_account["extra_data"]["last_sync"]}) ) + """ + # We initially set this to nullable=True so we can create the column without an error # (it would be null for existing records) but after the SQLAlchemy operations above we # have populated it so we can mark it non-nullable.