diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bf2c37e..5743fc0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: hooks: - id: check-hooks-apply - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v5.0.0 hooks: - id: check-ast - id: check-yaml @@ -15,7 +15,7 @@ repos: - id: trailing-whitespace - id: debug-statements - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.6 + rev: v0.7.1 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] diff --git a/README.md b/README.md index 49ca766..b88c283 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,7 @@ This plugin implements an [export provider](https://rdmo.readthedocs.io/en/latest/plugins/index.html#export-providers) for RDMO, which lets users push metadata from RDMO to Zenodo work packages. The plugin uses [OAUTH 2.0](https://oauth.net/2/), so that users use their respective accounts in both systems. It creates only the metadata in Zenodo, so that users need to upload the actual data on Zenodo themselfes. -Setup ------ +## Setup Install the plugin in your RDMO virtual environment using pip (directly from GitHub): @@ -21,25 +20,45 @@ Add the plugin to `PROJECT_EXPORTS` in `config/settings/local.py`: ```python PROJECT_EXPORTS += [ - ('zenodo', _('Directly to Zenodo'), 'rdmo_zenodo.exports.ZenodoExportProvider') + ('zenodo', _('Directly to Zenodo'), 'rdmo_zenodo.exports.ZenodoExportProvider'), + ('zenodo-publish', _('Publish to Zenodo'), 'rdmo_zenodo.exports.ZenodoPublishProvider') ] ``` +If the translation function `_` is not yet imported in your `config/settings/local.py`, add this import at the top: +``` +from django.utils.translation import gettext_lazy as _ +``` + +## Configuration + +### Register a *Developer application* for authentication -An *Developer applications* has to be registered with Zenodo here: https://zenodo.org/account/settings/applications/. 
For development, you can also use the sandbox instance provided by Zenodo: https://sandbox.zenodo.org/account/settings/applications/. During the registration, you need to enter a **Redirect URI** for your RDMO instance: +A *Developer application* has to be registered with Zenodo here: https://zenodo.org/account/settings/applications/. +For development, you can also use the sandbox instance provided by Zenodo: https://sandbox.zenodo.org/account/settings/applications/. +For development against an InvenioRDM instance, the sandbox https://inveniordm.web.cern.ch/ can be used instead. +During the registration, you need to enter a **Redirect URI** for your RDMO instance: ``` https://rdmo.example.com/services/oauth/zenodo/callback/ -http://localhost:8000/services/oauth/zenodo/callback/ # for development +https://rdmo.example.com/services/oauth/zenodo-publish/callback/ + +# or for local development +http://localhost:8000/services/oauth/zenodo/callback/ +http://localhost:8000/services/oauth/zenodo-publish/callback/ ``` -After registration, you are provided with a `client_id` and a `client_secret`, which need to be added to the RDMO settings, along with some other optional entries: +### Configure the RDMO settings +After registration, you are provided with a `client_id` and a `client_secret`, +which need to be added to the RDMO settings in `config/settings/local.py`, along with some other optional entries: ```python ZENODO_PROVIDER = { 'client_id': os.getenv('ZENODO_CLIENT_ID'), - 'client_secret': os.getenv('ZENODO_CLIENT_SECRET'), - 'add_project_members': True, # add the members of the project as creators to each dataset - 'resource_type': 'dataset', # specify the resource type + 'client_secret': os.getenv('ZENODO_CLIENT_SECRET'), + 'zenodo_url': 'https://zenodo.org', # optional, default shown here; or the URL of your own InvenioRDM instance + 'zenodo_auth_scope': 'deposit:write', # optional, default shown here; use 'user:email' for InvenioRDM + 'zenodo_record_id_uri': 
'https://rdmorganiser.github.io/terms/project/metadata/publication/zenodo/record_id', # optional, default is shown here + 'add_project_members': True, # add the members of the project as creators to exported record 'language': 'eng', # specify the language 'publisher': '', # specify the publisher 'funding': [ # specify funding information @@ -63,18 +82,27 @@ ZENODO_PROVIDER = { ] } ``` +The `resource_type` will be set by the specific export provider, e.g. as `'dataset'` or as `'publication-datamanagementplan'` for `zenodo-publish` export. -Usage ------ +## Usage -The plugins apears as export options on the RDMO project overview. +The plugins appear as export options on the RDMO project overview. For a Zenodo backend, it was tested against https://sandbox.zenodo.org/. +Analogous to Zenodo this plugin can also be used with InvenioRDM instances for which it was tested against https://inveniordm.web.cern.ch/. Currently, the following properties of the Zenodo data model are created from RDMO attributes: -| Zenodo field | RDMO attribute | -| ------------- | ---------------------------------------------------------------------------------| -| `title` | `project/dataset/title` or `project/dataset/id` or `f'Dataset #{set_index + 1}'` | -| `description` | `project/dataset/description` | -| `rights` | `project/dataset/sharing/conditions` | - +| Zenodo field | RDMO attribute | +|-----------------------|-----------------------------------------------------------------------------------------------------------------------| +| `title` | `project/dataset/title` or `project/dataset/id` or `f'Dataset #{set_index + 1}'` or `project.title` or `snapshot.title` | +| `description` | `project/dataset/description` | +| `license` or `rights` | `project/dataset/sharing/conditions` | +| `subjects` | `project/research_question/keywords` | +| `creators` | from `project.member` | In addition, several fields can be configured in the settings as shown above. 
+ +### Development +Information about the API schemas can be found at: +* https://inveniordm.docs.cern.ch/reference/metadata/#metadata +* https://github.com/inveniosoftware/invenio-rdm-records/tree/master/invenio_rdm_records/records/jsonschemas/records +* https://zenodraft.github.io/metadata-schema-zenodo/latest/schema.json +* https://developers.zenodo.org/#depositions diff --git a/pyproject.toml b/pyproject.toml index f0df57e..0da6357 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,8 @@ classifiers = [ ] dependencies = [ "rdmo", + "attrs", + "cattrs", ] dynamic = ["version"] @@ -55,6 +57,8 @@ version = {attr = "rdmo_zenodo.__version__"} [tool.ruff] target-version = "py38" line-length = 120 + +[tool.ruff.lint] select = [ "B", # flake8-bugbear "C4", # flake8-comprehensions @@ -74,7 +78,7 @@ ignore = [ "RUF012", # mutable-class-default ] -[tool.ruff.isort] +[tool.ruff.lint.isort] section-order = [ "future", "standard-library", @@ -87,7 +91,7 @@ section-order = [ "local-folder" ] -[tool.ruff.isort.sections] +[tool.ruff.lint.isort.sections] pytest = ["pytest"] django = ["django"] rest_framework = ["rest_framework"] diff --git a/rdmo_zenodo/exports.py b/rdmo_zenodo/exports.py deleted file mode 100644 index 9f4a3ff..0000000 --- a/rdmo_zenodo/exports.py +++ /dev/null @@ -1,201 +0,0 @@ -import logging - -from django import forms -from django.conf import settings -from django.core.exceptions import ObjectDoesNotExist -from django.shortcuts import redirect, render, reverse -from django.utils.translation import gettext_lazy as _ - -from rdmo.projects.exports import Export -from rdmo.services.providers import OauthProviderMixin - -logger = logging.getLogger(__name__) - - -class BaseZenodoExportProvider(OauthProviderMixin, Export): - - @property - def client_id(self): - return settings.ZENODO_PROVIDER['client_id'] - - @property - def client_secret(self): - return settings.ZENODO_PROVIDER['client_secret'] - - @property - def zenodo_url(self): - return 
settings.ZENODO_PROVIDER.get('zenodo_url', 'https://sandbox.zenodo.org').strip('/') - - @property - def authorize_url(self): - return f'{self.zenodo_url}/oauth/authorize' - - @property - def token_url(self): - return f'{self.zenodo_url}/oauth/token' - - @property - def deposit_url(self): - return f'{self.zenodo_url}/api/records' - - @property - def redirect_path(self): - return reverse('oauth_callback', args=['zenodo']) - - def get_authorize_params(self, request, state): - return { - 'response_type': 'code', - 'client_id': self.client_id, - 'scope': 'deposit:write', - 'redirect_uri': request.build_absolute_uri(self.redirect_path), - 'state': state - } - - def get_callback_data(self, request): - return { - 'client_id': self.client_id, - 'client_secret': self.client_secret, - 'grant_type': 'authorization_code', - 'redirect_uri': request.build_absolute_uri(self.redirect_path), - 'code': request.GET.get('code') - } - - def get_error_message(self, response): - return response.json().get('errors') - - -class ZenodoExportProvider(BaseZenodoExportProvider): - - rights_uri_options = { - 'dataset_license_types/71': 'cc-by-4.0', - 'dataset_license_types/73': 'cc-by-nc-4.0', - 'dataset_license_types/74': 'cc-by-nd-4.0', - 'dataset_license_types/75': 'cc-by-sa-4.0', - 'dataset_license_types/cc0': 'cc-zero' - } - - class Form(forms.Form): - - dataset = forms.CharField(label=_('Select dataset of your project')) - - def __init__(self, *args, **kwargs): - dataset_choices = kwargs.pop('dataset_choices') - super().__init__(*args, **kwargs) - - self.fields['dataset'].widget = forms.RadioSelect(choices=dataset_choices) - - def render(self): - datasets = self.get_set('project/dataset/id') - dataset_choices = [(dataset.set_index, dataset.value)for dataset in datasets] - - self.store_in_session(self.request, 'dataset_choices', dataset_choices) - - form = self.Form( - dataset_choices=dataset_choices - ) - - return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, 
status=200) - - def submit(self): - dataset_choices = self.get_from_session(self.request, 'dataset_choices') - form = self.Form(self.request.POST, dataset_choices=dataset_choices) - - if 'cancel' in self.request.POST: - return redirect('project', self.project.id) - - if form.is_valid(): - url = self.get_post_url() - data = self.get_post_data(form.cleaned_data['dataset']) - return self.post(self.request, url, data) - else: - return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) - - def post_success(self, request, response): - zenodo_url = response.json().get('links', {}).get('self_html') - if zenodo_url: - return redirect(zenodo_url) - else: - return render(request, 'core/error.html', { - 'title': _('ZENODO error'), - 'errors': [_('The URL of the new dataset could not be retrieved.')] - }, status=200) - - def get_post_url(self): - return self.deposit_url - - def get_post_data(self, set_index): - # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata - metadata = {} - - # set the resource_type from the settings - resource_type = settings.ZENODO_PROVIDER.get('resource_type') - if resource_type: - metadata['resource_type'] = { - 'id': resource_type - } - - # add the creators from the project members - add_project_members = settings.ZENODO_PROVIDER.get('add_project_members') - if add_project_members: - metadata['creators'] = [] - for user in self.project.user.all(): - creator = { - 'family_name': user.last_name, - 'given_name': user.first_name, - 'type': 'personal' - } - - try: - orcid_socialaccount = user.socialaccount_set.get(provider='orcid') - creator['identifiers'] = [ - { - 'scheme': 'orcid', - 'identifier': orcid_socialaccount.uid - } - ] - except (ObjectDoesNotExist, AttributeError): - pass - - metadata['creators'].append({ - 'person_or_org': creator - }) - - # set the title from the title or id or the running index - metadata['title'] = \ - self.get_text('project/dataset/title', set_index=set_index) or \ 
- self.get_text('project/dataset/id', set_index=set_index) or \ - f'Dataset #{set_index + 1}' - - # set the description - description = self.get_text('project/dataset/description', set_index=set_index) - if description: - metadata['description'] = description - - # set the rights/licenses - for rights in self.get_values('project/dataset/sharing/conditions', set_index=set_index): - if rights.option: - metadata['rights'] = [{ - 'id': self.rights_uri_options.get(rights.option.uri_path) - }] - break - - # set the language from the settings - language = settings.ZENODO_PROVIDER.get('language') - if language: - metadata['languages'] = [ - {'id': language} - ] - - # set the publisher from the settings - publisher = settings.ZENODO_PROVIDER.get('publisher') - if publisher: - metadata['publisher'] = publisher - - # set the funding from the settings - funding = settings.ZENODO_PROVIDER.get('funding') - if funding: - metadata['funding'] = funding - - return { - 'metadata': metadata - } diff --git a/rdmo_zenodo/exports/__init__.py b/rdmo_zenodo/exports/__init__.py new file mode 100644 index 0000000..036cfa9 --- /dev/null +++ b/rdmo_zenodo/exports/__init__.py @@ -0,0 +1,2 @@ +from .export import ZenodoExportProvider as ZenodoExportProvider +from .publish import ZenodoPublishProvider as ZenodoPublishProvider diff --git a/rdmo_zenodo/exports/base.py b/rdmo_zenodo/exports/base.py new file mode 100644 index 0000000..380328d --- /dev/null +++ b/rdmo_zenodo/exports/base.py @@ -0,0 +1,147 @@ +import logging + +from django.conf import settings +from django.shortcuts import reverse + +from rdmo.projects.exports import Export +from rdmo.services.providers import OauthProviderMixin + +from rdmo_zenodo.exports.metadata.builder import METADATA_METHODS, extract_metadata, serialize_payload, validate_schema +from rdmo_zenodo.exports.metadata.context import MetadataContext + +logger = logging.getLogger(__name__) + +json_header = { + 'Content-Type': 'application/json', + } +binary_header = { + 
class BaseZenodoExportProvider(OauthProviderMixin, Export):
    """Shared base class of the Zenodo / InvenioRDM export providers.

    Reads its configuration from ``settings.ZENODO_PROVIDER``, builds the
    OAuth and REST API URLs of the configured instance and assembles the
    metadata payload for a record. Helpers such as ``post``,
    ``get_from_session``, ``pop_from_session`` and
    ``get_authorization_headers`` are not defined here; they are presumably
    inherited from ``OauthProviderMixin`` / ``Export`` (not visible in this
    module).
    """

    @property
    def client_id(self):
        """OAuth client id; ``ZENODO_PROVIDER['client_id']`` is a required setting."""
        return settings.ZENODO_PROVIDER['client_id']

    @property
    def client_secret(self):
        """OAuth client secret; ``ZENODO_PROVIDER['client_secret']`` is a required setting."""
        return settings.ZENODO_PROVIDER['client_secret']

    @property
    def zenodo_url(self):
        """Base URL of the target instance, defaulting to ``https://zenodo.org``.

        ``strip('/')`` removes slashes from both ends, so the URL helpers
        below can always append ``/...`` safely.
        """
        return settings.ZENODO_PROVIDER.get('zenodo_url', 'https://zenodo.org').strip('/')

    @property
    def zenodo_backend_type(self):
        """Return ``'zenodo'`` or ``'invenio'``.

        The decision is a plain substring test on the configured URL: any
        URL containing ``zenodo`` is treated as a Zenodo backend.
        """
        if 'zenodo' in self.zenodo_url:
            return 'zenodo'
        return 'invenio'

    @property
    def authorize_url(self):
        """URL of the OAuth authorization endpoint."""
        return f'{self.zenodo_url}/oauth/authorize'

    @property
    def token_url(self):
        """URL of the OAuth token endpoint."""
        return f'{self.zenodo_url}/oauth/token'

    @property
    def redirect_path(self):
        """Path of the ``oauth_callback`` route for this provider's ``key``."""
        # self.key is not set in this class; presumably the key under which
        # the provider is registered (e.g. 'zenodo') -- confirm in Export.
        return reverse('oauth_callback', args=[self.key])

    @property
    def authorization_header(self):
        """Authorization headers built from the ``access_token`` kept in the session."""
        return self.get_authorization_headers(self.get_from_session(self.request, 'access_token'))

    @property
    def authorization_scope(self):
        """OAuth scope to request.

        An explicit ``zenodo_auth_scope`` setting wins; otherwise the scope
        depends on the backend type: ``deposit:write`` for Zenodo,
        ``user:email`` for everything else.
        """
        if scope := settings.ZENODO_PROVIDER.get('zenodo_auth_scope'):
            return scope
        if self.zenodo_backend_type == 'zenodo':
            return 'deposit:write'
        return 'user:email'

    @property
    def authorized_binary_header(self):
        """Module-level ``binary_header`` merged with the authorization headers."""
        return {**binary_header, **self.authorization_header}

    @property
    def authorized_json_header(self):
        """Module-level ``json_header`` merged with the authorization headers."""
        return {**json_header, **self.authorization_header}

    def record_uploads_url(self, record_id):
        """Non-API ``/uploads/<record_id>`` URL of a record."""
        return f"{self.zenodo_url}/uploads/{record_id}"

    @property
    def records_url(self):
        """API collection URL for records."""
        return f'{self.zenodo_url}/api/records'

    def record_url(self, record_id):
        """API URL of a single record."""
        return f"{self.records_url}/{record_id}"

    def record_draft_url(self, record_id):
        """API URL of the draft of a record."""
        return f"{self.records_url}/{record_id}/draft"

    def record_versions_url(self, record_id):
        """API URL of the versions of a record."""
        return f"{self.records_url}/{record_id}/versions"

    def record_file_url(self, record_id):
        """API URL of the files of a record draft."""
        return f"{self.record_draft_url(record_id)}/files"

    def record_file_content_url(self, record_id, file_key):
        """API URL for the content of one draft file."""
        return f"{self.record_file_url(record_id)}/{file_key}/content"

    def record_file_commit_url(self, record_id, file_key):
        """API URL for committing one draft file."""
        return f"{self.record_file_url(record_id)}/{file_key}/commit"

    def record_publish_url(self, record_id):
        """API URL of the publish action of a record draft."""
        return f"{self.record_draft_url(record_id)}/actions/publish"

    def get_authorize_params(self, request, state):
        """Return the query parameters for the OAuth authorization request."""
        return {
            'response_type': 'code',
            'client_id': self.client_id,
            'scope': self.authorization_scope,
            'redirect_uri': request.build_absolute_uri(self.redirect_path),
            'state': state
        }

    def get_callback_data(self, request):
        """Return the form data that exchanges the callback ``code`` for a token."""
        return {
            'client_id': self.client_id,
            'client_secret': self.client_secret,
            'grant_type': 'authorization_code',
            'redirect_uri': request.build_absolute_uri(self.redirect_path),
            'code': request.GET.get('code')
        }

    def post_with_retry(self, request, url, data):
        """POST ``data`` to ``url`` and retry once if the response mentions ``OAuth``.

        The retry drops the stored access token first; what ``self.post``
        does without a token (presumably restarting the OAuth flow) is
        decided by the inherited implementation.
        """
        response = self.post(request, url, data)
        # Hacky way: in case of OAuth error (from e.g. 403), pop access_token and re-try
        if 'OAuth' in response.content.decode():
            self.pop_from_session(request, 'access_token')
            response = self.post(request, url, data)
        return response

    def get_metadata_context(self, set_index=None):
        """Bundle project, snapshot, value getters and backend type into a ``MetadataContext``."""
        return MetadataContext(
            project=self.project,
            snapshot=self.snapshot,
            set_index=set_index,
            get_values=self.get_values,
            get_text=self.get_text,
            zenodo_backend_type=self.zenodo_backend_type,
        )

    def get_metadata(self, set_index=None):
        """Build the serialized request payload for the configured backend.

        Steps: create the context, look up ``(mapper, schema, payload class)``
        for the backend type in ``METADATA_METHODS``, extract the values,
        validate them against the schema, wrap them in the payload class and
        serialize the result.
        """

        context = self.get_metadata_context(set_index=set_index)

        mapper, schema, payload_cls = METADATA_METHODS[self.zenodo_backend_type]
        metadata_dict = extract_metadata(context, mapper)
        metadata_obj = validate_schema(metadata_dict, schema)

        payload_obj = payload_cls(metadata=metadata_obj)
        return serialize_payload(payload_obj)
class ZenodoExportProvider(BaseZenodoExportProvider):
    """Export provider that sends one dataset of the project as a new record."""

    def get_dataset_choices(self):
        """Return ``(set_index, value)`` pairs for all ``project/dataset/id`` values."""
        choices = []
        for entry in self.get_set('project/dataset/id'):
            choices.append((entry.set_index, entry.value))
        return choices

    def render(self):
        """Show the dataset selection form and remember the choices in the session."""
        choices = self.get_dataset_choices()
        self.store_in_session(self.request, 'dataset_choices', choices)

        template_context = {'form': ZenodoDatasetForm(dataset_choices=choices)}
        return render(self.request, 'plugins/exports_zenodo.html', template_context, status=200)

    def submit(self):
        """Handle the posted form: cancel, re-render on errors, or post the record."""
        stored_choices = self.get_from_session(self.request, 'dataset_choices')
        form = ZenodoDatasetForm(self.request.POST, dataset_choices=stored_choices)

        if 'cancel' in self.request.POST:
            return redirect('project', self.project.id)

        if not form.is_valid():
            return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200)

        target_url = self.records_url
        try:
            payload = self.get_metadata(set_index=form.cleaned_data['dataset'])
        except MetadataBuildError as error:
            # Report metadata problems on the form instead of failing the request.
            form.add_error(None, str(error))
            return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=400)

        return self.post_with_retry(self.request, target_url, payload)

    def post_success(self, request, response):
        """Redirect to the new record's page, or show an error if no link came back."""
        links = response.json().get('links', {})
        record_page = links.get('self_html')
        if record_page:
            return redirect(record_page)

        error_context = {
            'title': _('ZENODO error'),
            'errors': [_('The URL of the new dataset could not be retrieved.')]
        }
        return render(request, 'core/error.html', error_context, status=200)
class ZenodoDatasetForm(forms.Form):
    """Form for selecting the dataset of the project that is exported.

    The selectable datasets are passed in through the required keyword
    argument ``dataset_choices``, an iterable of ``(set_index, label)``
    pairs. A missing argument raises ``KeyError``; ``None`` is treated as
    "no datasets".

    ``cleaned_data['dataset']`` is the selected ``set_index`` as an ``int``.
    """

    # A TypedChoiceField (instead of a free-text CharField) rejects submitted
    # values that are not among the offered datasets and converts the
    # selection to int, which is the type the metadata context expects.
    dataset = forms.TypedChoiceField(
        label=_('Select dataset of your project'),
        coerce=int,
        widget=forms.RadioSelect,
    )

    def __init__(self, *args, **kwargs):
        dataset_choices = kwargs.pop('dataset_choices')
        super().__init__(*args, **kwargs)

        # Setting the choices on the field also hands them to its widget.
        self.fields['dataset'].choices = list(dataset_choices or [])
def extract_metadata(context: "MetadataContext", fields: "dict[str, Callable]") -> "dict[str, Any]":
    """Run every getter of a field mapper and collect the results.

    The annotations are quoted on purpose: this module has no
    ``from __future__ import annotations``, and an unquoted
    ``dict[str, Callable]`` is evaluated when the function is defined, which
    fails on Python 3.8.

    Args:
        context: The metadata context handed to getters that take an argument.
        fields: Mapping of metadata field name to getter. A getter takes
            either no parameter or exactly one (the context). Inside this
            function the name shadows the ``fields`` helper imported from
            ``attr``.

    Returns:
        A dict mapping each field name to the value its getter returned.

    Raises:
        ValueError: If a getter declares more than one parameter.
    """
    # 1: extract metadata from rdmo project values
    extracted = {}
    for name, getter in fields.items():
        sig = inspect.signature(getter)
        parameter_count = len(sig.parameters)
        if parameter_count == 0:
            # e.g. getters that only read the settings
            extracted[name] = getter()
        elif parameter_count == 1:
            extracted[name] = getter(context)
        else:
            raise ValueError(f"Unsupported getter signature {sig}")
    return extracted
@dataclass(frozen=True)
class MetadataContext:
    """Immutable bundle of everything the metadata extractors read from."""

    project: Project
    snapshot: Snapshot | None
    # value accessors handed in by the export provider
    get_values: Callable[..., list[Any]]
    get_text: Callable[..., str | None]
    # 'zenodo' or 'invenio', as determined by the export provider
    zenodo_backend_type: str
    # index of the selected dataset, if a single dataset is exported
    set_index: int | None = None
    view: Any | None = None
    export_format: str | None = None

    @property
    def project_members(self) -> list:
        """Return the project's users, or ``[]`` when they should not be added.

        Members are only returned when the ``add_project_members`` setting is
        truthy and a project is present.
        """
        include_members = settings.ZENODO_PROVIDER.get("add_project_members")
        if not include_members or self.project is None:
            return []
        return list(self.project.user.all())
class MetadataBuildError(Exception):
    """Base class for errors raised during metadata composition.

    Attributes:
        message: Short description of what went wrong.
        details: Optional extra information, appended to the message by
            ``str()``.
    """

    # The annotation is quoted because this module has no
    # ``from __future__ import annotations``: an unquoted ``str | None`` is
    # evaluated at definition time and raises TypeError before Python 3.10.
    def __init__(self, message: str, details: "str | None" = None) -> None:
        self.details = details
        self.message = message
        super().__init__(message)

    def __str__(self) -> str:
        """Return ``message``, followed by ``: details`` when details are set."""
        if self.details:
            return f'{self.message}: {self.details}'
        return f'{self.message}'
def get_title_from_dataset(context: MetadataContext) -> str:
    """Return a title for the dataset selected by ``context.set_index``.

    Preference order: the dataset's title, then its id, then a generated
    ``Dataset #<n>`` where ``n`` is the 1-based position.

    The set index may arrive as a string (e.g. straight from form data), so
    numeric strings are accepted for the generated title. Anything that
    cannot be read as an integer falls back to position 1.

    Raises:
        ValueError: If the context has no ``set_index``.
    """
    set_index = context.set_index
    if set_index is None:
        raise ValueError("Cannot extract title from dataset without set_index")

    title = context.get_text("project/dataset/title", set_index=set_index)
    if title:
        return title

    dataset_id = context.get_text("project/dataset/id", set_index=set_index)
    if dataset_id:
        return dataset_id

    try:
        position = int(set_index) + 1
    except (TypeError, ValueError):
        position = 1
    return f"Dataset #{position}"
def get_zenodo_creator_from_user(user: Any) -> dict[str, Any]:
    """Build a creator entry in the Zenodo deposition format for ``user``.

    The name is rendered as ``"<last name>, <first name>"``. Empty name parts
    are left out, so a user without a first name becomes ``"Doe"`` instead of
    ``"Doe,"``, and a user without any name yields an empty string rather
    than a lone comma.

    Returns:
        A dict with the keys ``name``, ``orcid`` (the uid of the user's ORCID
        account as returned by ``get_orcid_from_user``, or ``None``) and
        ``affiliation`` (always ``None``).
    """
    orcid = get_orcid_from_user(user)
    name_parts = [
        part.strip()
        for part in (user.last_name, user.first_name)
        if part and part.strip()
    ]
    return {
        "name": ", ".join(name_parts),
        "orcid": orcid.uid if orcid else None,
        "affiliation": None,
    }
def get_license_id_from_context(context: MetadataContext) -> list[dict[str, str]] | list[dict[str, dict[str, str]]]:
    """Return the license entry for the dataset's sharing conditions.

    The values of ``project/dataset/sharing/conditions`` are read for
    ``context.set_index`` (set 0 when no index is given). The first value
    that yields a license wins:

    * an option whose ``uri_path`` is listed in ``RIGHTS_URI_OPTIONS`` gives
      ``[{"id": <license id>}]``;
    * an option with free-text input and a non-empty text gives
      ``[{"title": {"en": <text>}}]``.

    Values without an option are skipped. Previously they raised
    ``AttributeError`` when ``option.additional_input`` was accessed.

    Returns:
        A one-element list describing the license, or ``[]`` if none matched.
    """
    set_index = context.set_index if context.set_index is not None else 0
    values = context.get_values("project/dataset/sharing/conditions", set_index=set_index)
    for value in values:
        option = value.option
        if not option:
            # nothing selected for this value, so there is no license to derive
            continue
        if license_id := RIGHTS_URI_OPTIONS.get(option.uri_path):
            return [{"id": license_id}]
        if option.additional_input == "text" and value.text:
            return [{"title": {"en": value.text}}]
    return []
def from_string(cls, value: str) -> ResourceType: + return cls(id=value) + + def __attrs_post_init__(self) -> None: + if not self.id: + raise ValueError("resource_type.id must be a non-empty string") + +@attrs.define +class Creator: + person_or_org: PersonalPersonOrOrg | OrganizationalPersonOrOrg + role: Role | None = None # optional for creators + affiliations: list[Affiliation] = attrs.field(factory=list) + + def __attrs_post_init__(self) -> None: + if ( + isinstance(self.person_or_org, OrganizationalPersonOrOrg) + and self.affiliations + ): + raise ValueError("affiliations are only allowed for personal creators") + +@attrs.define +class Role: + id: str + +@attrs.define +class Affiliation: + id: str | None = None # CV id (preferred, if known) + name: str | None = None # free text fallback + + def __attrs_post_init__(self) -> None: + # One of id or name must be present + if not (self.id or self.name): + raise ValueError("affiliation requires either 'id' or 'name'") + + +@attrs.define +class PersonalPersonOrOrg: + given_name: str + family_name: str + type: Literal["personal"] = "personal" + identifiers: list[CreatorIdentifier] = attrs.field(factory=list) + + def __attrs_post_init__(self) -> None: + if not (self.given_name and self.family_name): + raise ValueError("personal requires given_name and family_name") + +@attrs.define +class OrganizationalPersonOrOrg: + name: str + type: Literal["organizational"] = "organizational" + identifiers: list[AffiliationIdentifier] = attrs.field(factory=list) + + def __attrs_post_init__(self) -> None: + if not self.name: + raise ValueError("organizational requires 'name'") + +@attrs.define +class CreatorIdentifier: + scheme: Literal['orcid', 'gnd', 'isni', 'ror'] + identifier: str + +@attrs.define +class AffiliationIdentifier: + scheme: Literal['isni', 'ror'] + identifier: str + +@attrs.define +class AdditionTitles: + title: str + type: AdditionTitleType + lang: AdditionalLang | None = None + +@attrs.define +class AdditionalLang: 
+ id: ISO639_3 + +@attrs.define +class AdditionTitleType: + id: Literal["alternative-title", "subtitle", "translated-title", "other"] + title: dict[ISO639_1, str] + +@attrs.define +class AdditionalDescriptions: + description: str # free-text + type: AdditionTitleType + lang: AdditionalLang | None = None + +@attrs.define +class AdditionDescriptionType: + id: Literal["abstract", "methods", "series-information", "table-of-contents", "technical-info", "other"] + title: dict[ISO639_1, str] + +@attrs.define +class Rights: + id: str | None = None # CV + title: dict[ISO639_1, str] | None = None # Localized human readable title + description: dict[ISO639_1, str] | None = None # Localized license description text + link: str | None = None + + def __attrs_post_init__(self) -> None: + if bool(self.id) and bool(self.title): + raise ValueError("rights: either 'id' or 'title' must be set, but not both") + +@attrs.define +class Contributor: + person_or_org: PersonalPersonOrOrg | OrganizationalPersonOrOrg + role: Role # required for contributors + affiliations: list[Affiliation] = attrs.field(factory=list) + + def __attrs_post_init__(self) -> None: + if self.person_or_org.type == "organizational" and self.affiliations: + raise ValueError("affiliations are only allowed for personal contributors") + +@attrs.define +class Subject: + id: str | None = None # CV id + subject: str | None = None # free keyword + + def __attrs_post_init__(self) -> None: + if self.id and self.subject: + raise ValueError("subject: set exactly one of 'id' or 'subject'") + +@attrs.define +class Language: + id: ISO639_3 # ISO-639-3 code, e.g. 
'eng', 'dan' + + @classmethod + def from_string(cls, value: str) -> Language: + if len(value) != 3: + raise ValueError("language must be 3 characters long (ISO639-3)") + return cls(id=value) + +@attrs.define +class Date: + date: str = attrs.field(converter=is_edtf_l0_date) + type: DateRole + description: str | None = None + +@attrs.define +class DateRole: + id: Literal[ + 'accepted', 'available', 'collected', 'copyrighted', 'created', 'issued', + 'other', 'submitted', 'updated', 'valid', 'withdrawn' + ] + title: dict[ISO639_1, str] = None # only id needed on the REST API + +@attrs.define +class AlternateIdentifier: + identifier: str + scheme: str # CV scheme (doi, isbn, url, ...) + +@attrs.define +class RelatedIdentifier: + identifier: str + scheme: IdentifierSchemes + relation_type: RelationType + resource_type: RelatedIdentifierResourceType | None = None + +@attrs.define +class RelationType: + id: str # CV + title: dict[ISO639_1, str] | None = None + +@attrs.define +class RelatedIdentifierResourceType: + id: str + title: dict[ISO639_1, str] + +@attrs.define +class FundingRef: + funder: Funder + award: Award | None = None + +@attrs.define +class Funder: + id: str | None = None # from CV + name: str | None = None # free-text + + def __attrs_post_init__(self): + if self.id and self.name: + raise ValueError("funder: one of 'id' or 'name' must be set") + +@attrs.define +class Award: + id: str | None = None + title: dict[ISO639_1,str] | None = None + number: str | None = None + identifiers: list[AwardIdentifier] | None = None + + def __attrs_post_init__(self): + has_id = bool(self.id) + has_fallback = bool(self.title and self.number) + if not (has_id or has_fallback): + raise ValueError("award: one of 'id' or ('title' and 'number') must be set") + +@attrs.define +class AwardIdentifier: + scheme: IdentifierSchemes + identifier: str + +@attrs.define +class References: + reference: str + scheme: IdentifierSchemes | None = None + identifier: str | None = None + 
+@attrs.define
+class InvenioMetadataV6:
+    """Metadata section of an InvenioRDM record payload.
+
+    Raises ``ValueError`` on construction when ``creators`` is empty, ``title`` is
+    empty or the resource type has no id.
+    """
+    resource_type: ResourceType
+    title: str
+    publication_date: str = attrs.field(converter=is_edtf_l0_date)
+    creators: list[Creator] = attrs.field(factory=list)
+
+    additional_titles: list[AdditionTitles] = attrs.field(factory=list)
+    description: str | None = None  # may use certain HTML tags
+    additional_descriptions: list[AdditionalDescriptions] = attrs.field(factory=list)
+
+    rights: list[Rights] = attrs.field(factory=list)
+    copyright: str | None = None  # free-text
+    contributors: list[Contributor] = attrs.field(factory=list)
+    subjects: list[Subject] = attrs.field(factory=list)
+    languages: list[Language] = attrs.field(factory=list)
+    dates: list[Date] = attrs.field(factory=list)
+
+    version: str | None = None  # eg. semantic versioning
+    publisher: str | None = None
+
+    alternate_identifiers: list[AlternateIdentifier] = attrs.field(factory=list)
+    related_identifiers: list[RelatedIdentifier] = attrs.field(factory=list)
+
+    sizes: list[str] = attrs.field(factory=list)
+    formats: list[str] = attrs.field(factory=list)
+    locations: list[dict[str, Any]] = attrs.field(factory=list)
+    funding: list[FundingRef] = attrs.field(factory=list)
+    # entries are modelled by the References class of this module (reference, scheme, identifier)
+    references: list[References] = attrs.field(factory=list)
+
+    def __attrs_post_init__(self) -> None:
+        # minimal required fields per docs
+        if not self.creators:
+            raise ValueError("metadata.creators requires at least one creator")
+        if not self.title:
+            raise ValueError("metadata.title is required")
+        if not self.resource_type or not self.resource_type.id:
+            raise ValueError("metadata.resource_type.id is required")
+
+@attrs.define
+class Access:
+    record: Literal["public", "restricted"] = "public"
+    files: Literal["public", "restricted"] = "public"
+    embargo: AccessEmbargo | None = None
+
+@attrs.define
+class AccessEmbargo:
+    """Embargo settings; ``until`` is required while ``active`` is true."""
+    active: bool
+    until: str | None = attrs.field(default=None, converter=lambda v: is_iso_date(v) if v else None)
+    reason: str | None = None
+
+    def __attrs_post_init__(self) -> None:
+        if self.active and not self.until:
+            raise ValueError("embargo.until (YYYY-MM-DD) is required when embargo.active is true")
+
+@attrs.define
+class FilesOptions:
+    enabled: bool  # should (and can) files be attached to this record or not.
+    default_preview: str | None = None
+    order: list[str] = attrs.field(factory=list)
+
+@attrs.define
+class ExternalPID:
+    doi: DOI | None = None
+
+@attrs.define
+class DOI:
+    identifier: str
+    provider: str
+    client: str | None = None
+
+@attrs.define
+class InvenioRecordV6Payload:
+    """Top-level record payload: metadata plus access, files, pids and custom fields."""
+    metadata: InvenioMetadataV6
+    access: Access = attrs.field(factory=Access)
+    files: FilesOptions | None = None
+    pids: ExternalPID | None = None
+    custom_fields: dict | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the payload as a plain dict with empty values removed at every nesting level."""
+        # imported here so this method does not depend on the module-level import list
+        from rdmo_zenodo.exports.metadata.utils import drop_empty_values
+
+        d = attrs.asdict(self, recurse=True)
+        # drop empties for cleaner payloads; filtering only the top-level keys would
+        # leave every unset field inside "metadata" in the request body
+        return drop_empty_values(d)
diff --git a/rdmo_zenodo/exports/metadata/mappers.py b/rdmo_zenodo/exports/metadata/mappers.py
new file mode 100644
index 0000000..4eccf26
--- /dev/null
+++ b/rdmo_zenodo/exports/metadata/mappers.py
@@ -0,0 +1,52 @@
+from typing import Any, Callable
+
+from rdmo_zenodo.exports.metadata.extractors import (
+    get_creators_from_context,
+    get_description_from_project,
+    get_funding_from_settings,
+    get_keywords_from_context,
+    get_language_from_settings,
+    get_license_id_from_context,
+    get_publication_date_from_today,
+    get_publication_type_from_settings,
+    get_publisher_from_settings,
+    get_resource_type_from_settings_and_context,
+    get_subjects_from_keywords_and_context,
+    get_title_from_context,
+    get_upload_type_from_settings,
+)
+
+FieldGetter = Callable[[Any], Any]
+
+# metadata field name -> extractor function, for the Zenodo deposition payload
+ZENODO_FIELD_MAPPER: dict[str, FieldGetter] = {
+    # required fields
+    "upload_type": get_upload_type_from_settings,
+    "publication_type": get_publication_type_from_settings,
+    "resource_type": get_resource_type_from_settings_and_context,
+    "title": get_title_from_context,
+    "publication_date": get_publication_date_from_today,
+    "creators": get_creators_from_context,
+    "description": get_description_from_project,
+
+    # optional metadata fields
+    "funding": get_funding_from_settings,
+    "keywords": get_keywords_from_context,
+    "languages": get_language_from_settings,
+    "license": get_license_id_from_context,
+    "publisher": get_publisher_from_settings,
+}
+# metadata field name -> extractor function, for the InvenioRDM record payload
+INVENIO_FIELD_MAPPER: dict[str, FieldGetter] = {
+    # required fields
+    "resource_type": get_resource_type_from_settings_and_context,
+    "title": get_title_from_context,
+    "publication_date": get_publication_date_from_today,
+    "creators": get_creators_from_context,
+    "description": get_description_from_project,
+
+    # optional metadata fields
+    "funding": get_funding_from_settings,
+    "subjects": get_subjects_from_keywords_and_context,
+    "languages": get_language_from_settings,
+    "rights": get_license_id_from_context,
+    "publisher": get_publisher_from_settings,
+}
diff --git a/rdmo_zenodo/exports/metadata/utils.py b/rdmo_zenodo/exports/metadata/utils.py
new file mode 100644
index 0000000..13fc5d3
--- /dev/null
+++ b/rdmo_zenodo/exports/metadata/utils.py
@@ -0,0 +1,17 @@
+import re
+from datetime import date
+from typing import Any
+
+_EMPTY_VALUES = (None, "", [], {})
+
+
+def is_edtf_l0_date(value: str) -> str:
+    """
+    Accepts EDTF Level 0 "Date" or "Date Interval":
+    YYYY | YYYY-MM | YYYY-MM-DD
+    YYYY/YYYY | YYYY-MM/YYYY-MM | YYYY-MM-DD/YYYY-MM-DD, etc.
+
+    Returns the value unchanged, raises ``ValueError`` otherwise.
+    """
+    # fullmatch instead of match with "$": "$" would also accept a trailing newline
+    if not re.fullmatch(r"\d{4}(-\d{2}(-\d{2})?)?(/\d{4}(-\d{2}(-\d{2})?)?)?", value):
+        raise ValueError(f"Invalid EDTF Level 0 date {value!r}")
+    return value
+
+def is_iso_date(value: str) -> str:
+    """Return ``value`` unchanged if it is an existing calendar date written as YYYY-MM-DD.
+
+    Raises ``ValueError`` for any other shape and for impossible dates such as 2024-02-30.
+    """
+    if not re.fullmatch(r"\d{4}-\d{2}-\d{2}", value):
+        raise ValueError(f"Invalid ISO date {value!r}; must be YYYY-MM-DD")
+    try:
+        # the pattern only checks the shape, this checks month and day ranges
+        date.fromisoformat(value)
+    except ValueError as e:
+        raise ValueError(f"Invalid ISO date {value!r}; must be YYYY-MM-DD") from e
+    return value
+
+def drop_empty_values(data: Any) -> Any:
+    """Recursively remove ``None``, ``""``, ``[]`` and ``{}`` from nested dicts and lists.
+
+    Children are pruned first, so a container that only held empty values is removed
+    as well. ``False`` and ``0`` are kept.
+    """
+    if isinstance(data, dict):
+        pruned = {k: drop_empty_values(v) for k, v in data.items()}
+        return {k: v for k, v in pruned.items() if v not in _EMPTY_VALUES}
+    if isinstance(data, list):
+        pruned_items = [drop_empty_values(v) for v in data]
+        return [v for v in pruned_items if v not in _EMPTY_VALUES]
+    return data
diff --git a/rdmo_zenodo/exports/metadata/zenodo.py b/rdmo_zenodo/exports/metadata/zenodo.py
new file mode 100644
index 0000000..2b1be99
--- /dev/null
+++ b/rdmo_zenodo/exports/metadata/zenodo.py
@@ -0,0 +1,140 @@
+# https://zenodraft.github.io/metadata-schema-zenodo/latest/schema.json
+# https://developers.zenodo.org/#depositions
+
+from __future__ import annotations
+
+from typing import Any, Literal
+
+import attrs
+
+from rdmo_zenodo.exports.metadata.invenio import Creator, FundingRef, Language, ResourceType
+from rdmo_zenodo.exports.metadata.utils import drop_empty_values, is_iso_date
+
+UploadType = Literal[
+    "dataset", "image", "publication", "poster", "presentation",
+    "software", "lesson", "physicalobject", "other"
+]
+
+PublicationType = Literal[
+    "annotationcollection", "book", "section", "conferencepaper", "datamanagementplan",
+    "article", "patent", "preprint", "deliverable", "milestone",
+    "proposal", "report", "softwaredocumentation", "taxonomictreatment",
+    "technicalnote", "thesis", "workingpaper", "other"
+]
+
+ImageType = Literal["figure", "plot", "drawing", "diagram", "photo", "other"]
+
+AccessRight = Literal["open", "embargoed", "restricted", "closed"]
+
+ContributorType = Literal[
+    "ContactPerson", "DataCollector", "DataCurator", "DataManager",
+    "Distributor", "Editor", "HostingInstitution", "Producer",
+    "ProjectLeader", "ProjectManager", "ProjectMember", "RegistrationAgency",
+    "RegistrationAuthority", "RelatedPerson", "ResearchGroup", "RightsHolder",
+    "Sponsor", "Supervisor", "WorkPackageLeader", "Other", "Annotator"
+]
+
+
+@attrs.define
+class ZenodoCreator:
+    """A creator in the Zenodo deposition format; ``name`` must be non-empty."""
+    name: str  # in the format Family name, Given names
+    affiliation: str | None = None
+    orcid: str | None = None
+    gnd: str | None = None
+
+    def __attrs_post_init__(self) -> None:
+        if not self.name:
+            raise ValueError("creator.name is required")
+
+@attrs.define
+class Contributor:
+    name: str
+    type: ContributorType
+    affiliation: str | None = None
+    orcid: str | None = None
+    gnd: str | None = None
+
+@attrs.define
+class Identifier:
+    identifier: str
+    relation: str | None = None  # e.g. "isSupplementTo"
+    scheme: str | None = None  # "doi", "url", etc.
+    resource_type: str | None = None
+
+@attrs.define
+class Grant:
+    id: str  # e.g. "10.13039/501100000780::101122483"
+
+    @classmethod
+    def from_string(cls, value: str) -> Grant:
+        """Build a ``Grant`` from a plain id string."""
+        return cls(id=value)
+
+@attrs.define
+class Community:
+    identifier: str  # e.g. "zenodo-community-id"
+
+    @classmethod
+    def from_string(cls, value: str) -> Community:
+        """Build a ``Community`` from a plain identifier string."""
+        return cls(identifier=value)
+
+@attrs.define
+class RelatedIdentifier:
+    identifier: str
+    relation: str
+    scheme: str | None = None
+    resource_type: str | None = None
+
+
+@attrs.define
+class ZenodoMetadata:
+    """Metadata section of a Zenodo deposition.
+
+    Construction raises ``ValueError`` when the combination of ``upload_type``,
+    ``access_right`` and their dependent fields is inconsistent, or when creators,
+    title or description are missing (see ``__attrs_post_init__``).
+    """
+    resource_type: ResourceType
+    title: str
+    publication_date: str = attrs.field(converter=is_iso_date)
+    creators: list[Creator] = attrs.field(factory=list)
+    upload_type: UploadType | None = None
+    description: str | None = None
+    publication_type: PublicationType | None = None
+    contributors: list[Contributor] = attrs.field(factory=list)
+    keywords: list[str] = attrs.field(factory=list)
+    languages: list[Language] = attrs.field(factory=list)
+    related_identifiers: list[RelatedIdentifier] = attrs.field(factory=list)
+    alternate_identifiers: list[Identifier] = attrs.field(factory=list)
+    funding: list[FundingRef] = attrs.field(factory=list)
+    grants: list[Grant] = attrs.field(factory=list)
+    references: list[str] = attrs.field(factory=list)
+    notes: str | None = None
+    communities: list[Community] = attrs.field(factory=list)
+    access_right: AccessRight = "open"
+    license: str | None = None  # any SPDX ID or custom name
+    embargo_date: str | None = attrs.field(default=None, converter=lambda v: is_iso_date(v) if v else None)
+    access_conditions: str | None = None
+    publisher: str | None = None
+    version: str | None = None
+
+    def __attrs_post_init__(self) -> None:
+        # upload type dependent constraints
+        if self.upload_type == "publication" and not self.publication_type:
+            raise ValueError("publication_type required when upload_type='publication'")
+
+        # access_right constraints
+        if self.access_right in {"open", "embargoed"} and not self.license:
+            raise ValueError("license required when access_right is open or embargoed")
+        if self.access_right == "embargoed" and not self.embargo_date:
+            raise ValueError("embargo_date required when access_right='embargoed'")
+        if self.access_right == "restricted" and not self.access_conditions:
+            raise ValueError("access_conditions required when access_right='restricted'")
+
+        # required minimal fields
+        if not self.creators:
+            raise ValueError("At least one creator required")
+        if not self.title or not self.description:
+            raise ValueError("Both title and description are required")
+
+@attrs.define
+class ZenodoDepositionPayload:
+    """Request body of a Zenodo deposition: the metadata wrapped in a ``metadata`` key."""
+    metadata: ZenodoMetadata
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the payload as a plain dict with empty values removed at every nesting level."""
+        d = attrs.asdict(self, recurse=True)
+        # remove empties; the only top-level key is "metadata", so the pruning has to recurse
+        return drop_empty_values(d)
diff --git a/rdmo_zenodo/exports/publish.py b/rdmo_zenodo/exports/publish.py
new file mode 100644
index 0000000..ef7c579
--- /dev/null
+++ b/rdmo_zenodo/exports/publish.py
@@ -0,0 +1,258 @@
+import logging
+
+from django.http import HttpResponseBadRequest
+from django.shortcuts import redirect, render
+from django.utils.formats import localize
+from django.utils.text import slugify
+from django.utils.translation import gettext_lazy as _
+
+import requests
+
+from rdmo.projects.models import Project
+
+from .base import BaseZenodoExportProvider
+from .forms import ZenodoSnapshotForm
+from .metadata.exceptions import
MetadataBuildError
+from .utils import (
+    clear_record_id_from_project_value,
+    get_concept_or_parent_id_from_payload,
+    get_or_create_snapshot,
+    get_record_id_from_project_value,
+    render_and_export_project_from_view,
+    save_record_id_in_project_value,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class ZenodoPublishProvider(BaseZenodoExportProvider):
+    """Export provider that creates a record (or a new version), uploads a rendered view and publishes it."""
+
+    view = None
+    export_format = None
+
+    def get_snapshot_choices(self):
+        """Return ``(id, label)`` pairs for the project's snapshots, newest first."""
+        return [
+            (i.id, f"{i.title} ({localize(i.created)})")
+            for i in self.project.snapshots.order_by('-created')
+        ]
+
+    def get_view_choices(self):
+        """Return ``(id, title)`` pairs for the project's views."""
+        return [
+            (i.id, f"{i.title}")
+            for i in self.project.views.all()
+        ]
+
+    def get_from_session_and_set_on_self(self, request):
+        """Restore project, snapshot, view and export format from the session onto ``self``."""
+        self.project = self.get_project_from_session(request)
+        self.snapshot = self.get_snapshot_from_session(request, self.project)
+        self.view = self.get_view_from_session(request, self.project)
+        self.export_format = self.get_from_session(request, 'export_format')
+
+    def get_project_from_session(self, request):
+        """Load the project stored under ``project_id``, restricted to projects of ``request.user``."""
+        project_id = self.get_from_session(request, 'project_id')
+        return Project.objects.filter_user(request.user).get(id=project_id)
+
+    def get_snapshot_from_session(self, request, project):
+        """Load the snapshot of ``project`` stored under ``snapshot_id``."""
+        snapshot_id = self.get_from_session(request, 'snapshot_id')
+        return project.snapshots.get(id=snapshot_id)
+
+    def get_view_from_session(self, request, project):
+        """Load the view of ``project`` stored under ``view_id``."""
+        view_id = self.get_from_session(request, 'view_id')
+        return project.views.get(id=view_id)
+
+    def render(self):
+        """Render the publish form and remember the offered choices in the session."""
+        snapshot_choices = self.get_snapshot_choices()
+        view_choices = self.get_view_choices()
+
+        self.store_in_session(self.request, 'snapshot_choices', snapshot_choices)
+        self.store_in_session(self.request, "view_choices", view_choices)
+
+        form = ZenodoSnapshotForm(
+            snapshot_choices=snapshot_choices,
+            view_choices=view_choices,
+        )
+        context = {'form': form }
+
+        record_id = get_record_id_from_project_value(self.project)
+        if record_id:
+            context['record_id'] = self.record_uploads_url(record_id)
+
+        return render(self.request, 'plugins/publish_zenodo.html', context=context, status=200)
+
+    def submit(self):
+        """Validate the form, then start a new version of the known record or create a new draft."""
+        snapshot_choices = self.get_from_session(self.request, 'snapshot_choices')
+        view_choices = self.get_from_session(self.request, "view_choices")
+        form = ZenodoSnapshotForm(self.request.POST, snapshot_choices=snapshot_choices, view_choices=view_choices)
+
+        if 'cancel' in self.request.POST:
+            return redirect('project', self.project.id)
+
+        if form.is_valid():
+            snapshot_id = form.cleaned_data['snapshot'] or None
+            self.snapshot = get_or_create_snapshot(self.project, snapshot_id=snapshot_id)
+            view_id = form.cleaned_data['view'] or None
+            self.view = self.project.views.get(pk=view_id)
+            self.export_format = form.cleaned_data['export_format'] or None
+
+            # store project and snapshot in session else they get lost after post
+            self.store_in_session(self.request, 'project_id', self.project.id)
+            self.store_in_session(self.request, 'snapshot_id', self.snapshot.id)
+            self.store_in_session(self.request, 'view_id', self.view.id)
+            self.store_in_session(self.request, 'export_format', self.export_format)
+
+            if record_versions_url := self.validate_record_id_from_project_value_at_zenodo():
+                # if record exists then post new version to zenodo, no data required
+                # a 403 post_with_retry handled in retry.
+                return self.post_with_retry(self.request, record_versions_url, {})
+            else:
+                # else create new draft record
+                try:
+                    payload = self.get_metadata()
+                except MetadataBuildError as e:
+                    form.add_error(None, str(e))
+                    # NOTE(review): this re-renders 'plugins/exports_zenodo.html' while render() uses
+                    # 'plugins/publish_zenodo.html' - confirm the different template is intended
+                    return render(
+                        self.request, 'plugins/exports_zenodo.html', {'form': form}, status=400
+                    )
+                return self.post_with_retry(self.request, self.records_url, payload)
+        else:
+            return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200)
+
+    def validate_record_id_from_project_value_at_zenodo(self):
+        """Check the stored record id against the remote API.
+
+        Returns the ``links.versions`` URL of the record on a 200 response. Returns
+        ``None`` when no id is stored or on any other status code; on a 404 the
+        stored id is cleared.
+        """
+        # Retrieve record_id from the project's stored values
+        record_id = get_record_id_from_project_value(self.project)
+
+        if not record_id:
+            logger.warning("validate record_id: no record ID found in project values.")
+            return None
+
+        # Send a GET request to Zenodo to validate the record ID
+        response = requests.get(self.record_url(record_id), headers=self.authorization_header)
+
+        # Check if the response was successful
+        if response.status_code == 200:
+            logger.info(f"Record ID {record_id} is valid.")
+
+            data = response.json()
+            concept_record_id = get_concept_or_parent_id_from_payload(data)
+            save_record_id_in_project_value(self.project, concept_record_id)
+            versions_url = data.get('links', {}).get('versions')
+            return versions_url
+        elif response.status_code == 404:
+            logger.warning(f"Record ID {record_id} is invalid or not found in {response.request.url}.")
+            # the record_id does not exist, delete it from the project.value.text
+            clear_record_id_from_project_value(self.project)
+        else:
+            # Log any other unexpected response code
+            logger.error(f"Error validating record ID {record_id}: {response.status_code}")
+        return None
+
+    def post_success(self, request, response):
+        """Upload the rendered export to the draft in ``response``, publish it and redirect to the record.
+
+        Every failure (metadata, export, upload, publish, missing record URL) is
+        reported by rendering ``core/error.html``.
+        """
+        # Retrieve project,snapshot,view and export_format from session
+        self.get_from_session_and_set_on_self(request)
+        self.request = request  # and set request on self
+        if not response.json()['is_draft']:  # and ... response.json()['status'] == ...
+            # metadata needs to be posted to the new version with a new request and response
+            zenodo_api_url = response.json().get('links', {}).get('self')
+            try:
+                data = self.get_metadata()
+            except MetadataBuildError as e:
+                return render(request, 'core/error.html', {
+                    'title': _('Metadata error'),
+                    'errors': [_('Error in the metadata'), str(e)]
+                }, status=200)
+
+            response = requests.put(zenodo_api_url, json=data, headers=self.authorized_json_header)
+            logger.debug("PUT to %s", zenodo_api_url)
+
+        payload = response.json()
+        zenodo_url = payload.get("links", {}).get("self_html")
+
+        if zenodo_url:
+            record_id = payload.get('id')
+            concept_record_id = get_concept_or_parent_id_from_payload(payload)
+            files_url = payload.get('links', {}).get('files')
+            export_response = self.post_export_file_to_zenodo(
+                record_id=record_id, files_url=files_url,
+            )
+            if export_response is None:
+                # post_export_file_to_zenodo returns None when record id, files url or export format is missing
+                return render(request, 'core/error.html', {
+                    'title': _('Export error'),
+                    'errors': [_('The project could not be exported.')],
+                }, status=200)
+            if 500 > export_response.status_code >= 400:
+                if isinstance(export_response, HttpResponseBadRequest):
+                    if export_response.content.decode().startswith('Render to format failed.'):
+                        message = 'Render to format failed. Try another view or format.'
+                    else:
+                        message = export_response.content.decode()
+
+                    return render(request, 'core/error.html', {
+                        'title': _('Export error'),
+                        'errors': [_('The project could not be exported.'), message],
+                    }, status=200)
+
+                # Django responses coming from the render step have no ``url`` attribute
+                if getattr(export_response, 'url', '').startswith(self.zenodo_url):
+                    return render(request, 'core/error.html', {
+                        'title': _('Export error'),
+                        # the message belongs to the failed upload, not to the draft response
+                        'errors': [_('The project could not be uploaded.'), export_response.json().get('message')],
+                    }, status=200)
+
+
+            publish_response = self.publish_draft_record(record_id=record_id)
+            if publish_response is None:
+                # publish_draft_record returns None when the payload carried no record id
+                return render(request, 'core/error.html', {
+                    'title': _('Publish error'),
+                    'errors': [_('The project could not be published.')],
+                }, status=200)
+            if 500 > publish_response.status_code >= 400:
+                publish_errors = publish_response.json()
+                return render(request, 'core/error.html', {
+                    'title': _('Publish error'),
+                    'errors': [_('The project could not be published.'),
+                               publish_errors.get('message'),
+                               publish_errors.get('errors'),
+                               ],
+                }, status=200)
+
+            save_record_id_in_project_value(self.project, concept_record_id)
+            return redirect(zenodo_url)
+        else:
+            return render(request, 'core/error.html', {
+                'title': _('ZENODO error'),
+                'errors': [_('The URL of the new publication could not be retrieved.')]
+            }, status=200)
+
+    def post_export_file_to_zenodo(
+        self, record_id=None, files_url=None
+    ):
+        """Render the selected view and upload it as a file of the draft record.
+
+        Returns ``None`` when ``record_id``, ``files_url`` or the export format is
+        missing, the render response when rendering did not return status 200, the
+        response of the file-registration POST when the file entry is not listed
+        in it, and otherwise the response of the commit request.
+        """
+        # https://inveniordm.docs.cern.ch/reference/rest_api_drafts_records/#draft-files
+        if record_id is None or files_url is None or self.export_format is None:
+            logger.debug("post export file failed, missing args")
+            return None
+
+        rdmo_render_response = render_and_export_project_from_view(
+            self.project, self.snapshot, self.export_format, view=self.view
+        )
+        if rdmo_render_response.status_code != 200:
+            logger.error("Render failed: %s", rdmo_render_response.content.decode())
+            return rdmo_render_response
+
+        binary = rdmo_render_response.content
+        export_filename = slugify(self.snapshot.title)
+        filename = f"{export_filename}.{self.export_format}"
+
+        # get access token from the session
+        draft_file_post_response = requests.post(files_url, headers=self.authorization_header, json=[{'key': filename}])
+        entries = draft_file_post_response.json().get('entries', [])
+        draft_file_entry = next(filter(lambda i: i["key"] == filename, entries), None)
+        if draft_file_entry is None:
+            # hand the response back so the caller can report the failure (no debugger in a request handler)
+            logger.error("File entry %s is missing in the response of POST %s", filename, files_url)
+            return draft_file_post_response
+
+        content_url = draft_file_entry.get('links', {}).get('content')
+        _data_content_response = requests.put(content_url, headers=self.authorized_binary_header, data=binary)
+        logger.debug("PUT to %s", content_url)
+
+        commit_url = draft_file_entry.get('links', {}).get('commit')
+        data_commit_response = requests.post(commit_url, headers=self.authorization_header)
+        logger.debug("POST to %s", commit_url)
+
+        return data_commit_response
+
+    def publish_draft_record(self, record_id=None):
+        """POST to the publish URL of ``record_id``; returns the response, or ``None`` without an id."""
+        # https://inveniordm.docs.cern.ch/reference/rest_api_drafts_records/#publish-a-draft-record
+        if record_id is None:
+            logger.debug("POST to publish failed, missing record_id")
+            return None
+        publish_url = self.record_publish_url(record_id)
+        response = requests.post(publish_url, headers=self.authorization_header)
+        # one placeholder per argument, otherwise logging reports a formatting error
+        logger.debug("POST to %s with response %s", publish_url, response)
+        return response
diff --git a/rdmo_zenodo/exports/utils.py b/rdmo_zenodo/exports/utils.py
new file mode 100644
index 0000000..4430b07
--- /dev/null
+++ b/rdmo_zenodo/exports/utils.py
@@ -0,0 +1,130 @@
+from urllib.parse import urlparse
+
+from django.conf import settings
+from django.http import HttpResponseBadRequest
+from django.template import TemplateDoesNotExist, TemplateSyntaxError
+
+from rdmo.core.utils import render_to_format
+from rdmo.domain.models import Attribute
+from rdmo.projects.models.snapshot import Snapshot
+from rdmo.projects.models.value import Value
+from rdmo.projects.utils import get_value_path
+
+DEFAULT_RECORD_ATTRIBUTE_URI = "https://rdmorganiser.github.io/terms/project/metadata/publication/zenodo/record_id"
+
+
+def get_or_create_snapshot(project, snapshot_id=None):
+    """Return the snapshot ``snapshot_id`` of ``project``, or create and save a new one when it is ``None``.
+
+    A new snapshot is titled ``"<project title> #<n>"`` and its description is the
+    project description followed by a note that it was generated automatically.
+    """
+    if snapshot_id is None:
+        new_snapshot_count = project.snapshots.count() + 1
+        # fall back to an empty string so the concatenation below cannot hit None
+        description = project.description or ""
+        if description:
+            description += "\n"
+        description += f"This snapshot({new_snapshot_count}.) was automatically generated by the zenodo-publish export provider."  # noqa: E501
+        snapshot = Snapshot(
+            project=project,
+            title=f"{project.title} #{new_snapshot_count}",  # "Cool project #3"
+            description=description
+        )
+        snapshot.save()
+        return snapshot
+
+    return project.snapshots.get(id=snapshot_id)
+
+
+def get_project_value_with_record_id(project):
+    """Return ``(value, attribute)`` for the record-id attribute of ``project``.
+
+    The attribute URI is read from ``settings.ZENODO_PROVIDER['zenodo_record_id_uri']``
+    with ``DEFAULT_RECORD_ATTRIBUTE_URI`` as fallback; the attribute is created when
+    it does not exist. ``value`` is ``None`` when the project has no such value.
+    """
+
+    record_uri = settings.ZENODO_PROVIDER.get("zenodo_record_id_uri") or DEFAULT_RECORD_ATTRIBUTE_URI
+    uri_prefix, key = split_attribute_uri(record_uri)
+    record_id_attribute, _created = Attribute.objects.get_or_create(
+        uri_prefix=uri_prefix,
+        key=key,
+    )
+    if _created:
+        record_id_attribute.comment = "This attribute was automatically generated by the rdmo_zenodo plugin."
+        record_id_attribute.save()
+
+    project_doi_value = project.values.filter(attribute=record_id_attribute).first()
+    return project_doi_value, record_id_attribute
+
+
+def get_record_id_from_project_value(project):
+    """Return the text of the project's record-id value, or ``None`` when there is no such value."""
+    project_doi_value, _ = get_project_value_with_record_id(project)
+
+    if project_doi_value is not None:
+        return project_doi_value.text
+    else:
+        return None
+
+
+def save_record_id_in_project_value(project, record_id):
+    """Store ``record_id`` as the text of the project's record-id value, creating the value if needed.
+
+    Does nothing when ``project`` or ``record_id`` is ``None``.
+    """
+    if project is None or record_id is None:
+        return
+
+    project_doi_value, record_id_attribute = get_project_value_with_record_id(project)
+
+    if project_doi_value is None:
+        # create the value with the record_id and add it to the project
+        value = Value(project=project, attribute=record_id_attribute, text=record_id)
+        value.save()
+        project.values.add(value)
+    elif project_doi_value.text != record_id:
+        # update and overwrite the value.text
+        project_doi_value.text = record_id
+        project_doi_value.save()
+
+
+def clear_record_id_from_project_value(project):
+    """Clear the record_id text from the project's values by setting it to an empty string."""
+    save_record_id_in_project_value(project, '')
+
+
+def render_and_export_project_from_view(project, snapshot, export_format, view):
+    """Render ``view`` for the project/snapshot and convert it to ``export_format``.
+
+    Returns the response of ``render_to_format`` on success. Template errors and a
+    ``RuntimeError`` from the conversion are turned into an ``HttpResponseBadRequest``
+    whose text starts with "Render from view failed." or "Render to format failed".
+    """
+
+    try:
+        rendered_view = view.render(project, snapshot)
+    except (TemplateDoesNotExist,TemplateSyntaxError) as e:
+        return HttpResponseBadRequest(f"Render from view failed. {e}")
+
+    try:
+        response = render_to_format(
+            None, export_format, project.title, 'projects/project_view_export.html', {
+                'format': export_format,
+                'title': project.title,
+                'view': view,
+                'rendered_view': rendered_view,
+                'resource_path': get_value_path(project, snapshot)
+            }
+        )
+    except RuntimeError as e:
+        return HttpResponseBadRequest(f"Render to format failed. {e}")
+    except (TemplateDoesNotExist,TemplateSyntaxError) as e:
+        return HttpResponseBadRequest(f"Render to format failed, template error. {e}")
+    else:
+        return response
+
+def get_concept_or_parent_id_from_payload(data):
+    """Return ``data['conceptrecid']`` if present, else ``data['parent']['id']``.
+
+    Raises ``KeyError`` when the payload has neither key.
+    """
+    # the conceptrecid is the concept record identifier for all versions of this zenodo record
+    # https://inveniordm.docs.cern.ch/reference/metadata/#system-managed-persistent-identifiers
+    if 'conceptrecid' in data:
+        return data['conceptrecid']
+    elif 'parent' in data:
+        # in invenioRDM it is the parent.id field
+        return data['parent']['id']
+    raise KeyError("payload contains neither 'conceptrecid' nor 'parent'")
+
+def split_attribute_uri(uri: str) -> tuple[str, str]:
+    """Split an attribute URI into ``(uri_prefix, key)``.
+
+    When the first path segment is ``terms`` it becomes part of the prefix
+    (``<scheme>://<host>/terms``) and the remaining segments form the key;
+    otherwise the prefix is ``<scheme>://<host>`` and the whole path is the key.
+    Raises ``ValueError`` when the URI has no path or no key would remain.
+    """
+
+    parsed = urlparse(uri)
+    path = parsed.path.strip("/")
+
+    # "".split("/") yields [""], so emptiness has to be checked on the path itself
+    if not path:
+        raise ValueError("URI has no path segments")
+
+    parts = path.split("/")
+
+    if parts[0] == "terms":
+        uri_prefix = f"{parsed.scheme}://{parsed.netloc}/terms"
+        key = "/".join(parts[1:])
+    else:
+        uri_prefix = f"{parsed.scheme}://{parsed.netloc}"
+        key = "/".join(parts)
+
+    if not key:
+        # e.g. "https://host/terms": a prefix without any attribute key
+        raise ValueError("URI has no path segments")
+
+    return uri_prefix, key
diff --git a/rdmo_zenodo/templates/plugins/publish_zenodo.html b/rdmo_zenodo/templates/plugins/publish_zenodo.html
new file mode 100644
index 0000000..e8b6f70
--- /dev/null
+++ b/rdmo_zenodo/templates/plugins/publish_zenodo.html
@@ -0,0 +1,18 @@
+{% extends 'core/page.html' %}
+{% load i18n %}
+{% load core_tags %}
+
+{% block page %}
+
+

{% trans 'Export and Publish to Zenodo' %}

+ {% if record_id %} +

+ {% trans 'This project has a Zenodo record id' %}: {{ record_id }} +

+ {% else %} +

{% trans 'This project does not have a Zenodo record id.' %}

+ {% endif %} + + {% bootstrap_form submit=_('Publish to Zenodo') %} + +{% endblock %}