From d40251ef234d6cf3b9468bae1bf73761375a5875 Mon Sep 17 00:00:00 2001 From: ctpfaff Date: Fri, 18 Oct 2024 07:47:27 +0200 Subject: [PATCH 01/41] Publish project snapshots to zenodo * Collects metadata for the project snapshot * Creates snapshot questions based document * Creates zenodo deposit, adds metadata, adds the file and publishes the deposit. --- rdmo_zenodo/exports.py | 346 ++++++++++++++++++++++++----------------- 1 file changed, 207 insertions(+), 139 deletions(-) diff --git a/rdmo_zenodo/exports.py b/rdmo_zenodo/exports.py index 9f4a3ff..86cd77f 100644 --- a/rdmo_zenodo/exports.py +++ b/rdmo_zenodo/exports.py @@ -1,201 +1,269 @@ -import logging +import json +import tempfile +from typing import Dict, Any, Tuple, Optional, List, Union +import requests from django import forms from django.conf import settings from django.core.exceptions import ObjectDoesNotExist -from django.shortcuts import redirect, render, reverse +from django.http import HttpRequest, HttpResponse +from django.shortcuts import redirect, render +from django.urls import reverse from django.utils.translation import gettext_lazy as _ +from django.utils import timezone from rdmo.projects.exports import Export from rdmo.services.providers import OauthProviderMixin +from rdmo.projects.models.snapshot import Snapshot -logger = logging.getLogger(__name__) +class ZenodoExportProvider(OauthProviderMixin, Export): - -class BaseZenodoExportProvider(OauthProviderMixin, Export): + RIGHTS_URI_OPTIONS = { + 'dataset_license_types/71': 'cc-by-4.0', + 'dataset_license_types/73': 'cc-by-nc-4.0', + 'dataset_license_types/74': 'cc-by-nd-4.0', + 'dataset_license_types/75': 'cc-by-sa-4.0', + 'dataset_license_types/cc0': 'cc-zero' + } @property - def client_id(self): + def client_id(self) -> str: return settings.ZENODO_PROVIDER['client_id'] @property - def client_secret(self): + def client_secret(self) -> str: return settings.ZENODO_PROVIDER['client_secret'] @property - def zenodo_url(self): - return 
settings.ZENODO_PROVIDER.get('zenodo_url', 'https://sandbox.zenodo.org').strip('/') + def zenodo_url(self) -> str: + return settings.ZENODO_PROVIDER.get('zenodo_url', 'https://sandbox.zenodo.org').rstrip('/') @property - def authorize_url(self): + def authorize_url(self) -> str: return f'{self.zenodo_url}/oauth/authorize' @property - def token_url(self): + def token_url(self) -> str: return f'{self.zenodo_url}/oauth/token' @property - def deposit_url(self): - return f'{self.zenodo_url}/api/records' + def deposit_url(self) -> str: + return f'{self.zenodo_url}/api/deposit/depositions' @property - def redirect_path(self): + def redirect_path(self) -> str: return reverse('oauth_callback', args=['zenodo']) - def get_authorize_params(self, request, state): - return { - 'response_type': 'code', - 'client_id': self.client_id, - 'scope': 'deposit:write', - 'redirect_uri': request.build_absolute_uri(self.redirect_path), - 'state': state - } - - def get_callback_data(self, request): - return { - 'client_id': self.client_id, - 'client_secret': self.client_secret, - 'grant_type': 'authorization_code', - 'redirect_uri': request.build_absolute_uri(self.redirect_path), - 'code': request.GET.get('code') - } - - def get_error_message(self, response): - return response.json().get('errors') - - -class ZenodoExportProvider(BaseZenodoExportProvider): - - rights_uri_options = { - 'dataset_license_types/71': 'cc-by-4.0', - 'dataset_license_types/73': 'cc-by-nc-4.0', - 'dataset_license_types/74': 'cc-by-nd-4.0', - 'dataset_license_types/75': 'cc-by-sa-4.0', - 'dataset_license_types/cc0': 'cc-zero' - } - class Form(forms.Form): - - dataset = forms.CharField(label=_('Select dataset of your project')) + snapshot = forms.ChoiceField(label=_('Select a snapshot of your project')) def __init__(self, *args, **kwargs): - dataset_choices = kwargs.pop('dataset_choices') + snapshot_choices = kwargs.pop('snapshot_choices') super().__init__(*args, **kwargs) + self.fields['snapshot'].choices = 
snapshot_choices - self.fields['dataset'].widget = forms.RadioSelect(choices=dataset_choices) + def render(self) -> HttpResponse: + snapshots = Snapshot.objects.filter(project=self.project) + snapshot_choices = [(str(snapshot.id), f"Snapshot {snapshot.title} - {snapshot.created}") for snapshot in snapshots] - def render(self): - datasets = self.get_set('project/dataset/id') - dataset_choices = [(dataset.set_index, dataset.value)for dataset in datasets] + self.store_in_session(self.request, 'snapshot_choices', snapshot_choices) - self.store_in_session(self.request, 'dataset_choices', dataset_choices) - - form = self.Form( - dataset_choices=dataset_choices - ) + form = self.Form(snapshot_choices=snapshot_choices) return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) - def submit(self): - dataset_choices = self.get_from_session(self.request, 'dataset_choices') - form = self.Form(self.request.POST, dataset_choices=dataset_choices) + def submit(self) -> HttpResponse: + snapshot_choices = self.get_from_session(self.request, 'snapshot_choices') + form = self.Form(self.request.POST, snapshot_choices=snapshot_choices) if 'cancel' in self.request.POST: return redirect('project', self.project.id) if form.is_valid(): - url = self.get_post_url() - data = self.get_post_data(form.cleaned_data['dataset']) + url = self.deposit_url + data = self.get_post_data(form.cleaned_data['snapshot']) return self.post(self.request, url, data) else: return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) - def post_success(self, request, response): - zenodo_url = response.json().get('links', {}).get('self_html') - if zenodo_url: - return redirect(zenodo_url) - else: - return render(request, 'core/error.html', { - 'title': _('ZENODO error'), - 'errors': [_('The URL of the new dataset could not be retrieved.')] - }, status=200) + def get_post_data(self, snapshot_id: str) -> Dict[str, Any]: + snapshot = 
Snapshot.objects.get(id=snapshot_id) + metadata = self._prepare_metadata(snapshot) + docx_file = self._generate_docx(snapshot) - def get_post_url(self): - return self.deposit_url + return { + 'metadata': metadata, + 'file': docx_file + } - def get_post_data(self, set_index): - # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata - metadata = {} + def _prepare_metadata(self, snapshot: Snapshot) -> Dict[str, Any]: + metadata = { + 'title': f"{self.project.title} - Snapshot: {snapshot.title}", + 'upload_type': settings.ZENODO_PROVIDER.get('upload_type', 'dataset'), + 'description': f"{self.project.description or 'No description provided.'}\n\nSnapshot of project '{self.project.title}' taken on {snapshot.created}", + 'publication_date': timezone.now().date().isoformat(), + } - # set the resource_type from the settings resource_type = settings.ZENODO_PROVIDER.get('resource_type') if resource_type: - metadata['resource_type'] = { - 'id': resource_type - } + metadata['resource_type'] = resource_type + + if settings.ZENODO_PROVIDER.get('add_project_members'): + metadata['creators'] = self._get_creators() + + rights = snapshot.values.filter(attribute__uri='project/dataset/sharing/conditions').first() + if rights and rights.option: + metadata['license'] = self.RIGHTS_URI_OPTIONS.get(rights.option.uri_path) - # add the creators from the project members - add_project_members = settings.ZENODO_PROVIDER.get('add_project_members') - if add_project_members: - metadata['creators'] = [] - for user in self.project.user.all(): - creator = { - 'family_name': user.last_name, - 'given_name': user.first_name, - 'type': 'personal' - } - - try: - orcid_socialaccount = user.socialaccount_set.get(provider='orcid') - creator['identifiers'] = [ - { - 'scheme': 'orcid', - 'identifier': orcid_socialaccount.uid - } - ] - except (ObjectDoesNotExist, AttributeError): - pass - - metadata['creators'].append({ - 'person_or_org': creator - }) - - # set the title from the title 
or id or the running index - metadata['title'] = \ - self.get_text('project/dataset/title', set_index=set_index) or \ - self.get_text('project/dataset/id', set_index=set_index) or \ - f'Dataset #{set_index + 1}' - - # set the description - description = self.get_text('project/dataset/description', set_index=set_index) - if description: - metadata['description'] = description - - # set the rights/licenses - for rights in self.get_values('project/dataset/sharing/conditions', set_index=set_index): - if rights.option: - metadata['rights'] = [{ - 'id': self.rights_uri_options.get(rights.option.uri_path) - }] - break - - # set the language from the settings language = settings.ZENODO_PROVIDER.get('language') if language: - metadata['languages'] = [ - {'id': language} - ] + metadata['language'] = language + + keywords = self.get_values('project/research_question/keywords') + if keywords: + metadata['keywords'] = [keyword.text for keyword in keywords] + + notes = settings.ZENODO_PROVIDER.get('notes') + if notes: + metadata['notes'] = notes + + return metadata + + def _get_creators(self) -> List[Dict[str, str]]: + creators = [] + for user in self.project.user.all(): + creator = { + 'name': f"{user.first_name} {user.last_name}" + } + try: + orcid_socialaccount = user.socialaccount_set.get(provider='orcid') + creator['orcid'] = orcid_socialaccount.uid + except (ObjectDoesNotExist, AttributeError): + pass + creators.append(creator) + return creators + + def _generate_docx(self, snapshot: Snapshot) -> Tuple[str, bytes]: + docx_url = self.request.build_absolute_uri( + f'/projects/{self.project.id}/snapshots/{snapshot.id}/answers/export/docx/' + ) + + session = requests.Session() + session.cookies.update(self.request.COOKIES) + + try: + response = session.get(docx_url) + response.raise_for_status() + + with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_file: + temp_file.write(response.content) + temp_file_path = temp_file.name + + with open(temp_file_path, 
'rb') as f: + file_content = f.read() + + return ('snapshot_answers.docx', file_content) + except requests.exceptions.RequestException as e: + raise Exception(f"Failed to generate DOCX. Error: {str(e)}") + + def post(self, request: HttpRequest, url: str, data: Dict[str, Any]) -> HttpResponse: + access_token = self.get_from_session(request, 'access_token') + if not access_token: + self.store_in_session(request, 'request', ('post', url, data)) + return self.authorize(request) + + json_headers = self.get_authorization_headers(access_token) + json_headers['Content-Type'] = 'application/json' + + deposition = self._create_deposition(url, data['metadata'], json_headers) + if isinstance(deposition, HttpResponse): + return deposition + + if 'file' in data: + file_upload_result = self._upload_file(deposition, data['file'], access_token) + if isinstance(file_upload_result, HttpResponse): + return file_upload_result + + publish_result = self._publish_deposition(deposition['id'], json_headers) + return publish_result if isinstance(publish_result, HttpResponse) else self.success_response(request, publish_result) + + def _create_deposition(self, url: str, metadata: Dict[str, Any], headers: Dict[str, str]) -> Union[Dict[str, Any], HttpResponse]: + response = requests.post(url, json={'metadata': metadata}, headers=headers) + + if response.status_code == 401: + self.pop_from_session(self.request, 'access_token') + self.store_in_session(self.request, 'request', ('post', url, {'metadata': metadata})) + return self.authorize(self.request) + + if response.status_code != 201: + return self.error_response(self.request, response) + + return response.json() + + def _upload_file(self, deposition: Dict[str, Any], file_data: Tuple[str, bytes], access_token: str) -> Optional[HttpResponse]: + file_name, file_content = file_data + bucket_url = deposition['links']['bucket'] + file_url = f"{bucket_url}/{file_name}" + + binary_headers = self.get_authorization_headers(access_token) + 
binary_headers['Content-Type'] = 'application/octet-stream' + + upload_response = requests.put(file_url, headers=binary_headers, data=file_content) + + if upload_response.status_code not in [200, 201]: + return self.error_response(self.request, upload_response) - # set the publisher from the settings - publisher = settings.ZENODO_PROVIDER.get('publisher') - if publisher: - metadata['publisher'] = publisher + def _publish_deposition(self, deposition_id: str, headers: Dict[str, str]) -> Union[requests.Response, HttpResponse]: + publish_url = f"{self.deposit_url}/{deposition_id}/actions/publish" + publish_response = requests.post(publish_url, headers=headers) - # set the funding from the settings - funding = settings.ZENODO_PROVIDER.get('funding') - if funding: - metadata['funding'] = funding + if publish_response.status_code != 202: + return self.error_response(self.request, publish_response) + return publish_response + + def get_authorize_params(self, request: HttpRequest, state: str) -> Dict[str, str]: return { - 'metadata': metadata + 'response_type': 'code', + 'client_id': self.client_id, + 'scope': 'deposit:write', + 'redirect_uri': request.build_absolute_uri(self.redirect_path), + 'state': state + } + + def get_callback_data(self, request: HttpRequest) -> Dict[str, str]: + return { + 'client_id': self.client_id, + 'client_secret': self.client_secret, + 'grant_type': 'authorization_code', + 'redirect_uri': request.build_absolute_uri(self.redirect_path), + 'code': request.GET.get('code') } + + def error_response(self, request: HttpRequest, response: requests.Response) -> HttpResponse: + error_message = self._get_error_message(response) + return render(request, 'core/error.html', { + 'title': _('Zenodo error'), + 'errors': [error_message] + }, status=response.status_code) + + def success_response(self, request: HttpRequest, response: requests.Response) -> HttpResponse: + zenodo_url = response.json().get('links', {}).get('html') + if zenodo_url: + return 
redirect(zenodo_url) + else: + return render(request, 'core/error.html', { + 'title': _('Zenodo error'), + 'errors': [_('The URL of the new deposition could not be retrieved.')] + }, status=200) + + def _get_error_message(self, response: requests.Response) -> str: + try: + error_json = response.json() + error_message = error_json.get('message', str(response.content)) + if 'errors' in error_json: + error_details = '; '.join([f"{error.get('field', '')}: {error.get('message', '')}" for error in error_json['errors']]) + error_message += f" Details: {error_details}" + return error_message + except ValueError: + return str(response.content) From a8a37b475b77588e46e2a6f7d8b3f16f53737b4f Mon Sep 17 00:00:00 2001 From: ctpfaff Date: Fri, 18 Oct 2024 08:30:28 +0200 Subject: [PATCH 02/41] Enhance Zenodo Export Provider Refactored the Zenodo Export Provider to improve code quality, reduce duplication, and enhance error handling. --- rdmo_zenodo/exports.py | 240 ++++++++++++++++++++++++++++++++--------- 1 file changed, 189 insertions(+), 51 deletions(-) diff --git a/rdmo_zenodo/exports.py b/rdmo_zenodo/exports.py index 86cd77f..86f4623 100644 --- a/rdmo_zenodo/exports.py +++ b/rdmo_zenodo/exports.py @@ -1,8 +1,7 @@ import json import tempfile from typing import Dict, Any, Tuple, Optional, List, Union - -import requests +from contextlib import contextmanager from django import forms from django.conf import settings from django.core.exceptions import ObjectDoesNotExist @@ -11,12 +10,18 @@ from django.urls import reverse from django.utils.translation import gettext_lazy as _ from django.utils import timezone - +import requests from rdmo.projects.exports import Export from rdmo.services.providers import OauthProviderMixin from rdmo.projects.models.snapshot import Snapshot class ZenodoExportProvider(OauthProviderMixin, Export): + """ + A provider for exporting project snapshots to Zenodo. 
+ + Attributes: + RIGHTS_URI_OPTIONS (Dict[str, str]): Mapping of rights URI options to their corresponding license names. + """ RIGHTS_URI_OPTIONS = { 'dataset_license_types/71': 'cc-by-4.0', @@ -28,75 +33,118 @@ class ZenodoExportProvider(OauthProviderMixin, Export): @property def client_id(self) -> str: + """Return the Zenodo client ID from settings.""" return settings.ZENODO_PROVIDER['client_id'] @property def client_secret(self) -> str: + """Return the Zenodo client secret from settings.""" return settings.ZENODO_PROVIDER['client_secret'] @property def zenodo_url(self) -> str: + """Return the base Zenodo URL from settings, defaulting to the sandbox URL.""" return settings.ZENODO_PROVIDER.get('zenodo_url', 'https://sandbox.zenodo.org').rstrip('/') @property def authorize_url(self) -> str: + """Construct the Zenodo authorization URL.""" return f'{self.zenodo_url}/oauth/authorize' @property def token_url(self) -> str: + """Construct the Zenodo token URL.""" return f'{self.zenodo_url}/oauth/token' @property def deposit_url(self) -> str: + """Construct the Zenodo deposit URL.""" return f'{self.zenodo_url}/api/deposit/depositions' @property def redirect_path(self) -> str: + """Return the redirect path for the OAuth callback.""" return reverse('oauth_callback', args=['zenodo']) class Form(forms.Form): + """ + Form for selecting a project snapshot to export to Zenodo. + + Attributes: + snapshot (forms.ChoiceField): Field for selecting a snapshot. + """ snapshot = forms.ChoiceField(label=_('Select a snapshot of your project')) def __init__(self, *args, **kwargs): + """ + Initialize the form with snapshot choices. + + :param args: Standard form arguments. + :param kwargs: Standard form keyword arguments, including'snapshot_choices'. 
+ """ snapshot_choices = kwargs.pop('snapshot_choices') super().__init__(*args, **kwargs) self.fields['snapshot'].choices = snapshot_choices def render(self) -> HttpResponse: + """ + Render the form for selecting a snapshot to export to Zenodo. + + :return: An HTTP response with the rendered form. + """ + request = self.request # Local variable for readability snapshots = Snapshot.objects.filter(project=self.project) snapshot_choices = [(str(snapshot.id), f"Snapshot {snapshot.title} - {snapshot.created}") for snapshot in snapshots] - self.store_in_session(self.request, 'snapshot_choices', snapshot_choices) + self.store_in_session(request,'snapshot_choices', snapshot_choices) form = self.Form(snapshot_choices=snapshot_choices) - return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) + return render(request, 'plugins/exports_zenodo.html', {'form': form}, status=200) def submit(self) -> HttpResponse: - snapshot_choices = self.get_from_session(self.request, 'snapshot_choices') - form = self.Form(self.request.POST, snapshot_choices=snapshot_choices) + """ + Handle the form submission for exporting a snapshot to Zenodo. + + :return: An HTTP response redirecting to the project or initiating the export. 
+ """ + request = self.request + snapshot_choices = self.get_from_session(request,'snapshot_choices') + form = self.Form(request.POST, snapshot_choices=snapshot_choices) - if 'cancel' in self.request.POST: + if 'cancel' in request.POST: return redirect('project', self.project.id) if form.is_valid(): url = self.deposit_url data = self.get_post_data(form.cleaned_data['snapshot']) - return self.post(self.request, url, data) + return self.post(request, url, data) else: - return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) + return render(request, 'plugins/exports_zenodo.html', {'form': form}, status=200) def get_post_data(self, snapshot_id: str) -> Dict[str, Any]: + """ + Prepare the data for the POST request to Zenodo. + + :param snapshot_id: The ID of the selected snapshot. + :return: A dictionary containing the metadata and file. + """ snapshot = Snapshot.objects.get(id=snapshot_id) metadata = self._prepare_metadata(snapshot) docx_file = self._generate_docx(snapshot) return { - 'metadata': metadata, + 'metadata': metadata, 'file': docx_file } def _prepare_metadata(self, snapshot: Snapshot) -> Dict[str, Any]: + """ + Prepare the metadata for the Zenodo deposition. + + :param snapshot: The snapshot to generate metadata for. + :return: A dictionary containing the metadata. 
+ """ metadata = { 'title': f"{self.project.title} - Snapshot: {snapshot.title}", 'upload_type': settings.ZENODO_PROVIDER.get('upload_type', 'dataset'), @@ -104,6 +152,7 @@ def _prepare_metadata(self, snapshot: Snapshot) -> Dict[str, Any]: 'publication_date': timezone.now().date().isoformat(), } + # Add additional metadata fields as needed resource_type = settings.ZENODO_PROVIDER.get('resource_type') if resource_type: metadata['resource_type'] = resource_type @@ -130,6 +179,11 @@ def _prepare_metadata(self, snapshot: Snapshot) -> Dict[str, Any]: return metadata def _get_creators(self) -> List[Dict[str, str]]: + """ + Retrieve the creators (project members) for the metadata. + + :return: A list of dictionaries containing creator information. + """ creators = [] for user in self.project.user.all(): creator = { @@ -143,33 +197,56 @@ def _get_creators(self) -> List[Dict[str, str]]: creators.append(creator) return creators + @contextmanager + def _temp_file(self, suffix='.docx'): + """ + Context manager for creating a temporary file. + + :param suffix: The file suffix (default: '.docx'). + :yield: The temporary file path. + """ + with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file: + yield temp_file.name + def _generate_docx(self, snapshot: Snapshot) -> Tuple[str, bytes]: - docx_url = self.request.build_absolute_uri( + """ + Generate a DOCX file for the given snapshot. + + :param snapshot: The snapshot to generate the DOCX for. + :return: A tuple containing the file name and content. 
+ """ + request = self.request + docx_url = request.build_absolute_uri( f'/projects/{self.project.id}/snapshots/{snapshot.id}/answers/export/docx/' ) - session = requests.Session() - session.cookies.update(self.request.COOKIES) - - try: - response = session.get(docx_url) - response.raise_for_status() - - with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_file: - temp_file.write(response.content) - temp_file_path = temp_file.name - - with open(temp_file_path, 'rb') as f: - file_content = f.read() - - return ('snapshot_answers.docx', file_content) - except requests.exceptions.RequestException as e: - raise Exception(f"Failed to generate DOCX. Error: {str(e)}") + with requests.Session() as session: + session.cookies.update(request.COOKIES) + try: + response = session.get(docx_url) + response.raise_for_status() + with self._temp_file() as temp_file_path: + with open(temp_file_path, 'wb') as temp_file: + temp_file.write(response.content) + with open(temp_file_path, 'rb') as file: + file_content = file.read() + file_name ='snapshot_answers.docx' + return file_name, file_content + except requests.exceptions.RequestException as e: + raise Exception(f"Failed to generate DOCX. Error: {str(e)}") def post(self, request: HttpRequest, url: str, data: Dict[str, Any]) -> HttpResponse: + """ + Handle the POST request to Zenodo. + + :param request: The HTTP request. + :param url: The URL for the POST request. + :param data: The data to be sent. + :return: An HTTP response. 
+ """ access_token = self.get_from_session(request, 'access_token') if not access_token: - self.store_in_session(request, 'request', ('post', url, data)) + self.store_in_session(request,'request', ('post', url, data)) return self.authorize(request) json_headers = self.get_authorization_headers(access_token) @@ -187,20 +264,44 @@ def post(self, request: HttpRequest, url: str, data: Dict[str, Any]) -> HttpResp publish_result = self._publish_deposition(deposition['id'], json_headers) return publish_result if isinstance(publish_result, HttpResponse) else self.success_response(request, publish_result) - def _create_deposition(self, url: str, metadata: Dict[str, Any], headers: Dict[str, str]) -> Union[Dict[str, Any], HttpResponse]: - response = requests.post(url, json={'metadata': metadata}, headers=headers) + def _handle_zenodo_response(self, response: requests.Response) -> Optional[HttpResponse]: + """ + Handle a response from Zenodo, returning an HTTP response if an error occurs. + :param response: The Zenodo response. + :return: An HTTP response if an error occurs, otherwise None. + """ if response.status_code == 401: self.pop_from_session(self.request, 'access_token') - self.store_in_session(self.request, 'request', ('post', url, {'metadata': metadata})) + self.store_in_session(self.request,'request', ('post', response.url, {})) return self.authorize(self.request) - - if response.status_code != 201: + elif response.status_code not in [200, 201, 202]: return self.error_response(self.request, response) + def _create_deposition(self, url: str, metadata: Dict[str, Any], headers: Dict[str, str]) -> Union[Dict[str, Any], HttpResponse]: + """ + Create a new deposition on Zenodo. + + :param url: The URL for the deposition creation. + :param metadata: The metadata for the deposition. + :param headers: The headers for the request. + :return: The deposition data or an HTTP response if an error occurs. 
+ """ + response = requests.post(url, json={'metadata': metadata}, headers=headers) + result = self._handle_zenodo_response(response) + if result: + return result return response.json() def _upload_file(self, deposition: Dict[str, Any], file_data: Tuple[str, bytes], access_token: str) -> Optional[HttpResponse]: + """ + Upload a file to the deposition on Zenodo. + + :param deposition: The deposition data. + :param file_data: The file name and content. + :param access_token: The access token for authentication. + :return: An HTTP response if an error occurs, otherwise None. + """ file_name, file_content = file_data bucket_url = deposition['links']['bucket'] file_url = f"{bucket_url}/{file_name}" @@ -208,39 +309,63 @@ def _upload_file(self, deposition: Dict[str, Any], file_data: Tuple[str, bytes], binary_headers = self.get_authorization_headers(access_token) binary_headers['Content-Type'] = 'application/octet-stream' - upload_response = requests.put(file_url, headers=binary_headers, data=file_content) - - if upload_response.status_code not in [200, 201]: - return self.error_response(self.request, upload_response) + response = requests.put(file_url, headers=binary_headers, data=file_content) + return self._handle_zenodo_response(response) def _publish_deposition(self, deposition_id: str, headers: Dict[str, str]) -> Union[requests.Response, HttpResponse]: + """ + Publish the deposition on Zenodo. + + :param deposition_id: The ID of the deposition to publish. + :param headers: The headers for the request. + :return: The response from Zenodo or an HTTP response if an error occurs. 
+ """ publish_url = f"{self.deposit_url}/{deposition_id}/actions/publish" - publish_response = requests.post(publish_url, headers=headers) - - if publish_response.status_code != 202: - return self.error_response(self.request, publish_response) - - return publish_response + response = requests.post(publish_url, headers=headers) + result = self._handle_zenodo_response(response) + if result: + return result + return response def get_authorize_params(self, request: HttpRequest, state: str) -> Dict[str, str]: + """ + Prepare the parameters for the authorization request. + + :param request: The HTTP request. + :param state: The state parameter. + :return: A dictionary containing the authorization parameters. + """ return { - 'response_type': 'code', + 'response_type': 'code', 'client_id': self.client_id, - 'scope': 'deposit:write', - 'redirect_uri': request.build_absolute_uri(self.redirect_path), - 'state': state + 'scope': 'deposit:write', + 'redirect_uri': request.build_absolute_uri(self.redirect_path), + 'state': state } def get_callback_data(self, request: HttpRequest) -> Dict[str, str]: + """ + Prepare the data for the callback request. + + :param request: The HTTP request. + :return: A dictionary containing the callback data. + """ return { 'client_id': self.client_id, 'client_secret': self.client_secret, 'grant_type': 'authorization_code', - 'redirect_uri': request.build_absolute_uri(self.redirect_path), + 'redirect_uri': request.build_absolute_uri(self.redirect_path), 'code': request.GET.get('code') } def error_response(self, request: HttpRequest, response: requests.Response) -> HttpResponse: + """ + Generate an error response based on the Zenodo response. + + :param request: The HTTP request. + :param response: The Zenodo response. + :return: An HTTP error response. 
+ """ error_message = self._get_error_message(response) return render(request, 'core/error.html', { 'title': _('Zenodo error'), @@ -248,6 +373,13 @@ def error_response(self, request: HttpRequest, response: requests.Response) -> H }, status=response.status_code) def success_response(self, request: HttpRequest, response: requests.Response) -> HttpResponse: + """ + Generate a success response after a successful deposition. + + :param request: The HTTP request. + :param response: The Zenodo response. + :return: An HTTP response redirecting to the deposition or an error page. + """ zenodo_url = response.json().get('links', {}).get('html') if zenodo_url: return redirect(zenodo_url) @@ -258,6 +390,12 @@ def success_response(self, request: HttpRequest, response: requests.Response) -> }, status=200) def _get_error_message(self, response: requests.Response) -> str: + """ + Extract a user-friendly error message from the Zenodo response. + + :param response: The Zenodo response. + :return: A formatted error message. 
+ """ try: error_json = response.json() error_message = error_json.get('message', str(response.content)) @@ -265,5 +403,5 @@ def _get_error_message(self, response: requests.Response) -> str: error_details = '; '.join([f"{error.get('field', '')}: {error.get('message', '')}" for error in error_json['errors']]) error_message += f" Details: {error_details}" return error_message - except ValueError: + except json.JSONDecodeError: return str(response.content) From 877ce86f6ff4700cdcebacb5bebed2cccc7c79ec Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 17 Oct 2024 10:59:05 +0200 Subject: [PATCH 03/41] refactor: split exports and add publish provider Signed-off-by: David Wallace --- README.md | 3 +- rdmo_zenodo/exports.py | 407 -------------------------------- rdmo_zenodo/exports/__init__.py | 2 + rdmo_zenodo/exports/base.py | 62 +++++ rdmo_zenodo/exports/exports.py | 149 ++++++++++++ rdmo_zenodo/exports/publish.py | 111 +++++++++ 6 files changed, 326 insertions(+), 408 deletions(-) delete mode 100644 rdmo_zenodo/exports.py create mode 100644 rdmo_zenodo/exports/__init__.py create mode 100644 rdmo_zenodo/exports/base.py create mode 100644 rdmo_zenodo/exports/exports.py create mode 100644 rdmo_zenodo/exports/publish.py diff --git a/README.md b/README.md index 49ca766..273421d 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,8 @@ Add the plugin to `PROJECT_EXPORTS` in `config/settings/local.py`: ```python PROJECT_EXPORTS += [ - ('zenodo', _('Directly to Zenodo'), 'rdmo_zenodo.exports.ZenodoExportProvider') + ('zenodo', _('Directly to Zenodo'), 'rdmo_zenodo.exports.ZenodoExportProvider'), + ('zenodo', _('Directly to Zenodo'), 'rdmo_zenodo.exports.ZenodoPublishProvider') ] ``` diff --git a/rdmo_zenodo/exports.py b/rdmo_zenodo/exports.py deleted file mode 100644 index 86f4623..0000000 --- a/rdmo_zenodo/exports.py +++ /dev/null @@ -1,407 +0,0 @@ -import json -import tempfile -from typing import Dict, Any, Tuple, Optional, List, Union -from contextlib import 
contextmanager -from django import forms -from django.conf import settings -from django.core.exceptions import ObjectDoesNotExist -from django.http import HttpRequest, HttpResponse -from django.shortcuts import redirect, render -from django.urls import reverse -from django.utils.translation import gettext_lazy as _ -from django.utils import timezone -import requests -from rdmo.projects.exports import Export -from rdmo.services.providers import OauthProviderMixin -from rdmo.projects.models.snapshot import Snapshot - -class ZenodoExportProvider(OauthProviderMixin, Export): - """ - A provider for exporting project snapshots to Zenodo. - - Attributes: - RIGHTS_URI_OPTIONS (Dict[str, str]): Mapping of rights URI options to their corresponding license names. - """ - - RIGHTS_URI_OPTIONS = { - 'dataset_license_types/71': 'cc-by-4.0', - 'dataset_license_types/73': 'cc-by-nc-4.0', - 'dataset_license_types/74': 'cc-by-nd-4.0', - 'dataset_license_types/75': 'cc-by-sa-4.0', - 'dataset_license_types/cc0': 'cc-zero' - } - - @property - def client_id(self) -> str: - """Return the Zenodo client ID from settings.""" - return settings.ZENODO_PROVIDER['client_id'] - - @property - def client_secret(self) -> str: - """Return the Zenodo client secret from settings.""" - return settings.ZENODO_PROVIDER['client_secret'] - - @property - def zenodo_url(self) -> str: - """Return the base Zenodo URL from settings, defaulting to the sandbox URL.""" - return settings.ZENODO_PROVIDER.get('zenodo_url', 'https://sandbox.zenodo.org').rstrip('/') - - @property - def authorize_url(self) -> str: - """Construct the Zenodo authorization URL.""" - return f'{self.zenodo_url}/oauth/authorize' - - @property - def token_url(self) -> str: - """Construct the Zenodo token URL.""" - return f'{self.zenodo_url}/oauth/token' - - @property - def deposit_url(self) -> str: - """Construct the Zenodo deposit URL.""" - return f'{self.zenodo_url}/api/deposit/depositions' - - @property - def redirect_path(self) -> str: - 
"""Return the redirect path for the OAuth callback.""" - return reverse('oauth_callback', args=['zenodo']) - - class Form(forms.Form): - """ - Form for selecting a project snapshot to export to Zenodo. - - Attributes: - snapshot (forms.ChoiceField): Field for selecting a snapshot. - """ - snapshot = forms.ChoiceField(label=_('Select a snapshot of your project')) - - def __init__(self, *args, **kwargs): - """ - Initialize the form with snapshot choices. - - :param args: Standard form arguments. - :param kwargs: Standard form keyword arguments, including'snapshot_choices'. - """ - snapshot_choices = kwargs.pop('snapshot_choices') - super().__init__(*args, **kwargs) - self.fields['snapshot'].choices = snapshot_choices - - def render(self) -> HttpResponse: - """ - Render the form for selecting a snapshot to export to Zenodo. - - :return: An HTTP response with the rendered form. - """ - request = self.request # Local variable for readability - snapshots = Snapshot.objects.filter(project=self.project) - snapshot_choices = [(str(snapshot.id), f"Snapshot {snapshot.title} - {snapshot.created}") for snapshot in snapshots] - - self.store_in_session(request,'snapshot_choices', snapshot_choices) - - form = self.Form(snapshot_choices=snapshot_choices) - - return render(request, 'plugins/exports_zenodo.html', {'form': form}, status=200) - - def submit(self) -> HttpResponse: - """ - Handle the form submission for exporting a snapshot to Zenodo. - - :return: An HTTP response redirecting to the project or initiating the export. 
- """ - request = self.request - snapshot_choices = self.get_from_session(request,'snapshot_choices') - form = self.Form(request.POST, snapshot_choices=snapshot_choices) - - if 'cancel' in request.POST: - return redirect('project', self.project.id) - - if form.is_valid(): - url = self.deposit_url - data = self.get_post_data(form.cleaned_data['snapshot']) - return self.post(request, url, data) - else: - return render(request, 'plugins/exports_zenodo.html', {'form': form}, status=200) - - def get_post_data(self, snapshot_id: str) -> Dict[str, Any]: - """ - Prepare the data for the POST request to Zenodo. - - :param snapshot_id: The ID of the selected snapshot. - :return: A dictionary containing the metadata and file. - """ - snapshot = Snapshot.objects.get(id=snapshot_id) - metadata = self._prepare_metadata(snapshot) - docx_file = self._generate_docx(snapshot) - - return { - 'metadata': metadata, - 'file': docx_file - } - - def _prepare_metadata(self, snapshot: Snapshot) -> Dict[str, Any]: - """ - Prepare the metadata for the Zenodo deposition. - - :param snapshot: The snapshot to generate metadata for. - :return: A dictionary containing the metadata. 
- """ - metadata = { - 'title': f"{self.project.title} - Snapshot: {snapshot.title}", - 'upload_type': settings.ZENODO_PROVIDER.get('upload_type', 'dataset'), - 'description': f"{self.project.description or 'No description provided.'}\n\nSnapshot of project '{self.project.title}' taken on {snapshot.created}", - 'publication_date': timezone.now().date().isoformat(), - } - - # Add additional metadata fields as needed - resource_type = settings.ZENODO_PROVIDER.get('resource_type') - if resource_type: - metadata['resource_type'] = resource_type - - if settings.ZENODO_PROVIDER.get('add_project_members'): - metadata['creators'] = self._get_creators() - - rights = snapshot.values.filter(attribute__uri='project/dataset/sharing/conditions').first() - if rights and rights.option: - metadata['license'] = self.RIGHTS_URI_OPTIONS.get(rights.option.uri_path) - - language = settings.ZENODO_PROVIDER.get('language') - if language: - metadata['language'] = language - - keywords = self.get_values('project/research_question/keywords') - if keywords: - metadata['keywords'] = [keyword.text for keyword in keywords] - - notes = settings.ZENODO_PROVIDER.get('notes') - if notes: - metadata['notes'] = notes - - return metadata - - def _get_creators(self) -> List[Dict[str, str]]: - """ - Retrieve the creators (project members) for the metadata. - - :return: A list of dictionaries containing creator information. - """ - creators = [] - for user in self.project.user.all(): - creator = { - 'name': f"{user.first_name} {user.last_name}" - } - try: - orcid_socialaccount = user.socialaccount_set.get(provider='orcid') - creator['orcid'] = orcid_socialaccount.uid - except (ObjectDoesNotExist, AttributeError): - pass - creators.append(creator) - return creators - - @contextmanager - def _temp_file(self, suffix='.docx'): - """ - Context manager for creating a temporary file. - - :param suffix: The file suffix (default: '.docx'). - :yield: The temporary file path. 
- """ - with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file: - yield temp_file.name - - def _generate_docx(self, snapshot: Snapshot) -> Tuple[str, bytes]: - """ - Generate a DOCX file for the given snapshot. - - :param snapshot: The snapshot to generate the DOCX for. - :return: A tuple containing the file name and content. - """ - request = self.request - docx_url = request.build_absolute_uri( - f'/projects/{self.project.id}/snapshots/{snapshot.id}/answers/export/docx/' - ) - - with requests.Session() as session: - session.cookies.update(request.COOKIES) - try: - response = session.get(docx_url) - response.raise_for_status() - with self._temp_file() as temp_file_path: - with open(temp_file_path, 'wb') as temp_file: - temp_file.write(response.content) - with open(temp_file_path, 'rb') as file: - file_content = file.read() - file_name ='snapshot_answers.docx' - return file_name, file_content - except requests.exceptions.RequestException as e: - raise Exception(f"Failed to generate DOCX. Error: {str(e)}") - - def post(self, request: HttpRequest, url: str, data: Dict[str, Any]) -> HttpResponse: - """ - Handle the POST request to Zenodo. - - :param request: The HTTP request. - :param url: The URL for the POST request. - :param data: The data to be sent. - :return: An HTTP response. 
- """ - access_token = self.get_from_session(request, 'access_token') - if not access_token: - self.store_in_session(request,'request', ('post', url, data)) - return self.authorize(request) - - json_headers = self.get_authorization_headers(access_token) - json_headers['Content-Type'] = 'application/json' - - deposition = self._create_deposition(url, data['metadata'], json_headers) - if isinstance(deposition, HttpResponse): - return deposition - - if 'file' in data: - file_upload_result = self._upload_file(deposition, data['file'], access_token) - if isinstance(file_upload_result, HttpResponse): - return file_upload_result - - publish_result = self._publish_deposition(deposition['id'], json_headers) - return publish_result if isinstance(publish_result, HttpResponse) else self.success_response(request, publish_result) - - def _handle_zenodo_response(self, response: requests.Response) -> Optional[HttpResponse]: - """ - Handle a response from Zenodo, returning an HTTP response if an error occurs. - - :param response: The Zenodo response. - :return: An HTTP response if an error occurs, otherwise None. - """ - if response.status_code == 401: - self.pop_from_session(self.request, 'access_token') - self.store_in_session(self.request,'request', ('post', response.url, {})) - return self.authorize(self.request) - elif response.status_code not in [200, 201, 202]: - return self.error_response(self.request, response) - - def _create_deposition(self, url: str, metadata: Dict[str, Any], headers: Dict[str, str]) -> Union[Dict[str, Any], HttpResponse]: - """ - Create a new deposition on Zenodo. - - :param url: The URL for the deposition creation. - :param metadata: The metadata for the deposition. - :param headers: The headers for the request. - :return: The deposition data or an HTTP response if an error occurs. 
- """ - response = requests.post(url, json={'metadata': metadata}, headers=headers) - result = self._handle_zenodo_response(response) - if result: - return result - return response.json() - - def _upload_file(self, deposition: Dict[str, Any], file_data: Tuple[str, bytes], access_token: str) -> Optional[HttpResponse]: - """ - Upload a file to the deposition on Zenodo. - - :param deposition: The deposition data. - :param file_data: The file name and content. - :param access_token: The access token for authentication. - :return: An HTTP response if an error occurs, otherwise None. - """ - file_name, file_content = file_data - bucket_url = deposition['links']['bucket'] - file_url = f"{bucket_url}/{file_name}" - - binary_headers = self.get_authorization_headers(access_token) - binary_headers['Content-Type'] = 'application/octet-stream' - - response = requests.put(file_url, headers=binary_headers, data=file_content) - return self._handle_zenodo_response(response) - - def _publish_deposition(self, deposition_id: str, headers: Dict[str, str]) -> Union[requests.Response, HttpResponse]: - """ - Publish the deposition on Zenodo. - - :param deposition_id: The ID of the deposition to publish. - :param headers: The headers for the request. - :return: The response from Zenodo or an HTTP response if an error occurs. - """ - publish_url = f"{self.deposit_url}/{deposition_id}/actions/publish" - response = requests.post(publish_url, headers=headers) - result = self._handle_zenodo_response(response) - if result: - return result - return response - - def get_authorize_params(self, request: HttpRequest, state: str) -> Dict[str, str]: - """ - Prepare the parameters for the authorization request. - - :param request: The HTTP request. - :param state: The state parameter. - :return: A dictionary containing the authorization parameters. 
- """ - return { - 'response_type': 'code', - 'client_id': self.client_id, - 'scope': 'deposit:write', - 'redirect_uri': request.build_absolute_uri(self.redirect_path), - 'state': state - } - - def get_callback_data(self, request: HttpRequest) -> Dict[str, str]: - """ - Prepare the data for the callback request. - - :param request: The HTTP request. - :return: A dictionary containing the callback data. - """ - return { - 'client_id': self.client_id, - 'client_secret': self.client_secret, - 'grant_type': 'authorization_code', - 'redirect_uri': request.build_absolute_uri(self.redirect_path), - 'code': request.GET.get('code') - } - - def error_response(self, request: HttpRequest, response: requests.Response) -> HttpResponse: - """ - Generate an error response based on the Zenodo response. - - :param request: The HTTP request. - :param response: The Zenodo response. - :return: An HTTP error response. - """ - error_message = self._get_error_message(response) - return render(request, 'core/error.html', { - 'title': _('Zenodo error'), - 'errors': [error_message] - }, status=response.status_code) - - def success_response(self, request: HttpRequest, response: requests.Response) -> HttpResponse: - """ - Generate a success response after a successful deposition. - - :param request: The HTTP request. - :param response: The Zenodo response. - :return: An HTTP response redirecting to the deposition or an error page. - """ - zenodo_url = response.json().get('links', {}).get('html') - if zenodo_url: - return redirect(zenodo_url) - else: - return render(request, 'core/error.html', { - 'title': _('Zenodo error'), - 'errors': [_('The URL of the new deposition could not be retrieved.')] - }, status=200) - - def _get_error_message(self, response: requests.Response) -> str: - """ - Extract a user-friendly error message from the Zenodo response. - - :param response: The Zenodo response. - :return: A formatted error message. 
- """ - try: - error_json = response.json() - error_message = error_json.get('message', str(response.content)) - if 'errors' in error_json: - error_details = '; '.join([f"{error.get('field', '')}: {error.get('message', '')}" for error in error_json['errors']]) - error_message += f" Details: {error_details}" - return error_message - except json.JSONDecodeError: - return str(response.content) diff --git a/rdmo_zenodo/exports/__init__.py b/rdmo_zenodo/exports/__init__.py new file mode 100644 index 0000000..e442d58 --- /dev/null +++ b/rdmo_zenodo/exports/__init__.py @@ -0,0 +1,2 @@ +from .exports import ZenodoExportProvider +from .publish import ZenodoPublishProvider \ No newline at end of file diff --git a/rdmo_zenodo/exports/base.py b/rdmo_zenodo/exports/base.py new file mode 100644 index 0000000..409f8c3 --- /dev/null +++ b/rdmo_zenodo/exports/base.py @@ -0,0 +1,62 @@ +import logging + +from django.conf import settings +from django.shortcuts import reverse +from django.utils.translation import gettext_lazy as _ + +from rdmo.projects.exports import Export +from rdmo.services.providers import OauthProviderMixin + +logger = logging.getLogger(__name__) + + +class BaseZenodoExportProvider(OauthProviderMixin, Export): + + @property + def client_id(self): + return settings.ZENODO_PROVIDER['client_id'] + + @property + def client_secret(self): + return settings.ZENODO_PROVIDER['client_secret'] + + @property + def zenodo_url(self): + return settings.ZENODO_PROVIDER.get('zenodo_url', 'https://sandbox.zenodo.org').strip('/') + + @property + def authorize_url(self): + return f'{self.zenodo_url}/oauth/authorize' + + @property + def token_url(self): + return f'{self.zenodo_url}/oauth/token' + + @property + def deposit_url(self): + return f'{self.zenodo_url}/api/records' + + @property + def redirect_path(self): + return reverse('oauth_callback', args=['zenodo']) + + def get_authorize_params(self, request, state): + return { + 'response_type': 'code', + 'client_id': self.client_id, 
+ 'scope': 'deposit:write', + 'redirect_uri': request.build_absolute_uri(self.redirect_path), + 'state': state + } + + def get_callback_data(self, request): + return { + 'client_id': self.client_id, + 'client_secret': self.client_secret, + 'grant_type': 'authorization_code', + 'redirect_uri': request.build_absolute_uri(self.redirect_path), + 'code': request.GET.get('code') + } + + def get_error_message(self, response): + return response.json().get('errors') diff --git a/rdmo_zenodo/exports/exports.py b/rdmo_zenodo/exports/exports.py new file mode 100644 index 0000000..7f606dc --- /dev/null +++ b/rdmo_zenodo/exports/exports.py @@ -0,0 +1,149 @@ +import logging + +from django import forms +from django.conf import settings +from django.core.exceptions import ObjectDoesNotExist +from django.shortcuts import redirect, render +from django.utils.translation import gettext_lazy as _ + +from .base import BaseZenodoExportProvider + +logger = logging.getLogger(__name__) + + + +class ZenodoExportProvider(BaseZenodoExportProvider): + + rights_uri_options = { + 'dataset_license_types/71': 'cc-by-4.0', + 'dataset_license_types/73': 'cc-by-nc-4.0', + 'dataset_license_types/74': 'cc-by-nd-4.0', + 'dataset_license_types/75': 'cc-by-sa-4.0', + 'dataset_license_types/cc0': 'cc-zero' + } + + class Form(forms.Form): + + dataset = forms.CharField(label=_('Select dataset of your project')) + + def __init__(self, *args, **kwargs): + dataset_choices = kwargs.pop('dataset_choices') + super().__init__(*args, **kwargs) + + self.fields['dataset'].widget = forms.RadioSelect(choices=dataset_choices) + + def render(self): + datasets = self.get_set('project/dataset/id') + dataset_choices = [(dataset.set_index, dataset.value)for dataset in datasets] + + self.store_in_session(self.request, 'dataset_choices', dataset_choices) + + form = self.Form( + dataset_choices=dataset_choices + ) + + return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) + + def submit(self): + 
dataset_choices = self.get_from_session(self.request, 'dataset_choices') + form = self.Form(self.request.POST, dataset_choices=dataset_choices) + + if 'cancel' in self.request.POST: + return redirect('project', self.project.id) + + if form.is_valid(): + url = self.get_post_url() + data = self.get_post_data(form.cleaned_data['dataset']) + return self.post(self.request, url, data) + else: + return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) + + def post_success(self, request, response): + zenodo_url = response.json().get('links', {}).get('self_html') + if zenodo_url: + return redirect(zenodo_url) + else: + return render(request, 'core/error.html', { + 'title': _('ZENODO error'), + 'errors': [_('The URL of the new dataset could not be retrieved.')] + }, status=200) + + def get_post_url(self): + return self.deposit_url + + def get_post_data(self, set_index): + # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata + metadata = {} + + # set the resource_type from the settings + resource_type = settings.ZENODO_PROVIDER.get('resource_type') + if resource_type: + metadata['resource_type'] = { + 'id': resource_type + } + + # add the creators from the project members + add_project_members = settings.ZENODO_PROVIDER.get('add_project_members') + if add_project_members: + metadata['creators'] = [] + for user in self.project.user.all(): + creator = { + 'family_name': user.last_name, + 'given_name': user.first_name, + 'type': 'personal' + } + + try: + orcid_socialaccount = user.socialaccount_set.get(provider='orcid') + creator['identifiers'] = [ + { + 'scheme': 'orcid', + 'identifier': orcid_socialaccount.uid + } + ] + except (ObjectDoesNotExist, AttributeError): + pass + + metadata['creators'].append({ + 'person_or_org': creator + }) + + # set the title from the title or id or the running index + metadata['title'] = \ + self.get_text('project/dataset/title', set_index=set_index) or \ + 
self.get_text('project/dataset/id', set_index=set_index) or \ + f'Dataset #{set_index + 1}' + + # set the description + description = self.get_text('project/dataset/description', set_index=set_index) + if description: + metadata['description'] = description + + # set the rights/licenses + for rights in self.get_values('project/dataset/sharing/conditions', set_index=set_index): + if rights.option: + metadata['rights'] = [{ + 'id': self.rights_uri_options.get(rights.option.uri_path) + }] + break + + # set the language from the settings + language = settings.ZENODO_PROVIDER.get('language') + if language: + metadata['languages'] = [ + {'id': language} + ] + + # set the publisher from the settings + publisher = settings.ZENODO_PROVIDER.get('publisher') + if publisher: + metadata['publisher'] = publisher + + # set the funding from the settings + funding = settings.ZENODO_PROVIDER.get('funding') + if funding: + metadata['funding'] = funding + + return { + 'metadata': metadata + } diff --git a/rdmo_zenodo/exports/publish.py b/rdmo_zenodo/exports/publish.py new file mode 100644 index 0000000..eccabd1 --- /dev/null +++ b/rdmo_zenodo/exports/publish.py @@ -0,0 +1,111 @@ +import logging + +from django import forms +from django.conf import settings +from django.core.exceptions import ObjectDoesNotExist +from django.shortcuts import redirect, render +from django.utils.translation import gettext_lazy as _ + +from .base import BaseZenodoExportProvider + +logger = logging.getLogger(__name__) + + + +class ZenodoPublishProvider(BaseZenodoExportProvider): + + rights_uri_options = { + 'dataset_license_types/71': 'cc-by-4.0', + 'dataset_license_types/73': 'cc-by-nc-4.0', + 'dataset_license_types/74': 'cc-by-nd-4.0', + 'dataset_license_types/75': 'cc-by-sa-4.0', + 'dataset_license_types/cc0': 'cc-zero' + } + + class Form(forms.Form): + + snapshot = forms.CharField(label=_('Select snapshot of your project')) + + def __init__(self, *args, **kwargs): + snapshot_choices = 
kwargs.pop('snapshot_choices') + super().__init__(*args, **kwargs) + + self.fields['snapshot'].widget = forms.RadioSelect(choices=snapshot_choices) + + def render(self): + datasets = self.get_set('project/dataset/id') + # get project snapshots + snapshot_choices = [(dataset.set_index, dataset.value)for dataset in datasets] + + self.store_in_session(self.request, 'snapshot_choices', snapshot_choices) + + form = self.Form( + snapshot_choices=snapshot_choices + ) + + return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) + + def submit(self): + snapshot_choices = self.get_from_session(self.request, 'snapshot_choices') + form = self.Form(self.request.POST, snapshot_choices=snapshot_choices) + + if 'cancel' in self.request.POST: + return redirect('project', self.project.id) + + if form.is_valid(): + url = self.get_post_url() + data = self.get_post_data(form.cleaned_data['snapshot']) + return self.post(self.request, url, data) + else: + return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) + + def post_success(self, request, response): + zenodo_url = response.json().get('links', {}).get('self_html') + if zenodo_url: + return redirect(zenodo_url) + else: + return render(request, 'core/error.html', { + 'title': _('ZENODO error'), + 'errors': [_('The URL of the new dataset could not be retrieved.')] + }, status=200) + + def get_post_url(self): + return self.deposit_url + + def get_post_data(self, set_index): + # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata + metadata = {} + + # set the title from the title or id or the running index + metadata['title'] = self.project.title + + # set the resource_type from the settings + metadata['resource_type'] = {'id': 'publication-datamanagementplan'} + + # set the description + description = self.project.description or \ + f"Data Management Plan for project {self.project.title}" + # self.get_text('project/dataset/description', 
set_index=set_index) + if description: + metadata['description'] = description + + # set subjects + metadata['subjects'] = [ + { + 'subject': 'Data Management Plan' + }, + { + 'subject': 'DMP' + } + ] + + # set keywords + keywords = self.get_values('project/research_question/keywords', set_index=set_index) + for keyword in keywords: + metadata['subjects'].append({ + 'subject': keyword.text + }) + + return { + 'metadata': metadata + } From 0c67ec72df7e85a1ea18cda3189ffcbd6775afda Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 17 Oct 2024 11:13:57 +0200 Subject: [PATCH 04/41] docs: fix url for zenodo-publish Signed-off-by: David Wallace --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 273421d..f0b5e58 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Add the plugin to `PROJECT_EXPORTS` in `config/settings/local.py`: ```python PROJECT_EXPORTS += [ ('zenodo', _('Directly to Zenodo'), 'rdmo_zenodo.exports.ZenodoExportProvider'), - ('zenodo', _('Directly to Zenodo'), 'rdmo_zenodo.exports.ZenodoPublishProvider') + ('zenodo-publish', _('Publish to Zenodo'), 'rdmo_zenodo.exports.ZenodoPublishProvider') ] ``` From 19aaf8d42d7e6ac2a578f10a93682302c8b48796 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Fri, 18 Oct 2024 08:59:18 +0200 Subject: [PATCH 05/41] fix: add class attribute for RDMO_PLUGIN_KEY Signed-off-by: David Wallace --- rdmo_zenodo/exports/exports.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rdmo_zenodo/exports/exports.py b/rdmo_zenodo/exports/exports.py index 7f606dc..7aa5bd7 100644 --- a/rdmo_zenodo/exports/exports.py +++ b/rdmo_zenodo/exports/exports.py @@ -11,9 +11,10 @@ logger = logging.getLogger(__name__) - class ZenodoExportProvider(BaseZenodoExportProvider): + RDMO_PLUGIN_KEY = "zenodo" + rights_uri_options = { 'dataset_license_types/71': 'cc-by-4.0', 'dataset_license_types/73': 'cc-by-nc-4.0', From 3d04c95784e686b84ee77cf4a2f9a3854461de50 Mon Sep 17 
00:00:00 2001 From: David Wallace Date: Fri, 18 Oct 2024 09:00:28 +0200 Subject: [PATCH 06/41] feat,fix: add RDMO_PLUGIN_KEY and methods for pdf from snapshot upload Signed-off-by: David Wallace --- rdmo_zenodo/exports/base.py | 79 ++++++++++++++++++++++++++++++++-- rdmo_zenodo/exports/publish.py | 46 ++++++++++++-------- 2 files changed, 103 insertions(+), 22 deletions(-) diff --git a/rdmo_zenodo/exports/base.py b/rdmo_zenodo/exports/base.py index 409f8c3..9812651 100644 --- a/rdmo_zenodo/exports/base.py +++ b/rdmo_zenodo/exports/base.py @@ -3,15 +3,28 @@ from django.conf import settings from django.shortcuts import reverse from django.utils.translation import gettext_lazy as _ +from django.http import HttpResponse +from django.template import TemplateSyntaxError +from rdmo.core.utils import render_to_format from rdmo.projects.exports import Export +from rdmo.projects.utils import get_value_path from rdmo.services.providers import OauthProviderMixin +from rdmo.views.models import View logger = logging.getLogger(__name__) +json_header = { + 'Content-Type': 'application/json', + } +binary_header= { + 'Content-Type': 'application/octet-stream', +} class BaseZenodoExportProvider(OauthProviderMixin, Export): + RDMO_PLUGIN_KEY = None + @property def client_id(self): return settings.ZENODO_PROVIDER['client_id'] @@ -38,7 +51,18 @@ def deposit_url(self): @property def redirect_path(self): - return reverse('oauth_callback', args=['zenodo']) + if self.RDMO_PLUGIN_KEY is None: + raise ValueError("the RDMO_PLUGIN_KEY should be set as a class attribute") + return reverse('oauth_callback', args=[self.RDMO_PLUGIN_KEY]) + + def record_file_url(self, record_id): + return f"{self.deposit_url}/records/{record_id}/draft/files" + + def record_file_content_url(self, record_id, file_key): + return f"{self.record_file_url(record_id)}/{file_key}/content" + + def record_file_commit_url(self, record_id, file_key): + return f"{self.record_file_url(record_id)}/{file_key}/commit" def 
get_authorize_params(self, request, state): return { @@ -58,5 +82,54 @@ def get_callback_data(self, request): 'code': request.GET.get('code') } - def get_error_message(self, response): - return response.json().get('errors') + def render_project_views(self, project, snapshot, attachments_format, view=None): + + if view is None: + view = View.objects.get(uri="https://rdmorganiser.github.io/terms/views/variable_check") + + try: + rendered_view = view.render(project, snapshot) + except TemplateSyntaxError: + return HttpResponse() + + return render_to_format( + None, attachments_format, project.title, 'projects/project_view_export.html', { + 'format': attachments_format, + 'title': project.title, + 'view': view, + 'rendered_view': rendered_view, + 'resource_path': get_value_path(project, snapshot) + } + ) + + def render_snapshot_to_pdf(self, snapshot): + # get the pdf + return self.render_project_views(self.project, snapshot, "pdf") + + + def upload_file(self, record_id, binary=None): + """ + takes the record_id of the draft record and the file contents as binary data. + API_URL should be the zenodo API URL, e.g. + """ + # POST data / PDF + files_url = self.record_file_url(record_id) + file_data = [{"key": "DMP.pdf"}] + data_initialization_response = self.post(self.request, files_url, json=file_data) + + # upload the actual file content as binary stream + # extract files/content URL from the data upload response + breakpoint() + # files_content_url = data_initialization_response.json()["entries"][0]["links"]["content"] + files_content_url = self.record_file_content_url(record_id, file_data[0]['key']) + # self.deposit_url+f"records/{record_id}/draft/files/{file_data[0]['key']}/content" + ## multiple files as upload even possible??? 
^^^ + + # upload with binary data + data_content_response = self.put(self.request, files_content_url, data=binary) #headers=binary_header, + + # commit the file upload + file_commit_url = self.deposit_url+f"records/{record_id}/draft/files/{file_data[0]['key']}/commit" + + data_commit_response = self.post(self.request, file_commit_url) + return data_commit_response diff --git a/rdmo_zenodo/exports/publish.py b/rdmo_zenodo/exports/publish.py index eccabd1..568e016 100644 --- a/rdmo_zenodo/exports/publish.py +++ b/rdmo_zenodo/exports/publish.py @@ -1,8 +1,6 @@ import logging from django import forms -from django.conf import settings -from django.core.exceptions import ObjectDoesNotExist from django.shortcuts import redirect, render from django.utils.translation import gettext_lazy as _ @@ -11,9 +9,10 @@ logger = logging.getLogger(__name__) - class ZenodoPublishProvider(BaseZenodoExportProvider): + RDMO_PLUGIN_KEY = "zenodo-publish" + rights_uri_options = { 'dataset_license_types/71': 'cc-by-4.0', 'dataset_license_types/73': 'cc-by-nc-4.0', @@ -33,9 +32,7 @@ def __init__(self, *args, **kwargs): self.fields['snapshot'].widget = forms.RadioSelect(choices=snapshot_choices) def render(self): - datasets = self.get_set('project/dataset/id') - # get project snapshots - snapshot_choices = [(dataset.set_index, dataset.value)for dataset in datasets] + snapshot_choices = [(i.id,i.title) for i in self.project.snapshots.all()] self.store_in_session(self.request, 'snapshot_choices', snapshot_choices) @@ -45,16 +42,28 @@ def render(self): return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) + def upload_file_to_zenodo(self, file): + # placeholder for posting the file + return + def submit(self): - snapshot_choices = self.get_from_session(self.request, 'snapshot_choices') + snapshot_choices = self.project.snapshots.all() form = self.Form(self.request.POST, snapshot_choices=snapshot_choices) if 'cancel' in self.request.POST: return redirect('project', 
self.project.id) if form.is_valid(): - url = self.get_post_url() - data = self.get_post_data(form.cleaned_data['snapshot']) + url = self.get_post_url() # deposit url + snapshot_id = form.cleaned_data['snapshot'] + breakpoint() + snapshot = self.project.snapshots.get(id=snapshot_id) + data = self.get_post_data(snapshot) + zen_data_response = self.post(self.request, url, data) + rdmo_pdf_response = self.render_snapshot_to_pdf(snapshot) + zen_pdf_response = self.upload_file_to_zenodo(rdmo_pdf_response.content) + return zen_data_response + return self.post(self.request, url, data) else: return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) @@ -72,20 +81,19 @@ def post_success(self, request, response): def get_post_url(self): return self.deposit_url - def get_post_data(self, set_index): + def get_post_data(self, snapshot): # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata metadata = {} # set the title from the title or id or the running index - metadata['title'] = self.project.title + metadata['title'] = snapshot.title # set the resource_type from the settings metadata['resource_type'] = {'id': 'publication-datamanagementplan'} # set the description - description = self.project.description or \ - f"Data Management Plan for project {self.project.title}" - # self.get_text('project/dataset/description', set_index=set_index) + description = snapshot.description or \ + f"Data Management Plan for project {snapshot.title}" if description: metadata['description'] = description @@ -100,11 +108,11 @@ def get_post_data(self, set_index): ] # set keywords - keywords = self.get_values('project/research_question/keywords', set_index=set_index) - for keyword in keywords: - metadata['subjects'].append({ - 'subject': keyword.text - }) + # keywords = self.get_values('project/research_question/keywords', set_index=set_index) + # for keyword in keywords: + # metadata['subjects'].append({ + # 'subject': keyword.text + # }) return 
{ 'metadata': metadata From 0c5d4a8dd1e95bacce0ce4de7fe4ec2e88faf282 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Fri, 25 Oct 2024 08:50:06 +0200 Subject: [PATCH 07/41] feat: add functions for zenodo api upload Signed-off-by: David Wallace --- rdmo_zenodo/exports/zenodo_api_upload.py | 137 +++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 rdmo_zenodo/exports/zenodo_api_upload.py diff --git a/rdmo_zenodo/exports/zenodo_api_upload.py b/rdmo_zenodo/exports/zenodo_api_upload.py new file mode 100644 index 0000000..87230ad --- /dev/null +++ b/rdmo_zenodo/exports/zenodo_api_upload.py @@ -0,0 +1,137 @@ +import requests + +token = "token_generated_from_Zenodo_for_RDMO" + +API_URL = "https://sandbox.zenodo.org/api/" +URL = "https://sandbox.zenodo.org/" +DOI_URL = "https://doi.org/10.5072/zenodo." +PUBLISHER = "RDMO" + +json_header = { + "Accept": "application/json", + 'Content-Type': 'application/json', + 'Authorization': 'Bearer {}'.format(token) +} +binary_header= { + "Accept": "application/json", + 'Content-Type': 'application/octet-stream', + 'Authorization': 'Bearer {}'.format(token) +} +plain_header = { + "Accept": "application/json", + 'Authorization': 'Bearer {}'.format(token) +} + +# sample json data +sample_data = { + "metadata": { + "title": "RDMO_02", + "description": "schubidu, schubida", + "creators": [ + { + "person_or_org": { + "given_name": "Max", + "family_name": "Mustermann", + "type": "personal", + "identifiers": [{"identifier": "0001-0002-0003-0004"}] + }, + "affiliations": [{"name": "University of Test"}] + } + ], + "publication_date": "2024-10-17", + "publisher": PUBLISHER, + "resource_type": {"id": "publication-datamanagementplan"}, + "subjects": [{"subject": "Data Management Plan"}, {"subject": "DMP"}] + } +} + +def convert_json(input_json, publication_date, publisher): + """ + Convert the metadata output of a Zenodo response into a + format that gets accepted as input. 
+ """ + output_json = {} + output_json['metadata'] = { + 'title': input_json['metadata']['title'], + 'description': input_json['metadata']['description'], + 'publication_date': publication_date, + 'publisher': publisher, + 'creators': [{ + 'person_or_org': { + 'given_name': creator['name'].split(', ')[1], + 'family_name': creator['name'].split(', ')[0], + 'type': 'personal', + 'identifiers': [{'identifier': creator['orcid']}] + }, + 'affiliations': [{'name': creator['affiliation']}] + } for creator in input_json['metadata']['creators']], + 'resource_type': { + 'id': "publication-datamanagementplan" + }, + 'subjects': [{'subject': keyword} for keyword in input_json['metadata']['keywords']] + } + + return output_json + +def upload_file(id, binary=None): + # POST data / PDF + files_url = API_URL+f"records/{id}/draft/files" + file_data = [{"key": "DMP.pdf"}] + data_initialization_response = requests.post(files_url, headers=json_header, json=file_data) + + # upload the actual file content as binary stream + files_content_url = API_URL+f"records/{id}/draft/files/{file_data[0]['key']}/content" + ## multiple files as upload realistic??? 
^^^ + + # upload data from a file path + file_path = r'C:\Users\path\to\my.pdf' + + # Open the file in binary mode + with open(file_path, 'rb') as file: + data_content_response = requests.put(files_content_url, headers=binary_header, data=file) + + # alternative with binary data + #data_content_response = requests.put(files_content_url, headers=binary_header, data=binary) + + # commit the file upload + file_commit_url = API_URL+f"records/{id}/draft/files/{file_data[0]['key']}/commit" + data_commit_response = requests.post(file_commit_url, headers=plain_header) + +def create_record(metadata): + records_url = API_URL+"records" + draft_response = requests.post(records_url, headers=json_header, json=metadata) + # return the id of the draft + return draft_response.json()["id"] + +def get_publication_url(id): + return URL+f"uploads/{id}" + +def get_doi(id): + """ + id is the Zenodo record id + """ + return DOI_URL+f"{id}" + +def get_overarching_doi(id_v1): + """ + we need the id of v1 + overarching id = id of v1 -1 + """ + return DOI_URL+f"{int(id_v1) - 1}" + + +def create_new_version(id_v1, new_date, publisher=PUBLISHER): + """ + id of v1 needed, NOT "overall" id!!! + new_date: e.g. 
"2024-10-17" + returns id of the new draft + """ + new_version_response = requests.post(API_URL+f"records/{id_v1}/versions", headers=plain_header).json() + new_id = new_version_response["id"] + # get metadata from the draft and add new publication date + new_data = convert_json(new_version_response, new_date, publisher) + update_url = API_URL+f"records/{new_id}/draft" + # update draft dataset with the new publication date + draft_response = requests.put(update_url, headers=json_header, json=new_data) + + return draft_response.json()["id"] From 571a980616c0fb4993dbcac1a5e78f294e85e3ea Mon Sep 17 00:00:00 2001 From: David Wallace Date: Fri, 25 Oct 2024 09:00:24 +0200 Subject: [PATCH 08/41] build: autoupdate pre-commit Signed-off-by: David Wallace --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bf2c37e..5743fc0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: hooks: - id: check-hooks-apply - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v5.0.0 hooks: - id: check-ast - id: check-yaml @@ -15,7 +15,7 @@ repos: - id: trailing-whitespace - id: debug-statements - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.6 + rev: v0.7.1 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] From ae73bae3addc86d419849f8e1eb9d572de929e62 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Fri, 25 Oct 2024 09:04:00 +0200 Subject: [PATCH 09/41] chore: remove breakpoint Signed-off-by: David Wallace --- rdmo_zenodo/exports/publish.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rdmo_zenodo/exports/publish.py b/rdmo_zenodo/exports/publish.py index 568e016..1bf4b1c 100644 --- a/rdmo_zenodo/exports/publish.py +++ b/rdmo_zenodo/exports/publish.py @@ -56,15 +56,12 @@ def submit(self): if form.is_valid(): url = self.get_post_url() # deposit url snapshot_id = 
form.cleaned_data['snapshot'] - breakpoint() snapshot = self.project.snapshots.get(id=snapshot_id) data = self.get_post_data(snapshot) zen_data_response = self.post(self.request, url, data) rdmo_pdf_response = self.render_snapshot_to_pdf(snapshot) - zen_pdf_response = self.upload_file_to_zenodo(rdmo_pdf_response.content) + _zen_pdf_response = self.upload_file_to_zenodo(rdmo_pdf_response.content) return zen_data_response - - return self.post(self.request, url, data) else: return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) From 99ea1d36cd906c583ddeed564163c9e382894534 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 4 Nov 2024 16:52:11 +0100 Subject: [PATCH 10/41] refactor: rename to export_dataset Signed-off-by: David Wallace --- rdmo_zenodo/exports/__init__.py | 4 +- rdmo_zenodo/exports/export_dataset.py | 59 ++++++++++ rdmo_zenodo/exports/exports.py | 150 -------------------------- 3 files changed, 61 insertions(+), 152 deletions(-) create mode 100644 rdmo_zenodo/exports/export_dataset.py delete mode 100644 rdmo_zenodo/exports/exports.py diff --git a/rdmo_zenodo/exports/__init__.py b/rdmo_zenodo/exports/__init__.py index e442d58..ca69198 100644 --- a/rdmo_zenodo/exports/__init__.py +++ b/rdmo_zenodo/exports/__init__.py @@ -1,2 +1,2 @@ -from .exports import ZenodoExportProvider -from .publish import ZenodoPublishProvider \ No newline at end of file +from .export_dataset import ZenodoExportProvider as ZenodoExportProvider +from .publish_snapshot import ZenodoPublishProvider as ZenodoPublishProvider diff --git a/rdmo_zenodo/exports/export_dataset.py b/rdmo_zenodo/exports/export_dataset.py new file mode 100644 index 0000000..550b1e2 --- /dev/null +++ b/rdmo_zenodo/exports/export_dataset.py @@ -0,0 +1,59 @@ +import logging + +from django.shortcuts import redirect, render +from django.utils.translation import gettext_lazy as _ + +from .base import BaseZenodoExportProvider +from .forms import ZenodoDatasetForm +from .metadata 
import ZenodoMetadataExport + +logger = logging.getLogger(__name__) + + +class ZenodoExportProvider(BaseZenodoExportProvider): + + RDMO_PLUGIN_KEY = "zenodo" + + def get_dataset_choices(self): + datasets = self.get_set('project/dataset/id') + return [(dataset.set_index, dataset.value) for dataset in datasets] + + def render(self): + dataset_choices = self.get_dataset_choices() + + self.store_in_session(self.request, 'dataset_choices', dataset_choices) + + form = ZenodoDatasetForm( + dataset_choices=dataset_choices + ) + + return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) + + def submit(self): + dataset_choices = self.get_from_session(self.request, 'dataset_choices') + form = ZenodoDatasetForm(self.request.POST, dataset_choices=dataset_choices) + + if 'cancel' in self.request.POST: + return redirect('project', self.project.id) + + if form.is_valid(): + url = self.deposit_url + data = self.get_post_data(form.cleaned_data['dataset']) + return self.post(self.request, url, data) + else: + return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) + + def post_success(self, request, response): + zenodo_url = response.json().get('links', {}).get('self_html') + if zenodo_url: + return redirect(zenodo_url) + else: + return render(request, 'core/error.html', { + 'title': _('ZENODO error'), + 'errors': [_('The URL of the new dataset could not be retrieved.')] + }, status=200) + + def get_post_data(self, set_index): + # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata + metadata_builder = ZenodoMetadataExport(project=self.project, set_index=set_index) + return metadata_builder.build_metadata() diff --git a/rdmo_zenodo/exports/exports.py b/rdmo_zenodo/exports/exports.py deleted file mode 100644 index 7aa5bd7..0000000 --- a/rdmo_zenodo/exports/exports.py +++ /dev/null @@ -1,150 +0,0 @@ -import logging - -from django import forms -from django.conf import settings -from 
django.core.exceptions import ObjectDoesNotExist -from django.shortcuts import redirect, render -from django.utils.translation import gettext_lazy as _ - -from .base import BaseZenodoExportProvider - -logger = logging.getLogger(__name__) - - -class ZenodoExportProvider(BaseZenodoExportProvider): - - RDMO_PLUGIN_KEY = "zenodo" - - rights_uri_options = { - 'dataset_license_types/71': 'cc-by-4.0', - 'dataset_license_types/73': 'cc-by-nc-4.0', - 'dataset_license_types/74': 'cc-by-nd-4.0', - 'dataset_license_types/75': 'cc-by-sa-4.0', - 'dataset_license_types/cc0': 'cc-zero' - } - - class Form(forms.Form): - - dataset = forms.CharField(label=_('Select dataset of your project')) - - def __init__(self, *args, **kwargs): - dataset_choices = kwargs.pop('dataset_choices') - super().__init__(*args, **kwargs) - - self.fields['dataset'].widget = forms.RadioSelect(choices=dataset_choices) - - def render(self): - datasets = self.get_set('project/dataset/id') - dataset_choices = [(dataset.set_index, dataset.value)for dataset in datasets] - - self.store_in_session(self.request, 'dataset_choices', dataset_choices) - - form = self.Form( - dataset_choices=dataset_choices - ) - - return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) - - def submit(self): - dataset_choices = self.get_from_session(self.request, 'dataset_choices') - form = self.Form(self.request.POST, dataset_choices=dataset_choices) - - if 'cancel' in self.request.POST: - return redirect('project', self.project.id) - - if form.is_valid(): - url = self.get_post_url() - data = self.get_post_data(form.cleaned_data['dataset']) - return self.post(self.request, url, data) - else: - return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) - - def post_success(self, request, response): - zenodo_url = response.json().get('links', {}).get('self_html') - if zenodo_url: - return redirect(zenodo_url) - else: - return render(request, 'core/error.html', { - 'title': 
_('ZENODO error'), - 'errors': [_('The URL of the new dataset could not be retrieved.')] - }, status=200) - - def get_post_url(self): - return self.deposit_url - - def get_post_data(self, set_index): - # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata - metadata = {} - - # set the resource_type from the settings - resource_type = settings.ZENODO_PROVIDER.get('resource_type') - if resource_type: - metadata['resource_type'] = { - 'id': resource_type - } - - # add the creators from the project members - add_project_members = settings.ZENODO_PROVIDER.get('add_project_members') - if add_project_members: - metadata['creators'] = [] - for user in self.project.user.all(): - creator = { - 'family_name': user.last_name, - 'given_name': user.first_name, - 'type': 'personal' - } - - try: - orcid_socialaccount = user.socialaccount_set.get(provider='orcid') - creator['identifiers'] = [ - { - 'scheme': 'orcid', - 'identifier': orcid_socialaccount.uid - } - ] - except (ObjectDoesNotExist, AttributeError): - pass - - metadata['creators'].append({ - 'person_or_org': creator - }) - - # set the title from the title or id or the running index - metadata['title'] = \ - self.get_text('project/dataset/title', set_index=set_index) or \ - self.get_text('project/dataset/id', set_index=set_index) or \ - f'Dataset #{set_index + 1}' - - # set the description - description = self.get_text('project/dataset/description', set_index=set_index) - if description: - metadata['description'] = description - - # set the rights/licenses - for rights in self.get_values('project/dataset/sharing/conditions', set_index=set_index): - if rights.option: - metadata['rights'] = [{ - 'id': self.rights_uri_options.get(rights.option.uri_path) - }] - break - - # set the language from the settings - language = settings.ZENODO_PROVIDER.get('language') - if language: - metadata['languages'] = [ - {'id': language} - ] - - # set the publisher from the settings - publisher = 
settings.ZENODO_PROVIDER.get('publisher') - if publisher: - metadata['publisher'] = publisher - - # set the funding from the settings - funding = settings.ZENODO_PROVIDER.get('funding') - if funding: - metadata['funding'] = funding - - return { - 'metadata': metadata - } From 3bfaef73912fae5c00baceee931e21dd889225f4 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 4 Nov 2024 17:02:05 +0100 Subject: [PATCH 11/41] build: update ruff.lint config Signed-off-by: David Wallace --- pyproject.toml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f0df57e..f5e5f67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,8 @@ version = {attr = "rdmo_zenodo.__version__"} [tool.ruff] target-version = "py38" line-length = 120 + +[tool.ruff.lint] select = [ "B", # flake8-bugbear "C4", # flake8-comprehensions @@ -74,7 +76,7 @@ ignore = [ "RUF012", # mutable-class-default ] -[tool.ruff.isort] +[tool.ruff.lint.isort] section-order = [ "future", "standard-library", @@ -87,7 +89,7 @@ section-order = [ "local-folder" ] -[tool.ruff.isort.sections] +[tool.ruff.lint.isort.sections] pytest = ["pytest"] django = ["django"] rest_framework = ["rest_framework"] From f326385780650a7b9f8ef0665d4bd599c9ebc5e1 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 4 Nov 2024 17:03:32 +0100 Subject: [PATCH 12/41] feat: add zenodo urls to base Signed-off-by: David Wallace --- rdmo_zenodo/exports/base.py | 90 +++++++++++++------------------------ 1 file changed, 30 insertions(+), 60 deletions(-) diff --git a/rdmo_zenodo/exports/base.py b/rdmo_zenodo/exports/base.py index 9812651..17878a8 100644 --- a/rdmo_zenodo/exports/base.py +++ b/rdmo_zenodo/exports/base.py @@ -2,25 +2,20 @@ from django.conf import settings from django.shortcuts import reverse -from django.utils.translation import gettext_lazy as _ -from django.http import HttpResponse -from django.template import TemplateSyntaxError -from rdmo.core.utils import 
render_to_format from rdmo.projects.exports import Export -from rdmo.projects.utils import get_value_path from rdmo.services.providers import OauthProviderMixin -from rdmo.views.models import View logger = logging.getLogger(__name__) json_header = { 'Content-Type': 'application/json', } -binary_header= { +binary_header = { 'Content-Type': 'application/octet-stream', } + class BaseZenodoExportProvider(OauthProviderMixin, Export): RDMO_PLUGIN_KEY = None @@ -55,8 +50,32 @@ def redirect_path(self): raise ValueError("the RDMO_PLUGIN_KEY should be set as a class attribute") return reverse('oauth_callback', args=[self.RDMO_PLUGIN_KEY]) + @property + def authorization_header(self): + return self.get_authorization_headers(self.get_from_session(self.request, 'access_token')) + + @property + def authorized_binary_header(self): + return {**binary_header, **self.authorization_header} + + @property + def export_file_format(self): + return settings.ZENODO_PROVIDER.get('export_format', 'pdf') + + def record_uploads_url(self, record_id): + return f"{self.zenodo_url}/uploads/{record_id}" + + def record_url(self, record_id): + return f"{self.deposit_url}/{record_id}" + + def record_draft_url(self, record_id): + return f"{self.deposit_url}/{record_id}/draft" + + def record_versions_url(self, record_id): + return f"{self.deposit_url}/{record_id}/versions" + def record_file_url(self, record_id): - return f"{self.deposit_url}/records/{record_id}/draft/files" + return f"{self.record_draft_url(record_id)}/files" def record_file_content_url(self, record_id, file_key): return f"{self.record_file_url(record_id)}/{file_key}/content" @@ -64,6 +83,9 @@ def record_file_content_url(self, record_id, file_key): def record_file_commit_url(self, record_id, file_key): return f"{self.record_file_url(record_id)}/{file_key}/commit" + def record_publish_url(self, record_id): + return f"{self.record_draft_url(record_id)}/actions/publish" + def get_authorize_params(self, request, state): return { 
'response_type': 'code', @@ -81,55 +103,3 @@ def get_callback_data(self, request): 'redirect_uri': request.build_absolute_uri(self.redirect_path), 'code': request.GET.get('code') } - - def render_project_views(self, project, snapshot, attachments_format, view=None): - - if view is None: - view = View.objects.get(uri="https://rdmorganiser.github.io/terms/views/variable_check") - - try: - rendered_view = view.render(project, snapshot) - except TemplateSyntaxError: - return HttpResponse() - - return render_to_format( - None, attachments_format, project.title, 'projects/project_view_export.html', { - 'format': attachments_format, - 'title': project.title, - 'view': view, - 'rendered_view': rendered_view, - 'resource_path': get_value_path(project, snapshot) - } - ) - - def render_snapshot_to_pdf(self, snapshot): - # get the pdf - return self.render_project_views(self.project, snapshot, "pdf") - - - def upload_file(self, record_id, binary=None): - """ - takes the record_id of the draft record and the file contents as binary data. - API_URL should be the zenodo API URL, e.g. - """ - # POST data / PDF - files_url = self.record_file_url(record_id) - file_data = [{"key": "DMP.pdf"}] - data_initialization_response = self.post(self.request, files_url, json=file_data) - - # upload the actual file content as binary stream - # extract files/content URL from the data upload response - breakpoint() - # files_content_url = data_initialization_response.json()["entries"][0]["links"]["content"] - files_content_url = self.record_file_content_url(record_id, file_data[0]['key']) - # self.deposit_url+f"records/{record_id}/draft/files/{file_data[0]['key']}/content" - ## multiple files as upload even possible??? 
^^^ - - # upload with binary data - data_content_response = self.put(self.request, files_content_url, data=binary) #headers=binary_header, - - # commit the file upload - file_commit_url = self.deposit_url+f"records/{record_id}/draft/files/{file_data[0]['key']}/commit" - - data_commit_response = self.post(self.request, file_commit_url) - return data_commit_response From c7eef4e48d1e32f3584e1abdfa47dfc62ef71446 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 4 Nov 2024 17:07:57 +0100 Subject: [PATCH 13/41] feat: add ZenodoMetadataExport class in metadata.py Signed-off-by: David Wallace --- rdmo_zenodo/exports/metadata.py | 134 ++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 rdmo_zenodo/exports/metadata.py diff --git a/rdmo_zenodo/exports/metadata.py b/rdmo_zenodo/exports/metadata.py new file mode 100644 index 0000000..84ed5da --- /dev/null +++ b/rdmo_zenodo/exports/metadata.py @@ -0,0 +1,134 @@ +from typing import Any, Dict, List + +from django.conf import settings +from django.core.exceptions import ObjectDoesNotExist +from django.utils import timezone + +from rdmo.projects.exports import Export + + +class ZenodoMetadataExport(Export): + + rights_uri_options = { + 'dataset_license_types/71': 'cc-by-4.0', + 'dataset_license_types/73': 'cc-by-nc-4.0', + 'dataset_license_types/74': 'cc-by-nd-4.0', + 'dataset_license_types/75': 'cc-by-sa-4.0', + 'dataset_license_types/cc0': 'cc-zero' + } + default_resource_type = 'publication-datamanagementplan' + default_subjects = ['Data Management Plan', 'DMP'] + + def __init__(self, project=None, snapshot=None, set_index=None): + self.project = project + self.snapshot = snapshot + self.set_index = set_index + # Retrieve settings once at initialization + self.zenodo_settings = settings.ZENODO_PROVIDER + + def build_metadata(self) -> Dict[str, Any]: + """Build the metadata dictionary for Zenodo export, excluding empty fields.""" + metadata = { + 'resource_type': self._get_resource_type(), 
+ 'creators': self._get_creators() if self._should_add_project_members() else [], + 'title': self._get_title(), + 'description': self._get_description(), + 'rights': self._get_rights(), + 'languages': self._get_languages(), + 'publisher': self._get_publisher(), + 'funding': self._get_funding(), + 'upload_type': self.zenodo_settings.get('upload_type', 'dataset'), + 'publication_date': timezone.localdate().isoformat(), + 'subjects': self._get_subjects(), + } + # Filter out empty values + return { + 'metadata': self._filter_empty_values(metadata) + } + + def _filter_empty_values(self, metadata: Dict[str, Any]) -> Dict[str, Any]: + """Remove empty values from the metadata dictionary.""" + return {k: v for k, v in metadata.items() if v not in [None, '', [], {}]} + + def _get_resource_type(self) -> Dict[str, str]: + """Retrieve the resource type from settings or use default.""" + resource_type = self.zenodo_settings.get('resource_type', self.default_resource_type) + if self.snapshot is None and self.set_index is not None: + resource_type = 'dataset' + return {'id': resource_type} + + def _get_creators(self) -> List[Dict[str, Any]]: + """Build the list of creators from project members.""" + creators = [] + for user in self.project.user.all(): + creators.append({ + 'person_or_org': { + 'family_name': user.last_name, + 'given_name': user.first_name, + 'type': 'personal', + 'identifiers': self._get_identifiers(user) + } + }) + return creators + + def _get_identifiers(self, user) -> List[Dict[str, str]]: + """Retrieve ORCID identifier if available for the user.""" + try: + orcid = user.socialaccount_set.get(provider='orcid') + return [{'scheme': 'orcid', 'identifier': orcid.uid}] + except (ObjectDoesNotExist, AttributeError): + return [] + + def _should_add_project_members(self) -> bool: + """Determine if project members should be added as creators.""" + return self.zenodo_settings.get('add_project_members', False) + + def _get_title(self) -> str: + """Construct the title for 
the metadata.""" + title_from_snapshot = f"{self.project.title} - Snapshot: {self.snapshot.title}" if self.snapshot else None + return ( + title_from_snapshot or + self.get_text('project/dataset/title', set_index=self.set_index) or + self.get_text('project/dataset/id', set_index=self.set_index) or + f'Dataset #{self.set_index + 1}' + ) + + def _get_description(self) -> str: + """Construct the description for the metadata.""" + description = f"Data Management Plan for project {self.project.title}." + if self.snapshot is not None: + description += f" {self.snapshot.description}" + if self.set_index is not None: + dataset_title = self.get_text('project/dataset/title', set_index=self.set_index) + if dataset_title: + description += f" {dataset_title}" + return description + + def _get_rights(self) -> List[Dict[str, str]]: + """Retrieve the rights/license information from project metadata.""" + for rights in self.get_values('project/dataset/sharing/conditions', set_index=self.set_index): + if rights.option: + return [{'id': self.rights_uri_options.get(rights.option.uri_path)}] + return [] + + def _get_languages(self) -> List[Dict[str, str]]: + """Retrieve the language setting from configuration.""" + language = self.zenodo_settings.get('language') + return [{'id': language}] if language else [] + + def _get_publisher(self) -> str: + """Retrieve the publisher setting from configuration.""" + return self.zenodo_settings.get('publisher') + + def _get_funding(self) -> str: + """Retrieve the funding information from configuration.""" + return self.zenodo_settings.get('funding') + + def _get_subjects(self) -> List[Dict[str, str]]: + """Retrieve and construct the subjects for the metadata.""" + # Default subjects + subjects = [{'subject': i} for i in self.default_subjects] + # Add project-specific keywords + keywords = self.get_values('project/research_question/keywords') + subjects.extend({'subject': keyword.text} for keyword in keywords) + return subjects From 
84f1c1b69a826f5c4457852fe2256a2ac6973655 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 4 Nov 2024 19:20:50 +0100 Subject: [PATCH 14/41] feat: add authorized json headers and rename to records url Signed-off-by: David Wallace --- rdmo_zenodo/exports/base.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/rdmo_zenodo/exports/base.py b/rdmo_zenodo/exports/base.py index 17878a8..f4d6909 100644 --- a/rdmo_zenodo/exports/base.py +++ b/rdmo_zenodo/exports/base.py @@ -40,10 +40,6 @@ def authorize_url(self): def token_url(self): return f'{self.zenodo_url}/oauth/token' - @property - def deposit_url(self): - return f'{self.zenodo_url}/api/records' - @property def redirect_path(self): if self.RDMO_PLUGIN_KEY is None: @@ -58,6 +54,10 @@ def authorization_header(self): def authorized_binary_header(self): return {**binary_header, **self.authorization_header} + @property + def authorized_json_header(self): + return {**json_header, **self.authorization_header} + @property def export_file_format(self): return settings.ZENODO_PROVIDER.get('export_format', 'pdf') @@ -65,14 +65,18 @@ def export_file_format(self): def record_uploads_url(self, record_id): return f"{self.zenodo_url}/uploads/{record_id}" + @property + def records_url(self): + return f'{self.zenodo_url}/api/records' + def record_url(self, record_id): - return f"{self.deposit_url}/{record_id}" + return f"{self.records_url}/{record_id}" def record_draft_url(self, record_id): - return f"{self.deposit_url}/{record_id}/draft" + return f"{self.records_url}/{record_id}/draft" def record_versions_url(self, record_id): - return f"{self.deposit_url}/{record_id}/versions" + return f"{self.records_url}/{record_id}/versions" def record_file_url(self, record_id): return f"{self.record_draft_url(record_id)}/files" From a452b5fbf7e687cb2b2114aaf96a0a6b0f0cb440 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 4 Nov 2024 19:21:31 +0100 Subject: [PATCH 15/41] refactor: move forms to 
forms.py Signed-off-by: David Wallace --- rdmo_zenodo/exports/forms.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 rdmo_zenodo/exports/forms.py diff --git a/rdmo_zenodo/exports/forms.py b/rdmo_zenodo/exports/forms.py new file mode 100644 index 0000000..7e97d09 --- /dev/null +++ b/rdmo_zenodo/exports/forms.py @@ -0,0 +1,27 @@ +from django import forms +from django.utils.translation import gettext_lazy as _ + + +class ZenodoDatasetForm(forms.Form): + dataset = forms.CharField(label=_('Select dataset of your project')) + + def __init__(self, *args, **kwargs): + dataset_choices = kwargs.pop('dataset_choices') + super().__init__(*args, **kwargs) + + self.fields['dataset'].widget = forms.RadioSelect(choices=dataset_choices) + + +class ZenodoSnapshotForm(forms.Form): + snapshot = forms.ChoiceField( + label=_('Select snapshot of your project'), + required=False, # Allows empty selection + widget=forms.RadioSelect + ) + + def __init__(self, *args, **kwargs): + snapshot_choices = kwargs.pop('snapshot_choices', []) + super().__init__(*args, **kwargs) + snapshot_choices = [(None, _("Create new snapshot")), *snapshot_choices] + self.fields['snapshot'].choices = snapshot_choices + self.fields['snapshot'].initial = None From 3910fc70fbc9b92fdedd8e70a1336fbf4376358a Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 4 Nov 2024 19:22:14 +0100 Subject: [PATCH 16/41] refactor: rename to records url Signed-off-by: David Wallace --- rdmo_zenodo/exports/export_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdmo_zenodo/exports/export_dataset.py b/rdmo_zenodo/exports/export_dataset.py index 550b1e2..802fb57 100644 --- a/rdmo_zenodo/exports/export_dataset.py +++ b/rdmo_zenodo/exports/export_dataset.py @@ -37,7 +37,7 @@ def submit(self): return redirect('project', self.project.id) if form.is_valid(): - url = self.deposit_url + url = self.records_url data = self.get_post_data(form.cleaned_data['dataset']) return 
self.post(self.request, url, data) else: From 2a617cccded69deee7694c878f2f7a43566d0725 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 4 Nov 2024 19:25:51 +0100 Subject: [PATCH 17/41] feat: add publication and versioning workflow to provider Signed-off-by: David Wallace --- rdmo_zenodo/exports/publish.py | 116 ------------- rdmo_zenodo/exports/publish_snapshot.py | 198 +++++++++++++++++++++++ rdmo_zenodo/exports/zenodo_api_upload.py | 137 ---------------- 3 files changed, 198 insertions(+), 253 deletions(-) delete mode 100644 rdmo_zenodo/exports/publish.py create mode 100644 rdmo_zenodo/exports/publish_snapshot.py delete mode 100644 rdmo_zenodo/exports/zenodo_api_upload.py diff --git a/rdmo_zenodo/exports/publish.py b/rdmo_zenodo/exports/publish.py deleted file mode 100644 index 1bf4b1c..0000000 --- a/rdmo_zenodo/exports/publish.py +++ /dev/null @@ -1,116 +0,0 @@ -import logging - -from django import forms -from django.shortcuts import redirect, render -from django.utils.translation import gettext_lazy as _ - -from .base import BaseZenodoExportProvider - -logger = logging.getLogger(__name__) - - -class ZenodoPublishProvider(BaseZenodoExportProvider): - - RDMO_PLUGIN_KEY = "zenodo-publish" - - rights_uri_options = { - 'dataset_license_types/71': 'cc-by-4.0', - 'dataset_license_types/73': 'cc-by-nc-4.0', - 'dataset_license_types/74': 'cc-by-nd-4.0', - 'dataset_license_types/75': 'cc-by-sa-4.0', - 'dataset_license_types/cc0': 'cc-zero' - } - - class Form(forms.Form): - - snapshot = forms.CharField(label=_('Select snapshot of your project')) - - def __init__(self, *args, **kwargs): - snapshot_choices = kwargs.pop('snapshot_choices') - super().__init__(*args, **kwargs) - - self.fields['snapshot'].widget = forms.RadioSelect(choices=snapshot_choices) - - def render(self): - snapshot_choices = [(i.id,i.title) for i in self.project.snapshots.all()] - - self.store_in_session(self.request, 'snapshot_choices', snapshot_choices) - - form = self.Form( - 
snapshot_choices=snapshot_choices - ) - - return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) - - def upload_file_to_zenodo(self, file): - # placeholder for posting the file - return - - def submit(self): - snapshot_choices = self.project.snapshots.all() - form = self.Form(self.request.POST, snapshot_choices=snapshot_choices) - - if 'cancel' in self.request.POST: - return redirect('project', self.project.id) - - if form.is_valid(): - url = self.get_post_url() # deposit url - snapshot_id = form.cleaned_data['snapshot'] - snapshot = self.project.snapshots.get(id=snapshot_id) - data = self.get_post_data(snapshot) - zen_data_response = self.post(self.request, url, data) - rdmo_pdf_response = self.render_snapshot_to_pdf(snapshot) - _zen_pdf_response = self.upload_file_to_zenodo(rdmo_pdf_response.content) - return zen_data_response - else: - return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) - - def post_success(self, request, response): - zenodo_url = response.json().get('links', {}).get('self_html') - if zenodo_url: - return redirect(zenodo_url) - else: - return render(request, 'core/error.html', { - 'title': _('ZENODO error'), - 'errors': [_('The URL of the new dataset could not be retrieved.')] - }, status=200) - - def get_post_url(self): - return self.deposit_url - - def get_post_data(self, snapshot): - # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata - metadata = {} - - # set the title from the title or id or the running index - metadata['title'] = snapshot.title - - # set the resource_type from the settings - metadata['resource_type'] = {'id': 'publication-datamanagementplan'} - - # set the description - description = snapshot.description or \ - f"Data Management Plan for project {snapshot.title}" - if description: - metadata['description'] = description - - # set subjects - metadata['subjects'] = [ - { - 'subject': 'Data Management Plan' - }, - { - 'subject': 
'DMP' - } - ] - - # set keywords - # keywords = self.get_values('project/research_question/keywords', set_index=set_index) - # for keyword in keywords: - # metadata['subjects'].append({ - # 'subject': keyword.text - # }) - - return { - 'metadata': metadata - } diff --git a/rdmo_zenodo/exports/publish_snapshot.py b/rdmo_zenodo/exports/publish_snapshot.py new file mode 100644 index 0000000..7caaadf --- /dev/null +++ b/rdmo_zenodo/exports/publish_snapshot.py @@ -0,0 +1,198 @@ +import logging + +from django.shortcuts import redirect, render +from django.utils.formats import localize +from django.utils.translation import gettext_lazy as _ + +import requests + +from rdmo.projects.models import Project + +from .base import BaseZenodoExportProvider +from .forms import ZenodoSnapshotForm +from .metadata import ZenodoMetadataExport +from .utils import ( + clear_record_id_from_project_value, + get_or_create_snapshot, + get_record_id_from_project_value, + render_project_views, + set_record_id_on_project_value, +) + +logger = logging.getLogger(__name__) + + +class ZenodoPublishProvider(BaseZenodoExportProvider): + + RDMO_PLUGIN_KEY = "zenodo-publish" + + def get_snapshot_choices(self): + snapshots = self.project.snapshots.order_by('-created') + formatted_list = [(i.id, f"{i.title} (id={i.id}, {localize(i.created)})") + for i in snapshots] + return formatted_list + + def render(self): + snapshot_choices = self.get_snapshot_choices() + + self.store_in_session(self.request, 'snapshot_choices', snapshot_choices) + + form = ZenodoSnapshotForm( + snapshot_choices=snapshot_choices + ) + context = {'form': form } + + record_id = get_record_id_from_project_value(self.project) + if record_id: + context['record_id'] = self.record_uploads_url(record_id) + + return render(self.request, 'plugins/exports_zenodo.html', context=context, status=200) + + def submit(self): + snapshot_choices = self.get_from_session(self.request, 'snapshot_choices') + form = ZenodoSnapshotForm(self.request.POST, 
snapshot_choices=snapshot_choices) + + if 'cancel' in self.request.POST: + return redirect('project', self.project.id) + + if form.is_valid(): + url = self.records_url # deposit url + snapshot_id = form.cleaned_data['snapshot'] or None + snapshot = get_or_create_snapshot(self.project, snapshot_id=snapshot_id) + self.snapshot = snapshot # set class attribute for Export.get_values + + # store project and snapshot in session else they get lost after post + self.store_in_session(self.request, 'project_id', self.project.id) + self.store_in_session(self.request, 'snapshot_id', self.snapshot.id) + + record_versions_url = self.validate_record_id_from_project_value_at_zenodo() + if record_versions_url: + # if record exists then post new version to zenodo, no data required + url = record_versions_url + return self.post(self.request, url, {}) + else: + # else create new draft record + data = self.get_post_data(self.project, self.snapshot) + return self.post(self.request, url, data) + else: + return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) + + def validate_record_id_from_project_value_at_zenodo(self): + """Validate the Zenodo record_id stored in the project.""" + + # Retrieve record_id from the project's stored values + record_id = get_record_id_from_project_value(self.project) + + if not record_id: + logger.warning("No record ID found in project values.") + return + + # Send a GET request to Zenodo to validate the record ID + response = requests.get(self.record_url(record_id), headers=self.authorization_header) + # response = self.get(self.request, record_url) + + # Check if the response is successful + if response.status_code == 200: + logger.info(f"Record ID {record_id} is valid.") + # the conceptrecid is the concept record identifier for all verions of this zenodo record + # https://inveniordm.docs.cern.ch/reference/metadata/#system-managed-persistent-identifiers + # in invenioRDM it is the parent.id field + concept_record_id = 
response.json()['conceptrecid'] + set_record_id_on_project_value(self.project, concept_record_id) + + versions_url = response.json().get('links', {}).get('versions') + return versions_url + elif response.status_code == 404: + logger.warning(f"Record ID {record_id} is invalid or not found in Zenodo.") + # the record_id does not exist, delete it from the project.value.text + clear_record_id_from_project_value(self.project) + else: + # Log any other unexpected response code + logger.error(f"Error validating record ID {record_id}: {response.status_code}") + + def get_project_and_snapshot_from_session(self, request): + project_id = self.get_from_session(request, 'project_id') + snapshot_id = self.get_from_session(request, 'snapshot_id') + project = Project.objects.filter_user(request.user).get(id=project_id) + snapshot = project.snapshots.get(id=snapshot_id) + return project, snapshot + + def post_success(self, request, response): + # the class attributes on self need to be redefined + # Retrieve project,snapshot from session + project, snapshot = self.get_project_and_snapshot_from_session(request) + self.project = project + self.snapshot = snapshot + + # and set request on self + self.request = request + + + if 'versions' in response.request.url and 'publication_date' not in response.json().get('metadata',{}): + # metadata needs to be posted to the new version + zenodo_api_url = response.json().get('links', {}).get('self') + data = self.get_post_data(self.project, self.snapshot) + version_update_response = requests.put(zenodo_api_url, json=data, headers=self.authorized_json_header) + logger.debug("PUT to %s", zenodo_api_url) + zenodo_url = response.json().get('links', {}).get('self_html') + response = version_update_response + else: + zenodo_url = response.json().get('links', {}).get('self_html') + + if zenodo_url: + record_id = response.json().get('id') + concept_record_id = response.json().get('conceptrecid') + files_url = response.json().get('links', 
{}).get('files') + _data_commit_pdf_response = self.post_export_file_to_zenodo(record_id=record_id, files_url=files_url, + attachment_format=self.export_file_format) + _publish_response = self.publish_draft_record(record_id=record_id) + + set_record_id_on_project_value(self.project, concept_record_id) + + return redirect(zenodo_url) + else: + return render(request, 'core/error.html', { + 'title': _('ZENODO error'), + 'errors': [_('The URL of the new dataset could not be retrieved.')] + }, status=200) + + def post_export_file_to_zenodo(self, record_id=None, files_url=None, attachment_format=None, export_filename=None): + # https://inveniordm.docs.cern.ch/reference/rest_api_drafts_records/#draft-files + if record_id is None or files_url is None or attachment_format is None: + return + + rdmo_pdf_response = render_project_views(self.project, self.snapshot, attachment_format) + binary = rdmo_pdf_response.content + export_filename = export_filename or "rdmo_dmp" + filename = f"{export_filename}.{attachment_format}" + + # get access token from the session + draft_file_post_response = requests.post(files_url, headers=self.authorization_header, json=[{'key': filename}]) + entries = draft_file_post_response.json().get('entries', []) + draft_file_entry = next(filter(lambda i: i["key"] == filename, entries), None) + if draft_file_entry is None: + return + + content_url = draft_file_entry.get('links', {}).get('content') + _data_content_response = requests.put(content_url, headers=self.authorized_binary_header, data=binary) + logger.debug("PUT to %s", content_url) + + commit_url = draft_file_entry.get('links', {}).get('commit') + data_commit_response = requests.post(commit_url, headers=self.authorization_header) + logger.debug("POST to %s", commit_url) + + return data_commit_response + + def publish_draft_record(self, record_id=None): + # https://inveniordm.docs.cern.ch/reference/rest_api_drafts_records/#publish-a-draft-record + if record_id is None: + return + publish_url = 
self.record_publish_url(record_id) + response = requests.post(publish_url, headers=self.authorization_header) + logger.debug("POST to %s", publish_url) + return response + + def get_post_data(self, project, snapshot): + # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata + metadata_builder = ZenodoMetadataExport(project=project, snapshot=snapshot) + return metadata_builder.build_metadata() diff --git a/rdmo_zenodo/exports/zenodo_api_upload.py b/rdmo_zenodo/exports/zenodo_api_upload.py deleted file mode 100644 index 87230ad..0000000 --- a/rdmo_zenodo/exports/zenodo_api_upload.py +++ /dev/null @@ -1,137 +0,0 @@ -import requests - -token = "token_generated_from_Zenodo_for_RDMO" - -API_URL = "https://sandbox.zenodo.org/api/" -URL = "https://sandbox.zenodo.org/" -DOI_URL = "https://doi.org/10.5072/zenodo." -PUBLISHER = "RDMO" - -json_header = { - "Accept": "application/json", - 'Content-Type': 'application/json', - 'Authorization': 'Bearer {}'.format(token) -} -binary_header= { - "Accept": "application/json", - 'Content-Type': 'application/octet-stream', - 'Authorization': 'Bearer {}'.format(token) -} -plain_header = { - "Accept": "application/json", - 'Authorization': 'Bearer {}'.format(token) -} - -# sample json data -sample_data = { - "metadata": { - "title": "RDMO_02", - "description": "schubidu, schubida", - "creators": [ - { - "person_or_org": { - "given_name": "Max", - "family_name": "Mustermann", - "type": "personal", - "identifiers": [{"identifier": "0001-0002-0003-0004"}] - }, - "affiliations": [{"name": "University of Test"}] - } - ], - "publication_date": "2024-10-17", - "publisher": PUBLISHER, - "resource_type": {"id": "publication-datamanagementplan"}, - "subjects": [{"subject": "Data Management Plan"}, {"subject": "DMP"}] - } -} - -def convert_json(input_json, publication_date, publisher): - """ - Convert the metadata output of a Zenodo response into a - format that gets accepted as input. 
- """ - output_json = {} - output_json['metadata'] = { - 'title': input_json['metadata']['title'], - 'description': input_json['metadata']['description'], - 'publication_date': publication_date, - 'publisher': publisher, - 'creators': [{ - 'person_or_org': { - 'given_name': creator['name'].split(', ')[1], - 'family_name': creator['name'].split(', ')[0], - 'type': 'personal', - 'identifiers': [{'identifier': creator['orcid']}] - }, - 'affiliations': [{'name': creator['affiliation']}] - } for creator in input_json['metadata']['creators']], - 'resource_type': { - 'id': "publication-datamanagementplan" - }, - 'subjects': [{'subject': keyword} for keyword in input_json['metadata']['keywords']] - } - - return output_json - -def upload_file(id, binary=None): - # POST data / PDF - files_url = API_URL+f"records/{id}/draft/files" - file_data = [{"key": "DMP.pdf"}] - data_initialization_response = requests.post(files_url, headers=json_header, json=file_data) - - # upload the actual file content as binary stream - files_content_url = API_URL+f"records/{id}/draft/files/{file_data[0]['key']}/content" - ## multiple files as upload realistic??? 
^^^ - - # upload data from a file path - file_path = r'C:\Users\path\to\my.pdf' - - # Open the file in binary mode - with open(file_path, 'rb') as file: - data_content_response = requests.put(files_content_url, headers=binary_header, data=file) - - # alternative with binary data - #data_content_response = requests.put(files_content_url, headers=binary_header, data=binary) - - # commit the file upload - file_commit_url = API_URL+f"records/{id}/draft/files/{file_data[0]['key']}/commit" - data_commit_response = requests.post(file_commit_url, headers=plain_header) - -def create_record(metadata): - records_url = API_URL+"records" - draft_response = requests.post(records_url, headers=json_header, json=metadata) - # return the id of the draft - return draft_response.json()["id"] - -def get_publication_url(id): - return URL+f"uploads/{id}" - -def get_doi(id): - """ - id is the Zenodo record id - """ - return DOI_URL+f"{id}" - -def get_overarching_doi(id_v1): - """ - we need the id of v1 - overarching id = id of v1 -1 - """ - return DOI_URL+f"{int(id_v1) - 1}" - - -def create_new_version(id_v1, new_date, publisher=PUBLISHER): - """ - id of v1 needed, NOT "overall" id!!! - new_date: e.g. 
"2024-10-17" - returns id of the new draft - """ - new_version_response = requests.post(API_URL+f"records/{id_v1}/versions", headers=plain_header).json() - new_id = new_version_response["id"] - # get metadata from the draft and add new publication date - new_data = convert_json(new_version_response, new_date, publisher) - update_url = API_URL+f"records/{new_id}/draft" - # update draft dataset with the new publication date - draft_response = requests.put(update_url, headers=json_header, json=new_data) - - return draft_response.json()["id"] From ac3ace1882753f7c5ba4be28328b25cdf6d5e9c8 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 4 Nov 2024 19:26:27 +0100 Subject: [PATCH 18/41] refactor: move rdmo db methods to utils.py Signed-off-by: David Wallace --- rdmo_zenodo/exports/utils.py | 83 ++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 rdmo_zenodo/exports/utils.py diff --git a/rdmo_zenodo/exports/utils.py b/rdmo_zenodo/exports/utils.py new file mode 100644 index 0000000..045ce55 --- /dev/null +++ b/rdmo_zenodo/exports/utils.py @@ -0,0 +1,83 @@ +from django.http import HttpResponse +from django.template import TemplateSyntaxError + +from rdmo.core.utils import render_to_format +from rdmo.domain.models import Attribute +from rdmo.projects.models.snapshot import Snapshot +from rdmo.projects.models.value import Value +from rdmo.projects.utils import get_value_path +from rdmo.views.models import View + +attribute_doi_uri_prefix = "https://rdmorganiser.github.io/terms" +attribute_doi_uri_key = "project/metadata/publication/zenodo_id" + +def get_or_create_snapshot(project, snapshot_id=None): + if snapshot_id is None: + new_snapshot_title_id = project.snapshots.count() + 1 + description = f"{project.description}." + description += f"\nThis snapshot({new_snapshot_title_id}.) was automatically generated." 
+ snapshot = Snapshot(project=project, + title=f"{project.title} {new_snapshot_title_id}", + description=description) + snapshot.save() + else: + snapshot = project.snapshots.get(id=snapshot_id) + + return snapshot + +def get_project_value_with_record_id(project): + record_id_attribute, _created = Attribute.objects.get_or_create(uri_prefix=attribute_doi_uri_prefix, + key=attribute_doi_uri_key) + project_doi_value = project.values.filter(attribute=record_id_attribute).first() + return project_doi_value, record_id_attribute + + +def get_record_id_from_project_value(project): + # get attribute + + project_doi_value, _ = get_project_value_with_record_id(project) + + if project_doi_value is not None: + return project_doi_value.text + else: + return None + +def set_record_id_on_project_value(project, record_id): + if project is None or record_id is None: + return + + project_doi_value, record_id_attribute = get_project_value_with_record_id(project) + + if project_doi_value is None: + # create the value with text and add it + value = Value(project=project, attribute=record_id_attribute, text=record_id) + value.save() + project.values.add(value) + elif project_doi_value.text != record_id: + # update and overwrite the value.text + project_doi_value.text = record_id + project_doi_value.save() + +def clear_record_id_from_project_value(project): + """Clear the record_id text from the project's values by setting it to an empty string.""" + set_record_id_on_project_value(project, '') + +def render_project_views(project, snapshot, attachments_format, view=None): + + if view is None: + view = View.objects.get(uri="https://rdmorganiser.github.io/terms/views/variable_check") + + try: + rendered_view = view.render(project, snapshot) + except TemplateSyntaxError: + return HttpResponse() + + return render_to_format( + None, attachments_format, project.title, 'projects/project_view_export.html', { + 'format': attachments_format, + 'title': project.title, + 'view': view, + 
'rendered_view': rendered_view, + 'resource_path': get_value_path(project, snapshot) + } + ) From fa589c9f8c4aaca4a3e9815b634a6b7fa02f292d Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 4 Nov 2024 19:27:39 +0100 Subject: [PATCH 19/41] feat: add optional zenodo record url to export template Signed-off-by: David Wallace --- rdmo_zenodo/templates/plugins/exports_zenodo.html | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rdmo_zenodo/templates/plugins/exports_zenodo.html b/rdmo_zenodo/templates/plugins/exports_zenodo.html index b336f85..23f9e01 100644 --- a/rdmo_zenodo/templates/plugins/exports_zenodo.html +++ b/rdmo_zenodo/templates/plugins/exports_zenodo.html @@ -5,6 +5,13 @@ {% block page %}

{% trans 'Export to Zenodo' %}

+ {% if record_id %} +

+ {% trans 'This project has a Zenodo record id' %}: {{ record_id }} +

+ {% else %} +

{% trans 'This project does not have a Zenodo record id.' %}

+ {% endif %} {% bootstrap_form submit=_('Export to Zenodo') %} From 82beed17a804a752207bf1294647d2b0313feb6c Mon Sep 17 00:00:00 2001 From: David Wallace Date: Fri, 25 Jul 2025 18:27:23 +0200 Subject: [PATCH 20/41] use plugin.key instead Signed-off-by: David Wallace --- rdmo_zenodo/exports/base.py | 6 +----- rdmo_zenodo/exports/export_dataset.py | 2 -- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/rdmo_zenodo/exports/base.py b/rdmo_zenodo/exports/base.py index f4d6909..687e9c9 100644 --- a/rdmo_zenodo/exports/base.py +++ b/rdmo_zenodo/exports/base.py @@ -18,8 +18,6 @@ class BaseZenodoExportProvider(OauthProviderMixin, Export): - RDMO_PLUGIN_KEY = None - @property def client_id(self): return settings.ZENODO_PROVIDER['client_id'] @@ -42,9 +40,7 @@ def token_url(self): @property def redirect_path(self): - if self.RDMO_PLUGIN_KEY is None: - raise ValueError("the RDMO_PLUGIN_KEY should be set as a class attribute") - return reverse('oauth_callback', args=[self.RDMO_PLUGIN_KEY]) + return reverse('oauth_callback', args=[self.key]) @property def authorization_header(self): diff --git a/rdmo_zenodo/exports/export_dataset.py b/rdmo_zenodo/exports/export_dataset.py index 802fb57..f7bb2ae 100644 --- a/rdmo_zenodo/exports/export_dataset.py +++ b/rdmo_zenodo/exports/export_dataset.py @@ -12,8 +12,6 @@ class ZenodoExportProvider(BaseZenodoExportProvider): - RDMO_PLUGIN_KEY = "zenodo" - def get_dataset_choices(self): datasets = self.get_set('project/dataset/id') return [(dataset.set_index, dataset.value) for dataset in datasets] From a641b2c7aabb073f7359765daf56503a01d1fedd Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 28 Jul 2025 16:19:09 +0200 Subject: [PATCH 21/41] refactor metadata into package with builder classes Signed-off-by: David Wallace --- rdmo_zenodo/exports/metadata.py | 134 ------------------ rdmo_zenodo/exports/metadata/__init__.py | 0 rdmo_zenodo/exports/metadata/base.py | 79 +++++++++++ rdmo_zenodo/exports/metadata/dataset.py | 24 ++++ 
rdmo_zenodo/exports/metadata/model.py | 40 ++++++ rdmo_zenodo/exports/metadata/snapshot.py | 29 ++++ .../templates/plugins/publish_zenodo.html | 0 7 files changed, 172 insertions(+), 134 deletions(-) delete mode 100644 rdmo_zenodo/exports/metadata.py create mode 100644 rdmo_zenodo/exports/metadata/__init__.py create mode 100644 rdmo_zenodo/exports/metadata/base.py create mode 100644 rdmo_zenodo/exports/metadata/dataset.py create mode 100644 rdmo_zenodo/exports/metadata/model.py create mode 100644 rdmo_zenodo/exports/metadata/snapshot.py create mode 100644 rdmo_zenodo/templates/plugins/publish_zenodo.html diff --git a/rdmo_zenodo/exports/metadata.py b/rdmo_zenodo/exports/metadata.py deleted file mode 100644 index 84ed5da..0000000 --- a/rdmo_zenodo/exports/metadata.py +++ /dev/null @@ -1,134 +0,0 @@ -from typing import Any, Dict, List - -from django.conf import settings -from django.core.exceptions import ObjectDoesNotExist -from django.utils import timezone - -from rdmo.projects.exports import Export - - -class ZenodoMetadataExport(Export): - - rights_uri_options = { - 'dataset_license_types/71': 'cc-by-4.0', - 'dataset_license_types/73': 'cc-by-nc-4.0', - 'dataset_license_types/74': 'cc-by-nd-4.0', - 'dataset_license_types/75': 'cc-by-sa-4.0', - 'dataset_license_types/cc0': 'cc-zero' - } - default_resource_type = 'publication-datamanagementplan' - default_subjects = ['Data Management Plan', 'DMP'] - - def __init__(self, project=None, snapshot=None, set_index=None): - self.project = project - self.snapshot = snapshot - self.set_index = set_index - # Retrieve settings once at initialization - self.zenodo_settings = settings.ZENODO_PROVIDER - - def build_metadata(self) -> Dict[str, Any]: - """Build the metadata dictionary for Zenodo export, excluding empty fields.""" - metadata = { - 'resource_type': self._get_resource_type(), - 'creators': self._get_creators() if self._should_add_project_members() else [], - 'title': self._get_title(), - 'description': 
self._get_description(), - 'rights': self._get_rights(), - 'languages': self._get_languages(), - 'publisher': self._get_publisher(), - 'funding': self._get_funding(), - 'upload_type': self.zenodo_settings.get('upload_type', 'dataset'), - 'publication_date': timezone.localdate().isoformat(), - 'subjects': self._get_subjects(), - } - # Filter out empty values - return { - 'metadata': self._filter_empty_values(metadata) - } - - def _filter_empty_values(self, metadata: Dict[str, Any]) -> Dict[str, Any]: - """Remove empty values from the metadata dictionary.""" - return {k: v for k, v in metadata.items() if v not in [None, '', [], {}]} - - def _get_resource_type(self) -> Dict[str, str]: - """Retrieve the resource type from settings or use default.""" - resource_type = self.zenodo_settings.get('resource_type', self.default_resource_type) - if self.snapshot is None and self.set_index is not None: - resource_type = 'dataset' - return {'id': resource_type} - - def _get_creators(self) -> List[Dict[str, Any]]: - """Build the list of creators from project members.""" - creators = [] - for user in self.project.user.all(): - creators.append({ - 'person_or_org': { - 'family_name': user.last_name, - 'given_name': user.first_name, - 'type': 'personal', - 'identifiers': self._get_identifiers(user) - } - }) - return creators - - def _get_identifiers(self, user) -> List[Dict[str, str]]: - """Retrieve ORCID identifier if available for the user.""" - try: - orcid = user.socialaccount_set.get(provider='orcid') - return [{'scheme': 'orcid', 'identifier': orcid.uid}] - except (ObjectDoesNotExist, AttributeError): - return [] - - def _should_add_project_members(self) -> bool: - """Determine if project members should be added as creators.""" - return self.zenodo_settings.get('add_project_members', False) - - def _get_title(self) -> str: - """Construct the title for the metadata.""" - title_from_snapshot = f"{self.project.title} - Snapshot: {self.snapshot.title}" if self.snapshot else None - 
return ( - title_from_snapshot or - self.get_text('project/dataset/title', set_index=self.set_index) or - self.get_text('project/dataset/id', set_index=self.set_index) or - f'Dataset #{self.set_index + 1}' - ) - - def _get_description(self) -> str: - """Construct the description for the metadata.""" - description = f"Data Management Plan for project {self.project.title}." - if self.snapshot is not None: - description += f" {self.snapshot.description}" - if self.set_index is not None: - dataset_title = self.get_text('project/dataset/title', set_index=self.set_index) - if dataset_title: - description += f" {dataset_title}" - return description - - def _get_rights(self) -> List[Dict[str, str]]: - """Retrieve the rights/license information from project metadata.""" - for rights in self.get_values('project/dataset/sharing/conditions', set_index=self.set_index): - if rights.option: - return [{'id': self.rights_uri_options.get(rights.option.uri_path)}] - return [] - - def _get_languages(self) -> List[Dict[str, str]]: - """Retrieve the language setting from configuration.""" - language = self.zenodo_settings.get('language') - return [{'id': language}] if language else [] - - def _get_publisher(self) -> str: - """Retrieve the publisher setting from configuration.""" - return self.zenodo_settings.get('publisher') - - def _get_funding(self) -> str: - """Retrieve the funding information from configuration.""" - return self.zenodo_settings.get('funding') - - def _get_subjects(self) -> List[Dict[str, str]]: - """Retrieve and construct the subjects for the metadata.""" - # Default subjects - subjects = [{'subject': i} for i in self.default_subjects] - # Add project-specific keywords - keywords = self.get_values('project/research_question/keywords') - subjects.extend({'subject': keyword.text} for keyword in keywords) - return subjects diff --git a/rdmo_zenodo/exports/metadata/__init__.py b/rdmo_zenodo/exports/metadata/__init__.py new file mode 100644 index 0000000..e69de29 diff 
--git a/rdmo_zenodo/exports/metadata/base.py b/rdmo_zenodo/exports/metadata/base.py new file mode 100644 index 0000000..3523d1b --- /dev/null +++ b/rdmo_zenodo/exports/metadata/base.py @@ -0,0 +1,79 @@ +from dataclasses import dataclass, field +from typing import Any + +from django.conf import settings +from django.core.exceptions import ObjectDoesNotExist + +from .model import ZenodoMetadata + +RIGHTS_URI_OPTIONS = { + 'dataset_license_types/71': 'cc-by-4.0', + 'dataset_license_types/73': 'cc-by-nc-4.0', + 'dataset_license_types/74': 'cc-by-nd-4.0', + 'dataset_license_types/75': 'cc-by-sa-4.0', + 'dataset_license_types/cc0': 'cc-zero' +} +DEFAULT_SUBJECTS = ['Data Management Plan', 'DMP'] + + +@dataclass +class ZenodoMetadataBuilder: + title: str + description: str + rights_uri_paths: list[str] = field(default_factory=list) + keywords: list[str] = field(default_factory=list) + project_users: list[Any] = field(default_factory=list) + + def get_creators(self) -> list[dict[str, dict]]: + if not settings.ZENODO_PROVIDER.get('add_project_members'): + return [] + + creators = [] + for user in self.project_users: + person = { + "family_name": user.last_name, + "given_name": user.first_name, + "identifiers": self.get_user_identifiers(user), + "type": "personal" + } + creators.append({"person_or_org": person}) + return creators + + @staticmethod + def get_user_identifiers(user) -> list[dict[str, str]]: + # there may also be other providers that have uids in extra_data + try: + orcid = user.socialaccount_set.get(provider="orcid") + except (ObjectDoesNotExist, AttributeError): + return [] + else: + return [{"scheme": "orcid", "identifier": orcid.uid}] + + @staticmethod + def get_rights_from_uri_paths(rights_options) -> list[dict[str, str]]: + for uri_path in rights_options: + license_id = RIGHTS_URI_OPTIONS.get(uri_path) + if license_id: + return [{"id": license_id}] + return [] + + @staticmethod + def get_subjects_from_keywords(keywords) -> list[dict[str, str]]: + subjects 
= [{"subject": s} for s in DEFAULT_SUBJECTS] + for keyword in keywords: + subjects.append({"subject": keyword}) + return subjects + + @staticmethod + def get_languages() -> list[dict[str, str]]: + language = settings.ZENODO_PROVIDER.get("language") + if language: + return [{"id": language}] + else: + return [] + + def build_metadata(self) -> ZenodoMetadata: + raise NotImplementedError() + + def to_post_data(self, filter_empty=False): + return {'metadata': self.build_metadata().to_dict(filter_empty=filter_empty)} diff --git a/rdmo_zenodo/exports/metadata/dataset.py b/rdmo_zenodo/exports/metadata/dataset.py new file mode 100644 index 0000000..3ab5988 --- /dev/null +++ b/rdmo_zenodo/exports/metadata/dataset.py @@ -0,0 +1,24 @@ +from dataclasses import dataclass + +from django.conf import settings + +from .base import ZenodoMetadataBuilder +from .model import ZenodoMetadata + + +@dataclass +class ZenodoMetadataDatasetBuilder(ZenodoMetadataBuilder): + + def build_metadata(self) -> ZenodoMetadata: + return ZenodoMetadata( + resource_type={"id": settings.ZENODO_PROVIDER.get("resource_type", "dataset")}, + title=self.title, + description=self.description, + rights=self.get_rights_from_uri_paths(self.rights_uri_paths), + creators=self.get_creators(), + subjects=self.get_subjects_from_keywords(self.keywords), + languages=self.get_languages(), + upload_type=settings.ZENODO_PROVIDER.get("upload_type", "dataset"), + publisher=settings.ZENODO_PROVIDER.get("publisher"), + funding=settings.ZENODO_PROVIDER.get("funding") + ) diff --git a/rdmo_zenodo/exports/metadata/model.py b/rdmo_zenodo/exports/metadata/model.py new file mode 100644 index 0000000..6e0f1e5 --- /dev/null +++ b/rdmo_zenodo/exports/metadata/model.py @@ -0,0 +1,40 @@ +from dataclasses import dataclass, field +from typing import Optional + + +@dataclass +class ZenodoMetadata: + resource_type: dict[str, str] + title: str + description: str + creators: list[dict[str, dict]] + upload_type: Optional[str] = None + rights: 
Optional[list[dict[str, str]]] = None + subjects: Optional[list[dict[str, str]]] = field(default_factory=list) + languages: Optional[list[dict[str, str]]] = None + publisher: Optional[str] = None + funding: Optional[str] = None + publication_date: Optional[str] = None + + def to_dict(self, filter_empty: Optional[bool] = False) -> dict[str, dict]: + """Return dict suitable for POST to Zenodo.""" + metadata = { + "resource_type": self.resource_type, + "title": self.title, + "description": self.description, + "creators": self.creators, + "upload_type": self.upload_type, + "rights": self.rights, + "subjects": self.subjects, + "languages": self.languages, + "publisher": self.publisher, + "funding": self.funding, + "publication_date": self.publication_date, + } + if filter_empty: + return self.filter_empty(metadata) + + return metadata + + def filter_empty(self, metadata: dict[str, any]) -> dict[str, any]: + return {k: v for k, v in metadata.items() if v not in [None, '', [], {}]} diff --git a/rdmo_zenodo/exports/metadata/snapshot.py b/rdmo_zenodo/exports/metadata/snapshot.py new file mode 100644 index 0000000..755cb82 --- /dev/null +++ b/rdmo_zenodo/exports/metadata/snapshot.py @@ -0,0 +1,29 @@ +from dataclasses import dataclass +from typing import Optional + +from django.conf import settings +from django.utils import timezone + +from .base import ZenodoMetadataBuilder +from .model import ZenodoMetadata + + +@dataclass +class ZenodoMetadataSnapshotBuilder(ZenodoMetadataBuilder): + + publication_date: Optional[str] = None + + def build_metadata(self) -> ZenodoMetadata: + return ZenodoMetadata( + resource_type={"id": "publication-datamanagementplan"}, + title=self.title, + description=self.description, + rights=self.get_rights_from_uri_paths(self.rights_uri_paths), + creators=self.get_creators(), + subjects=self.get_subjects_from_keywords(self.keywords), + languages=self.get_languages(), + upload_type=settings.ZENODO_PROVIDER.get("upload_type", 
"publication-datamanagementplan"), + publisher=settings.ZENODO_PROVIDER.get("publisher"), + funding=settings.ZENODO_PROVIDER.get("funding"), + publication_date=self.publication_date or timezone.localdate().isoformat(), + ) diff --git a/rdmo_zenodo/templates/plugins/publish_zenodo.html b/rdmo_zenodo/templates/plugins/publish_zenodo.html new file mode 100644 index 0000000..e69de29 From f216d40e22b10f652dafaffd2c6b34df2f1cd3a2 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 28 Jul 2025 16:28:27 +0200 Subject: [PATCH 22/41] add view and export format to snapshot form Signed-off-by: David Wallace --- rdmo_zenodo/exports/forms.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/rdmo_zenodo/exports/forms.py b/rdmo_zenodo/exports/forms.py index 7e97d09..60c97e8 100644 --- a/rdmo_zenodo/exports/forms.py +++ b/rdmo_zenodo/exports/forms.py @@ -1,4 +1,5 @@ from django import forms +from django.conf import settings from django.utils.translation import gettext_lazy as _ @@ -18,10 +19,22 @@ class ZenodoSnapshotForm(forms.Form): required=False, # Allows empty selection widget=forms.RadioSelect ) + view = forms.ChoiceField( + label=_("Select the view with which your project will be published"), + required=False, # Allows empty selection + widget=forms.Select, + ) + export_format = forms.ChoiceField( + label=_("Select the export format"), + required=False, widget=forms.Select, + choices=settings.EXPORT_FORMATS + ) def __init__(self, *args, **kwargs): snapshot_choices = kwargs.pop('snapshot_choices', []) + view_choices = kwargs.pop("view_choices", []) super().__init__(*args, **kwargs) snapshot_choices = [(None, _("Create new snapshot")), *snapshot_choices] self.fields['snapshot'].choices = snapshot_choices self.fields['snapshot'].initial = None + self.fields['view'].choices = view_choices From 9c289ad866497dff2542f8dbbee5a457809e9b82 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 28 Jul 2025 17:14:23 +0200 Subject: [PATCH 23/41] add publication type 
and define resource and upload types Signed-off-by: David Wallace --- rdmo_zenodo/exports/metadata/base.py | 3 +++ rdmo_zenodo/exports/metadata/dataset.py | 7 +++++-- rdmo_zenodo/exports/metadata/model.py | 2 ++ rdmo_zenodo/exports/metadata/snapshot.py | 12 ++++++++++-- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/rdmo_zenodo/exports/metadata/base.py b/rdmo_zenodo/exports/metadata/base.py index 3523d1b..5590181 100644 --- a/rdmo_zenodo/exports/metadata/base.py +++ b/rdmo_zenodo/exports/metadata/base.py @@ -24,6 +24,9 @@ class ZenodoMetadataBuilder: keywords: list[str] = field(default_factory=list) project_users: list[Any] = field(default_factory=list) + def get_resource_type(self): + {"id": settings.ZENODO_PROVIDER.get("resource_type", self.resource_type)} + def get_creators(self) -> list[dict[str, dict]]: if not settings.ZENODO_PROVIDER.get('add_project_members'): return [] diff --git a/rdmo_zenodo/exports/metadata/dataset.py b/rdmo_zenodo/exports/metadata/dataset.py index 3ab5988..f2c5978 100644 --- a/rdmo_zenodo/exports/metadata/dataset.py +++ b/rdmo_zenodo/exports/metadata/dataset.py @@ -9,16 +9,19 @@ @dataclass class ZenodoMetadataDatasetBuilder(ZenodoMetadataBuilder): + resource_type = "dataset" + upload_type = "dataset" + def build_metadata(self) -> ZenodoMetadata: return ZenodoMetadata( - resource_type={"id": settings.ZENODO_PROVIDER.get("resource_type", "dataset")}, + resource_type={"id": settings.ZENODO_PROVIDER.get("resource_type", self.resource_type)}, title=self.title, description=self.description, rights=self.get_rights_from_uri_paths(self.rights_uri_paths), creators=self.get_creators(), subjects=self.get_subjects_from_keywords(self.keywords), languages=self.get_languages(), - upload_type=settings.ZENODO_PROVIDER.get("upload_type", "dataset"), + upload_type=settings.ZENODO_PROVIDER.get("upload_type", self.upload_type), publisher=settings.ZENODO_PROVIDER.get("publisher"), funding=settings.ZENODO_PROVIDER.get("funding") ) diff --git 
a/rdmo_zenodo/exports/metadata/model.py b/rdmo_zenodo/exports/metadata/model.py index 6e0f1e5..c56264d 100644 --- a/rdmo_zenodo/exports/metadata/model.py +++ b/rdmo_zenodo/exports/metadata/model.py @@ -15,6 +15,7 @@ class ZenodoMetadata: publisher: Optional[str] = None funding: Optional[str] = None publication_date: Optional[str] = None + publication_type: Optional[str] = None def to_dict(self, filter_empty: Optional[bool] = False) -> dict[str, dict]: """Return dict suitable for POST to Zenodo.""" @@ -30,6 +31,7 @@ def to_dict(self, filter_empty: Optional[bool] = False) -> dict[str, dict]: "publisher": self.publisher, "funding": self.funding, "publication_date": self.publication_date, + "publication_type": self.publication_type, } if filter_empty: return self.filter_empty(metadata) diff --git a/rdmo_zenodo/exports/metadata/snapshot.py b/rdmo_zenodo/exports/metadata/snapshot.py index 755cb82..4b29986 100644 --- a/rdmo_zenodo/exports/metadata/snapshot.py +++ b/rdmo_zenodo/exports/metadata/snapshot.py @@ -12,18 +12,26 @@ class ZenodoMetadataSnapshotBuilder(ZenodoMetadataBuilder): publication_date: Optional[str] = None + resource_type = "publication-datamanagementplan" + upload_type = "publication" + publication_type = "datamanagementplan" def build_metadata(self) -> ZenodoMetadata: return ZenodoMetadata( - resource_type={"id": "publication-datamanagementplan"}, + resource_type={"id": settings.ZENODO_PROVIDER.get("resource_type", self.resource_type)}, title=self.title, description=self.description, rights=self.get_rights_from_uri_paths(self.rights_uri_paths), creators=self.get_creators(), subjects=self.get_subjects_from_keywords(self.keywords), languages=self.get_languages(), - upload_type=settings.ZENODO_PROVIDER.get("upload_type", "publication-datamanagementplan"), + upload_type=settings.ZENODO_PROVIDER.get("upload_type", self.upload_type), + publication_type=self.get_publication_type(), publisher=settings.ZENODO_PROVIDER.get("publisher"), 
funding=settings.ZENODO_PROVIDER.get("funding"), publication_date=self.publication_date or timezone.localdate().isoformat(), ) + + def get_publication_type(self): + if self.upload_type == "publication": + return settings.ZENODO_PROVIDER.get("publication_type", self.publication_type) From ac51f2546b334093e0fff2d9517a69d327ea47de Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 28 Jul 2025 17:15:36 +0200 Subject: [PATCH 24/41] use metadata builder in get post data Signed-off-by: David Wallace --- rdmo_zenodo/exports/export_dataset.py | 31 ++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/rdmo_zenodo/exports/export_dataset.py b/rdmo_zenodo/exports/export_dataset.py index f7bb2ae..3e424b3 100644 --- a/rdmo_zenodo/exports/export_dataset.py +++ b/rdmo_zenodo/exports/export_dataset.py @@ -1,11 +1,13 @@ import logging +from django.conf import settings from django.shortcuts import redirect, render from django.utils.translation import gettext_lazy as _ +from rdmo_zenodo.exports.metadata.dataset import ZenodoMetadataDatasetBuilder + from .base import BaseZenodoExportProvider from .forms import ZenodoDatasetForm -from .metadata import ZenodoMetadataExport logger = logging.getLogger(__name__) @@ -53,5 +55,28 @@ def post_success(self, request, response): def get_post_data(self, set_index): # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata - metadata_builder = ZenodoMetadataExport(project=self.project, set_index=set_index) - return metadata_builder.build_metadata() + dataset_title = self.get_text("project/dataset/title", set_index=set_index) + title = ( + dataset_title or + self.get_text('project/dataset/id', set_index=set_index) or + f'Dataset #{int(set_index) + 1}' + ) + description = f"Data Management Plan for project {self.project.title}." 
+ + if dataset_title: + description += f" {dataset_title}" + + metadata_builder = ZenodoMetadataDatasetBuilder( + title=title, + description=description, + keywords=[ + i.text + for i in self.get_values("project/research_question/keywords") if i.text + ], + rights_uri_paths=[ + i.option.uri_path + for i in self.get_values("project/dataset/sharing/conditions", set_index=set_index) if i.option + ], + project_users=self.project.user.all() if settings.ZENODO_PROVIDER.get("add_project_members") else [], + ) + return metadata_builder.to_post_data(filter_empty=True) From bcf4776dfdf22629aa56d3a2c27ffeb86e11336e Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 28 Jul 2025 17:18:25 +0200 Subject: [PATCH 25/41] add view and format selection and use metadata builder in get post data Signed-off-by: David Wallace --- rdmo_zenodo/exports/base.py | 4 - rdmo_zenodo/exports/publish_snapshot.py | 169 ++++++++++++++++-------- 2 files changed, 111 insertions(+), 62 deletions(-) diff --git a/rdmo_zenodo/exports/base.py b/rdmo_zenodo/exports/base.py index 687e9c9..e6a34fe 100644 --- a/rdmo_zenodo/exports/base.py +++ b/rdmo_zenodo/exports/base.py @@ -54,10 +54,6 @@ def authorized_binary_header(self): def authorized_json_header(self): return {**json_header, **self.authorization_header} - @property - def export_file_format(self): - return settings.ZENODO_PROVIDER.get('export_format', 'pdf') - def record_uploads_url(self, record_id): return f"{self.zenodo_url}/uploads/{record_id}" diff --git a/rdmo_zenodo/exports/publish_snapshot.py b/rdmo_zenodo/exports/publish_snapshot.py index 7caaadf..28b44d6 100644 --- a/rdmo_zenodo/exports/publish_snapshot.py +++ b/rdmo_zenodo/exports/publish_snapshot.py @@ -1,21 +1,24 @@ import logging +from django.conf import settings from django.shortcuts import redirect, render from django.utils.formats import localize +from django.utils.text import slugify from django.utils.translation import gettext_lazy as _ import requests from rdmo.projects.models 
import Project +from rdmo_zenodo.exports.metadata.snapshot import ZenodoMetadataSnapshotBuilder + from .base import BaseZenodoExportProvider from .forms import ZenodoSnapshotForm -from .metadata import ZenodoMetadataExport from .utils import ( clear_record_id_from_project_value, get_or_create_snapshot, get_record_id_from_project_value, - render_project_views, + render_and_export_project_from_view, set_record_id_on_project_value, ) @@ -24,21 +27,49 @@ class ZenodoPublishProvider(BaseZenodoExportProvider): - RDMO_PLUGIN_KEY = "zenodo-publish" + view = None + export_format = None def get_snapshot_choices(self): - snapshots = self.project.snapshots.order_by('-created') - formatted_list = [(i.id, f"{i.title} (id={i.id}, {localize(i.created)})") - for i in snapshots] - return formatted_list + return [ + (i.id, f"{i.title} ({localize(i.created)})") + for i in self.project.snapshots.order_by('-created') + ] + + def get_view_choices(self): + return [ + (i.id, f"{i.title}") + for i in self.project.views.all() + ] + + def get_from_session_and_set_on_self(self, request): + self.project = self.get_project_from_session(request) + self.snapshot = self.get_snapshot_from_session(request, self.project) + self.view = self.get_view_from_session(request, self.project) + self.export_format = self.get_from_session(request, 'export_format') + + def get_project_from_session(self, request): + project_id = self.get_from_session(request, 'project_id') + return Project.objects.filter_user(request.user).get(id=project_id) + + def get_snapshot_from_session(self, request, project): + snapshot_id = self.get_from_session(request, 'snapshot_id') + return project.snapshots.get(id=snapshot_id) + + def get_view_from_session(self, request, project): + view_id = self.get_from_session(request, 'view_id') + return project.views.get(id=view_id) def render(self): snapshot_choices = self.get_snapshot_choices() + view_choices = self.get_view_choices() self.store_in_session(self.request, 'snapshot_choices', 
snapshot_choices) + self.store_in_session(self.request, "view_choices", view_choices) form = ZenodoSnapshotForm( - snapshot_choices=snapshot_choices + snapshot_choices=snapshot_choices, + view_choices=view_choices, ) context = {'form': form } @@ -46,11 +77,12 @@ def render(self): if record_id: context['record_id'] = self.record_uploads_url(record_id) - return render(self.request, 'plugins/exports_zenodo.html', context=context, status=200) + return render(self.request, 'plugins/publish_zenodo.html', context=context, status=200) def submit(self): snapshot_choices = self.get_from_session(self.request, 'snapshot_choices') - form = ZenodoSnapshotForm(self.request.POST, snapshot_choices=snapshot_choices) + view_choices = self.get_from_session(self.request, "view_choices") + form = ZenodoSnapshotForm(self.request.POST, snapshot_choices=snapshot_choices, view_choices=view_choices) if 'cancel' in self.request.POST: return redirect('project', self.project.id) @@ -58,21 +90,26 @@ def submit(self): if form.is_valid(): url = self.records_url # deposit url snapshot_id = form.cleaned_data['snapshot'] or None - snapshot = get_or_create_snapshot(self.project, snapshot_id=snapshot_id) - self.snapshot = snapshot # set class attribute for Export.get_values + self.snapshot = get_or_create_snapshot(self.project, snapshot_id=snapshot_id) + view_id = form.cleaned_data['view'] or None + self.view = self.project.views.get(pk=view_id) + self.export_format = form.cleaned_data['export_format'] or None # store project and snapshot in session else they get lost after post self.store_in_session(self.request, 'project_id', self.project.id) self.store_in_session(self.request, 'snapshot_id', self.snapshot.id) + self.store_in_session(self.request, 'view_id', self.view.id) + self.store_in_session(self.request, 'export_format', self.export_format) record_versions_url = self.validate_record_id_from_project_value_at_zenodo() + # TODO, currently the authentication can get stuck when trying out the dataset 
export + # first and this one afterwards, a 403 needs to be handled in the Export class. if record_versions_url: # if record exists then post new version to zenodo, no data required - url = record_versions_url - return self.post(self.request, url, {}) + return self.post(self.request, record_versions_url, {}) else: # else create new draft record - data = self.get_post_data(self.project, self.snapshot) + data = self.get_post_data() return self.post(self.request, url, data) else: return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) @@ -84,14 +121,13 @@ def validate_record_id_from_project_value_at_zenodo(self): record_id = get_record_id_from_project_value(self.project) if not record_id: - logger.warning("No record ID found in project values.") + logger.warning("validate record_id: no record ID found in project values.") return # Send a GET request to Zenodo to validate the record ID response = requests.get(self.record_url(record_id), headers=self.authorization_header) - # response = self.get(self.request, record_url) - # Check if the response is successful + # Check if the response was successful if response.status_code == 200: logger.info(f"Record ID {record_id} is valid.") # the conceptrecid is the concept record identifier for all verions of this zenodo record @@ -99,7 +135,6 @@ def validate_record_id_from_project_value_at_zenodo(self): # in invenioRDM it is the parent.id field concept_record_id = response.json()['conceptrecid'] set_record_id_on_project_value(self.project, concept_record_id) - versions_url = response.json().get('links', {}).get('versions') return versions_url elif response.status_code == 404: @@ -110,41 +145,28 @@ def validate_record_id_from_project_value_at_zenodo(self): # Log any other unexpected response code logger.error(f"Error validating record ID {record_id}: {response.status_code}") - def get_project_and_snapshot_from_session(self, request): - project_id = self.get_from_session(request, 'project_id') - 
snapshot_id = self.get_from_session(request, 'snapshot_id') - project = Project.objects.filter_user(request.user).get(id=project_id) - snapshot = project.snapshots.get(id=snapshot_id) - return project, snapshot - def post_success(self, request, response): - # the class attributes on self need to be redefined - # Retrieve project,snapshot from session - project, snapshot = self.get_project_and_snapshot_from_session(request) - self.project = project - self.snapshot = snapshot - - # and set request on self - self.request = request - - + # Retrieve project,snapshot,view and export_format from session + self.get_from_session_and_set_on_self(request) + self.request = request # and set request on self if 'versions' in response.request.url and 'publication_date' not in response.json().get('metadata',{}): - # metadata needs to be posted to the new version + # metadata needs to be posted to the new version with a new request and response zenodo_api_url = response.json().get('links', {}).get('self') - data = self.get_post_data(self.project, self.snapshot) - version_update_response = requests.put(zenodo_api_url, json=data, headers=self.authorized_json_header) + data = self.get_post_data() + response = requests.put(zenodo_api_url, json=data, headers=self.authorized_json_header) logger.debug("PUT to %s", zenodo_api_url) - zenodo_url = response.json().get('links', {}).get('self_html') - response = version_update_response - else: - zenodo_url = response.json().get('links', {}).get('self_html') + + payload = response.json() + zenodo_url = payload.get("links", {}).get("self_html") if zenodo_url: - record_id = response.json().get('id') - concept_record_id = response.json().get('conceptrecid') - files_url = response.json().get('links', {}).get('files') - _data_commit_pdf_response = self.post_export_file_to_zenodo(record_id=record_id, files_url=files_url, - attachment_format=self.export_file_format) + record_id = payload.get('id') + concept_record_id = payload["conceptrecid"] + 
files_url = payload.get('links', {}).get('files') + + _data_commit_pdf_response = self.post_export_file_to_zenodo( + record_id=record_id, files_url=files_url, + ) _publish_response = self.publish_draft_record(record_id=record_id) set_record_id_on_project_value(self.project, concept_record_id) @@ -156,15 +178,24 @@ def post_success(self, request, response): 'errors': [_('The URL of the new dataset could not be retrieved.')] }, status=200) - def post_export_file_to_zenodo(self, record_id=None, files_url=None, attachment_format=None, export_filename=None): + def post_export_file_to_zenodo( + self, record_id=None, files_url=None + ): # https://inveniordm.docs.cern.ch/reference/rest_api_drafts_records/#draft-files - if record_id is None or files_url is None or attachment_format is None: + if record_id is None or files_url is None or self.export_format is None: + logger.debug("post export file failed, missing args") return - rdmo_pdf_response = render_project_views(self.project, self.snapshot, attachment_format) - binary = rdmo_pdf_response.content - export_filename = export_filename or "rdmo_dmp" - filename = f"{export_filename}.{attachment_format}" + rdmo_render_response = render_and_export_project_from_view( + self.project, self.snapshot, self.export_format, view=self.view + ) + if rdmo_render_response.status_code != 200: + logger.debug("Render failed: %s", rdmo_render_response.content.decode()) + return + + binary = rdmo_render_response.content + export_filename = slugify(self.snapshot.title) + filename = f"{export_filename}.{self.export_format}" # get access token from the session draft_file_post_response = requests.post(files_url, headers=self.authorization_header, json=[{'key': filename}]) @@ -186,13 +217,35 @@ def post_export_file_to_zenodo(self, record_id=None, files_url=None, attachment_ def publish_draft_record(self, record_id=None): # https://inveniordm.docs.cern.ch/reference/rest_api_drafts_records/#publish-a-draft-record if record_id is None: - return + 
logger.debug("POST to publish failed, missing record_id") + return None publish_url = self.record_publish_url(record_id) response = requests.post(publish_url, headers=self.authorization_header) logger.debug("POST to %s", publish_url) return response - def get_post_data(self, project, snapshot): + def get_post_data(self): # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata - metadata_builder = ZenodoMetadataExport(project=project, snapshot=snapshot) - return metadata_builder.build_metadata() + if self.project is None or self.snapshot is None: + raise ValueError("Project and Snapshot are required to get post data.") + + title = f"{self.project.title} - Snapshot: {self.snapshot.title}" + description = f"Data Management Plan for project {self.project.title}." + if self.snapshot.description: + description += f" {self.snapshot.description}" + description += f" Exported to {self.export_format} with the {self.view.title} view." + + metadata_builder = ZenodoMetadataSnapshotBuilder( + title=title, + description=description, + keywords=[ + i.text + for i in self.get_values("project/research_question/keywords") if i.text + ], + rights_uri_paths=[ + i.option.uri_path + for i in self.get_values("project/dataset/sharing/conditions") if i.option + ], + project_users=self.project.user.all() if settings.ZENODO_PROVIDER.get("add_project_members") else [], + ) + return metadata_builder.to_post_data(filter_empty=True) From bb103094473418f3529d5341990dac184c2ffa2a Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 28 Jul 2025 17:20:54 +0200 Subject: [PATCH 26/41] clean up utils and handle exceptions Signed-off-by: David Wallace --- rdmo_zenodo/exports/utils.py | 75 ++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/rdmo_zenodo/exports/utils.py b/rdmo_zenodo/exports/utils.py index 045ce55..04dffd8 100644 --- a/rdmo_zenodo/exports/utils.py +++ b/rdmo_zenodo/exports/utils.py @@ -1,40 +1,42 @@ -from django.http 
import HttpResponse -from django.template import TemplateSyntaxError +from django.http import HttpResponseBadRequest +from django.template import TemplateDoesNotExist, TemplateSyntaxError from rdmo.core.utils import render_to_format from rdmo.domain.models import Attribute from rdmo.projects.models.snapshot import Snapshot from rdmo.projects.models.value import Value from rdmo.projects.utils import get_value_path -from rdmo.views.models import View -attribute_doi_uri_prefix = "https://rdmorganiser.github.io/terms" -attribute_doi_uri_key = "project/metadata/publication/zenodo_id" +ATTRIBUTE_DOI_URI_PREFIX = "https://rdmorganiser.github.io/terms" +ATTRIBUTE_DOI_URI_KEY = "project/metadata/publication/zenodo_id" + def get_or_create_snapshot(project, snapshot_id=None): if snapshot_id is None: - new_snapshot_title_id = project.snapshots.count() + 1 + new_snapshot_count = project.snapshots.count() + 1 description = f"{project.description}." - description += f"\nThis snapshot({new_snapshot_title_id}.) was automatically generated." - snapshot = Snapshot(project=project, - title=f"{project.title} {new_snapshot_title_id}", - description=description) + description += f"\nThis snapshot({new_snapshot_count}.) was automatically generated." 
+ snapshot = Snapshot( + project=project, + title=f"{project.title} #{new_snapshot_count}", # "Cool project #3" + description=description + ) snapshot.save() - else: - snapshot = project.snapshots.get(id=snapshot_id) + return snapshot + + return project.snapshots.get(id=snapshot_id) - return snapshot def get_project_value_with_record_id(project): - record_id_attribute, _created = Attribute.objects.get_or_create(uri_prefix=attribute_doi_uri_prefix, - key=attribute_doi_uri_key) + record_id_attribute, _created = Attribute.objects.get_or_create( + uri_prefix=ATTRIBUTE_DOI_URI_PREFIX, + key=ATTRIBUTE_DOI_URI_KEY + ) project_doi_value = project.values.filter(attribute=record_id_attribute).first() return project_doi_value, record_id_attribute def get_record_id_from_project_value(project): - # get attribute - project_doi_value, _ = get_project_value_with_record_id(project) if project_doi_value is not None: @@ -42,6 +44,7 @@ def get_record_id_from_project_value(project): else: return None + def set_record_id_on_project_value(project, record_id): if project is None or record_id is None: return @@ -49,7 +52,7 @@ def set_record_id_on_project_value(project, record_id): project_doi_value, record_id_attribute = get_project_value_with_record_id(project) if project_doi_value is None: - # create the value with text and add it + # create the value with the record_id and add it to the project value = Value(project=project, attribute=record_id_attribute, text=record_id) value.save() project.values.add(value) @@ -58,26 +61,32 @@ def set_record_id_on_project_value(project, record_id): project_doi_value.text = record_id project_doi_value.save() + def clear_record_id_from_project_value(project): """Clear the record_id text from the project's values by setting it to an empty string.""" set_record_id_on_project_value(project, '') -def render_project_views(project, snapshot, attachments_format, view=None): - if view is None: - view = 
View.objects.get(uri="https://rdmorganiser.github.io/terms/views/variable_check") +def render_and_export_project_from_view(project, snapshot, export_format, view): try: rendered_view = view.render(project, snapshot) - except TemplateSyntaxError: - return HttpResponse() - - return render_to_format( - None, attachments_format, project.title, 'projects/project_view_export.html', { - 'format': attachments_format, - 'title': project.title, - 'view': view, - 'rendered_view': rendered_view, - 'resource_path': get_value_path(project, snapshot) - } - ) + except (TemplateDoesNotExist,TemplateSyntaxError) as e: + return HttpResponseBadRequest(f"Render from view failed. {e}") + + try: + response = render_to_format( + None, export_format, project.title, 'projects/project_view_export.html', { + 'format': export_format, + 'title': project.title, + 'view': view, + 'rendered_view': rendered_view, + 'resource_path': get_value_path(project, snapshot) + } + ) + except RuntimeError as e: + return HttpResponseBadRequest(f"Render to format failed. {e}") + except (TemplateDoesNotExist,TemplateSyntaxError) as e: + return HttpResponseBadRequest(f"Render to format failed, template error. {e}") + else: + return response From e94879f8e0dae8e651a40231b1fed5f636607f92 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 28 Jul 2025 17:21:41 +0200 Subject: [PATCH 27/41] revert exports template and add one for publish Signed-off-by: David Wallace --- .../templates/plugins/exports_zenodo.html | 7 ------- .../templates/plugins/publish_zenodo.html | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/rdmo_zenodo/templates/plugins/exports_zenodo.html b/rdmo_zenodo/templates/plugins/exports_zenodo.html index 23f9e01..b336f85 100644 --- a/rdmo_zenodo/templates/plugins/exports_zenodo.html +++ b/rdmo_zenodo/templates/plugins/exports_zenodo.html @@ -5,13 +5,6 @@ {% block page %}

{% trans 'Export to Zenodo' %}

- {% if record_id %} -

- {% trans 'This project has a Zenodo record id' %}: {{ record_id }} -

- {% else %} -

{% trans 'This project does not have a Zenodo record id.' %}

- {% endif %} {% bootstrap_form submit=_('Export to Zenodo') %} diff --git a/rdmo_zenodo/templates/plugins/publish_zenodo.html b/rdmo_zenodo/templates/plugins/publish_zenodo.html index e69de29..e8b6f70 100644 --- a/rdmo_zenodo/templates/plugins/publish_zenodo.html +++ b/rdmo_zenodo/templates/plugins/publish_zenodo.html @@ -0,0 +1,18 @@ +{% extends 'core/page.html' %} +{% load i18n %} +{% load core_tags %} + +{% block page %} + +

{% trans 'Export and Publish to Zenodo' %}

+ {% if record_id %} +

+ {% trans 'This project has a Zenodo record id' %}: {{ record_id }} +

+ {% else %} +

{% trans 'This project does not have a Zenodo record id.' %}

+ {% endif %} + + {% bootstrap_form submit=_('Publish to Zenodo') %} + +{% endblock %} From b2a96cea671f0aa8ab5d24338987948d6fb0d3bc Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 6 Aug 2025 10:51:42 +0200 Subject: [PATCH 28/41] docs: add InvenioRDM sandbox Signed-off-by: David Wallace --- README.md | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f0b5e58..80ab257 100644 --- a/README.md +++ b/README.md @@ -25,22 +25,35 @@ PROJECT_EXPORTS += [ ('zenodo-publish', _('Publish to Zenodo'), 'rdmo_zenodo.exports.ZenodoPublishProvider') ] ``` +When the translation method `_` was not yet imported in your `config/settings/local.py`, then add it this import at the top: +``` +from django.utils.translation import gettext_lazy as _ +``` -An *Developer applications* has to be registered with Zenodo here: https://zenodo.org/account/settings/applications/. For development, you can also use the sandbox instance provided by Zenodo: https://sandbox.zenodo.org/account/settings/applications/. During the registration, you need to enter a **Redirect URI** for your RDMO instance: +### Zenodo configuration + +A *Developer applications* has to be registered with Zenodo here: https://zenodo.org/account/settings/applications/. +For development, you can also use the sandbox instance provided by Zenodo: https://sandbox.zenodo.org/account/settings/applications/. +Or for development against an InvenioRDM Instance the sandbox https://inveniordm.web.cern.ch/ can be used. 
+During the registration, you need to enter a **Redirect URI** for your RDMO instance: ``` https://rdmo.example.com/services/oauth/zenodo/callback/ -http://localhost:8000/services/oauth/zenodo/callback/ # for development +https://rdmo.example.com/services/oauth/zenodo-publish/callback/ + +# or for local development +http://localhost:8000/services/oauth/zenodo/callback/ +http://localhost:8000/services/oauth/zenodo-publish/callback/ ``` -After registration, you are provided with a `client_id` and a `client_secret`, which need to be added to the RDMO settings, along with some other optional entries: +After registration, you are provided with a `client_id` and a `client_secret`, +which need to be added to the RDMO settings in `config/settings/local.py`, along with some other optional entries: ```python ZENODO_PROVIDER = { 'client_id': os.getenv('ZENODO_CLIENT_ID'), 'client_secret': os.getenv('ZENODO_CLIENT_SECRET'), - 'add_project_members': True, # add the members of the project as creators to each dataset - 'resource_type': 'dataset', # specify the resource type + 'add_project_members': True, # add the members of the project as creators to each dataset 'language': 'eng', # specify the language 'publisher': '', # specify the publisher 'funding': [ # specify funding information @@ -64,11 +77,12 @@ ZENODO_PROVIDER = { ] } ``` +The `resource_type` will be set by the specific export provider, e.g. as `'dataset'` or as `'publication-datamanagementplan'`. Usage ----- -The plugins apears as export options on the RDMO project overview. +The plugins appears as export options on the RDMO project overview. Analoges to Zenodo this plugin can also be used with InvenioRDM instances. 
Currently, the following properties of the Zenodo data model are created from RDMO attributes: From dfcaf592e729a98186d53e029609747b24867342 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 6 Aug 2025 10:31:14 +0200 Subject: [PATCH 29/41] add config for url, attribute and scope and support for InvenioRDM Signed-off-by: David Wallace --- README.md | 17 ++++++++----- rdmo_zenodo/exports/base.py | 2 +- rdmo_zenodo/exports/publish_snapshot.py | 24 +++++++++--------- rdmo_zenodo/exports/utils.py | 33 ++++++++++++++++++++----- 4 files changed, 51 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 80ab257..ff7b0bf 100644 --- a/README.md +++ b/README.md @@ -43,17 +43,21 @@ https://rdmo.example.com/services/oauth/zenodo-publish/callback/ # or for local development http://localhost:8000/services/oauth/zenodo/callback/ -http://localhost:8000/services/oauth/zenodo-publish/callback/ +http://localhost:8000/services/oauth/zenodo-publish/callback/ ``` -After registration, you are provided with a `client_id` and a `client_secret`, +After registration, you are provided with a `client_id` and a `client_secret`, which need to be added to the RDMO settings in `config/settings/local.py`, along with some other optional entries: ```python ZENODO_PROVIDER = { 'client_id': os.getenv('ZENODO_CLIENT_ID'), - 'client_secret': os.getenv('ZENODO_CLIENT_SECRET'), - 'add_project_members': True, # add the members of the project as creators to each dataset + 'client_secret': os.getenv('ZENODO_CLIENT_SECRET'), + 'zenodo_url': 'https://zenodo.org', # or your own InvenioRDM instance url + 'zenodo_auth_scope': 'deposit:write', # optional, default 'deposit:write' or 'user:email' for InvenioRDM + 'add_project_members': True, # add the members of the project as creators to exported record + 'publish_record_id_attribute_prefix': 'https://rdmorganiser.github.io/terms', # optional, default is shown here + 'publish_record_id_attribute_key': 
'project/metadata/publication/zenodo/concept_record_id', # optional, default is shown here 'language': 'eng', # specify the language 'publisher': '', # specify the publisher 'funding': [ # specify funding information @@ -77,12 +81,13 @@ ZENODO_PROVIDER = { ] } ``` -The `resource_type` will be set by the specific export provider, e.g. as `'dataset'` or as `'publication-datamanagementplan'`. +The `resource_type` will be set by the specific export provider, e.g. as `'dataset'` or as `'publication-datamanagementplan'` for `zenodo-publish` export. Usage ----- -The plugins appears as export options on the RDMO project overview. Analoges to Zenodo this plugin can also be used with InvenioRDM instances. +The plugins appear as export options on the RDMO project overview. For a Zenodo backend, it was tested against https://sandbox.zenodo.org/. +Analogous to Zenodo this plugin can also be used with InvenioRDM instances for which it was tested against https://inveniordm.web.cern.ch/. Currently, the following properties of the Zenodo data model are created from RDMO attributes: diff --git a/rdmo_zenodo/exports/base.py b/rdmo_zenodo/exports/base.py index e6a34fe..42f72d6 100644 --- a/rdmo_zenodo/exports/base.py +++ b/rdmo_zenodo/exports/base.py @@ -86,7 +86,7 @@ def get_authorize_params(self, request, state): return { 'response_type': 'code', 'client_id': self.client_id, - 'scope': 'deposit:write', + 'scope': settings.ZENODO_PROVIDER.get('zenodo_auth_scope') or 'deposit:write', 'redirect_uri': request.build_absolute_uri(self.redirect_path), 'state': state } diff --git a/rdmo_zenodo/exports/publish_snapshot.py b/rdmo_zenodo/exports/publish_snapshot.py index 28b44d6..9d4a1cb 100644 --- a/rdmo_zenodo/exports/publish_snapshot.py +++ b/rdmo_zenodo/exports/publish_snapshot.py @@ -10,12 +10,13 @@ from rdmo.projects.models import Project -from rdmo_zenodo.exports.metadata.snapshot import ZenodoMetadataSnapshotBuilder +from rdmo_zenodo.exports.metadata.snapshot import 
SnapshotZenodoMetadataBuilder from .base import BaseZenodoExportProvider from .forms import ZenodoSnapshotForm from .utils import ( clear_record_id_from_project_value, + get_concept_or_parent_id_from_payload, get_or_create_snapshot, get_record_id_from_project_value, render_and_export_project_from_view, @@ -122,7 +123,7 @@ def validate_record_id_from_project_value_at_zenodo(self): if not record_id: logger.warning("validate record_id: no record ID found in project values.") - return + return None # Send a GET request to Zenodo to validate the record ID response = requests.get(self.record_url(record_id), headers=self.authorization_header) @@ -130,20 +131,19 @@ def validate_record_id_from_project_value_at_zenodo(self): # Check if the response was successful if response.status_code == 200: logger.info(f"Record ID {record_id} is valid.") - # the conceptrecid is the concept record identifier for all verions of this zenodo record - # https://inveniordm.docs.cern.ch/reference/metadata/#system-managed-persistent-identifiers - # in invenioRDM it is the parent.id field - concept_record_id = response.json()['conceptrecid'] + + concept_record_id = get_concept_or_parent_id_from_payload(response.json()) set_record_id_on_project_value(self.project, concept_record_id) versions_url = response.json().get('links', {}).get('versions') return versions_url elif response.status_code == 404: - logger.warning(f"Record ID {record_id} is invalid or not found in Zenodo.") + logger.warning(f"Record ID {record_id} is invalid or not found in {response.request.url}.") # the record_id does not exist, delete it from the project.value.text clear_record_id_from_project_value(self.project) else: # Log any other unexpected response code logger.error(f"Error validating record ID {record_id}: {response.status_code}") + return None def post_success(self, request, response): # Retrieve project,snapshot,view and export_format from session @@ -161,7 +161,7 @@ def post_success(self, request, response): if 
zenodo_url: record_id = payload.get('id') - concept_record_id = payload["conceptrecid"] + concept_record_id = get_concept_or_parent_id_from_payload(payload) files_url = payload.get('links', {}).get('files') _data_commit_pdf_response = self.post_export_file_to_zenodo( @@ -184,14 +184,14 @@ def post_export_file_to_zenodo( # https://inveniordm.docs.cern.ch/reference/rest_api_drafts_records/#draft-files if record_id is None or files_url is None or self.export_format is None: logger.debug("post export file failed, missing args") - return + return None rdmo_render_response = render_and_export_project_from_view( self.project, self.snapshot, self.export_format, view=self.view ) if rdmo_render_response.status_code != 200: logger.debug("Render failed: %s", rdmo_render_response.content.decode()) - return + return None binary = rdmo_render_response.content export_filename = slugify(self.snapshot.title) @@ -202,7 +202,7 @@ def post_export_file_to_zenodo( entries = draft_file_post_response.json().get('entries', []) draft_file_entry = next(filter(lambda i: i["key"] == filename, entries), None) if draft_file_entry is None: - return + return None content_url = draft_file_entry.get('links', {}).get('content') _data_content_response = requests.put(content_url, headers=self.authorized_binary_header, data=binary) @@ -235,7 +235,7 @@ def get_post_data(self): description += f" {self.snapshot.description}" description += f" Exported to {self.export_format} with the {self.view.title} view." 
- metadata_builder = ZenodoMetadataSnapshotBuilder( + metadata_builder = SnapshotZenodoMetadataBuilder( title=title, description=description, keywords=[ diff --git a/rdmo_zenodo/exports/utils.py b/rdmo_zenodo/exports/utils.py index 04dffd8..7eb9936 100644 --- a/rdmo_zenodo/exports/utils.py +++ b/rdmo_zenodo/exports/utils.py @@ -1,3 +1,4 @@ +from django.conf import settings from django.http import HttpResponseBadRequest from django.template import TemplateDoesNotExist, TemplateSyntaxError @@ -7,15 +8,17 @@ from rdmo.projects.models.value import Value from rdmo.projects.utils import get_value_path -ATTRIBUTE_DOI_URI_PREFIX = "https://rdmorganiser.github.io/terms" -ATTRIBUTE_DOI_URI_KEY = "project/metadata/publication/zenodo_id" +DEFAULT_ATTRIBUTE_DOI_URI_PREFIX = "https://rdmorganiser.github.io/terms" +DEFAULT_ATTRIBUTE_DOI_URI_KEY = "project/metadata/publication/zenodo_id" def get_or_create_snapshot(project, snapshot_id=None): if snapshot_id is None: new_snapshot_count = project.snapshots.count() + 1 - description = f"{project.description}." - description += f"\nThis snapshot({new_snapshot_count}.) was automatically generated." + description = project.description + if description: + description += "\n" + description += f"This snapshot({new_snapshot_count}.) was automatically generated by the zenodo-publish export provider." 
# noqa: E501 snapshot = Snapshot( project=project, title=f"{project.title} #{new_snapshot_count}", # "Cool project #3" @@ -28,10 +31,18 @@ def get_or_create_snapshot(project, snapshot_id=None): def get_project_value_with_record_id(project): + + uri_prefix = settings.ZENODO_PROVIDER.get("publish_record_id_attribute_prefix") or DEFAULT_ATTRIBUTE_DOI_URI_PREFIX + key = settings.ZENODO_PROVIDER.get("publish_record_id_attribute_key") or DEFAULT_ATTRIBUTE_DOI_URI_KEY + record_id_attribute, _created = Attribute.objects.get_or_create( - uri_prefix=ATTRIBUTE_DOI_URI_PREFIX, - key=ATTRIBUTE_DOI_URI_KEY + uri_prefix=uri_prefix, + key=key ) + if _created: + record_id_attribute.comment = "This attribute was automatically generated by the rdmo_zenodo plugin." + record_id_attribute.save() + project_doi_value = project.values.filter(attribute=record_id_attribute).first() return project_doi_value, record_id_attribute @@ -90,3 +101,13 @@ def render_and_export_project_from_view(project, snapshot, export_format, view): return HttpResponseBadRequest(f"Render to format failed, template error. 
{e}") else: return response + +def get_concept_or_parent_id_from_payload(data): + # the conceptrecid is the concept record identifier for all versions of this zenodo record + # https://inveniordm.docs.cern.ch/reference/metadata/#system-managed-persistent-identifiers + if 'conceptrecid' in data: + return data['conceptrecid'] + elif 'parent' in data: + # in invenioRDM it is the parent.id field + return data['parent']['id'] + raise KeyError From 17e8c300fee9bd324a0d175fe08bbcc577b5c0cc Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 6 Aug 2025 10:32:18 +0200 Subject: [PATCH 30/41] rename metadata builder classes Signed-off-by: David Wallace --- rdmo_zenodo/exports/export_dataset.py | 4 ++-- rdmo_zenodo/exports/metadata/dataset.py | 2 +- rdmo_zenodo/exports/metadata/snapshot.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rdmo_zenodo/exports/export_dataset.py b/rdmo_zenodo/exports/export_dataset.py index 3e424b3..a59eda5 100644 --- a/rdmo_zenodo/exports/export_dataset.py +++ b/rdmo_zenodo/exports/export_dataset.py @@ -4,7 +4,7 @@ from django.shortcuts import redirect, render from django.utils.translation import gettext_lazy as _ -from rdmo_zenodo.exports.metadata.dataset import ZenodoMetadataDatasetBuilder +from rdmo_zenodo.exports.metadata.dataset import DatasetZenodoMetadataBuilder from .base import BaseZenodoExportProvider from .forms import ZenodoDatasetForm @@ -66,7 +66,7 @@ def get_post_data(self, set_index): if dataset_title: description += f" {dataset_title}" - metadata_builder = ZenodoMetadataDatasetBuilder( + metadata_builder = DatasetZenodoMetadataBuilder( title=title, description=description, keywords=[ diff --git a/rdmo_zenodo/exports/metadata/dataset.py b/rdmo_zenodo/exports/metadata/dataset.py index f2c5978..5223874 100644 --- a/rdmo_zenodo/exports/metadata/dataset.py +++ b/rdmo_zenodo/exports/metadata/dataset.py @@ -7,7 +7,7 @@ @dataclass -class ZenodoMetadataDatasetBuilder(ZenodoMetadataBuilder): +class 
DatasetZenodoMetadataBuilder(ZenodoMetadataBuilder): resource_type = "dataset" upload_type = "dataset" diff --git a/rdmo_zenodo/exports/metadata/snapshot.py b/rdmo_zenodo/exports/metadata/snapshot.py index 4b29986..b2602eb 100644 --- a/rdmo_zenodo/exports/metadata/snapshot.py +++ b/rdmo_zenodo/exports/metadata/snapshot.py @@ -9,7 +9,7 @@ @dataclass -class ZenodoMetadataSnapshotBuilder(ZenodoMetadataBuilder): +class SnapshotZenodoMetadataBuilder(ZenodoMetadataBuilder): publication_date: Optional[str] = None resource_type = "publication-datamanagementplan" From 012693c5d367bb14aaff623083f759d8303acfe3 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 6 Aug 2025 10:44:15 +0200 Subject: [PATCH 31/41] set default for zenodo_url and add to readme config Signed-off-by: David Wallace --- README.md | 2 +- rdmo_zenodo/exports/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ff7b0bf..3d95818 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ which need to be added to the RDMO settings in `config/settings/local.py`, along ZENODO_PROVIDER = { 'client_id': os.getenv('ZENODO_CLIENT_ID'), 'client_secret': os.getenv('ZENODO_CLIENT_SECRET'), - 'zenodo_url': 'https://zenodo.org', # or your own InvenioRDM instance url + 'zenodo_url': 'https://zenodo.org', # optional, default https://zenodo.org , or your own InvenioRDM instance url 'zenodo_auth_scope': 'deposit:write', # optional, default 'deposit:write' or 'user:email' for InvenioRDM 'add_project_members': True, # add the members of the project as creators to exported record 'publish_record_id_attribute_prefix': 'https://rdmorganiser.github.io/terms', # optional, default is shown here diff --git a/rdmo_zenodo/exports/base.py b/rdmo_zenodo/exports/base.py index 42f72d6..5b24701 100644 --- a/rdmo_zenodo/exports/base.py +++ b/rdmo_zenodo/exports/base.py @@ -28,7 +28,7 @@ def client_secret(self): @property def zenodo_url(self): - return 
settings.ZENODO_PROVIDER.get('zenodo_url', 'https://sandbox.zenodo.org').strip('/') + return settings.ZENODO_PROVIDER.get('zenodo_url', 'https://zenodo.org').strip('/') @property def authorize_url(self): From 627f8f5ad2429b59aebf4a8e404aaa4cf647571e Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 6 Aug 2025 10:45:21 +0200 Subject: [PATCH 32/41] style(docs): reorder config Signed-off-by: David Wallace --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3d95818..970c113 100644 --- a/README.md +++ b/README.md @@ -55,9 +55,9 @@ ZENODO_PROVIDER = { 'client_secret': os.getenv('ZENODO_CLIENT_SECRET'), 'zenodo_url': 'https://zenodo.org', # optional, default https://zenodo.org , or your own InvenioRDM instance url 'zenodo_auth_scope': 'deposit:write', # optional, default 'deposit:write' or 'user:email' for InvenioRDM - 'add_project_members': True, # add the members of the project as creators to exported record 'publish_record_id_attribute_prefix': 'https://rdmorganiser.github.io/terms', # optional, default is shown here 'publish_record_id_attribute_key': 'project/metadata/publication/zenodo/concept_record_id', # optional, default is shown here + 'add_project_members': True, # add the members of the project as creators to exported record 'language': 'eng', # specify the language 'publisher': '', # specify the publisher 'funding': [ # specify funding information From ee5880411a951a39294eaecd9061f03ba9e51de3 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 6 Aug 2025 10:56:28 +0200 Subject: [PATCH 33/41] docs: update config comments Signed-off-by: David Wallace --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 970c113..625717a 100644 --- a/README.md +++ b/README.md @@ -53,8 +53,8 @@ which need to be added to the RDMO settings in `config/settings/local.py`, along ZENODO_PROVIDER = { 'client_id': os.getenv('ZENODO_CLIENT_ID'), 'client_secret': 
os.getenv('ZENODO_CLIENT_SECRET'), - 'zenodo_url': 'https://zenodo.org', # optional, default https://zenodo.org , or your own InvenioRDM instance url - 'zenodo_auth_scope': 'deposit:write', # optional, default 'deposit:write' or 'user:email' for InvenioRDM + 'zenodo_url': 'https://zenodo.org', # optional, default shown here , or your own InvenioRDM instance url + 'zenodo_auth_scope': 'deposit:write', # optional, default shown here or 'user:email' for InvenioRDM 'publish_record_id_attribute_prefix': 'https://rdmorganiser.github.io/terms', # optional, default is shown here 'publish_record_id_attribute_key': 'project/metadata/publication/zenodo/concept_record_id', # optional, default is shown here 'add_project_members': True, # add the members of the project as creators to exported record From 53de9d59f2880d348b02cb0288a3b5541d33b898 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 7 Aug 2025 16:47:40 +0200 Subject: [PATCH 34/41] use zenodo_record_id_uri as setting instead of prefix and key Signed-off-by: David Wallace --- README.md | 3 +-- rdmo_zenodo/exports/publish_snapshot.py | 3 +-- rdmo_zenodo/exports/utils.py | 29 ++++++++++++++++++++----- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 625717a..ffe575d 100644 --- a/README.md +++ b/README.md @@ -55,8 +55,7 @@ ZENODO_PROVIDER = { 'client_secret': os.getenv('ZENODO_CLIENT_SECRET'), 'zenodo_url': 'https://zenodo.org', # optional, default shown here , or your own InvenioRDM instance url 'zenodo_auth_scope': 'deposit:write', # optional, default shown here or 'user:email' for InvenioRDM - 'publish_record_id_attribute_prefix': 'https://rdmorganiser.github.io/terms', # optional, default is shown here - 'publish_record_id_attribute_key': 'project/metadata/publication/zenodo/concept_record_id', # optional, default is shown here + 'zenodo_record_id_uri': 'https://rdmorganiser.github.io/terms/project/metadata/publication/zenodo/record_id', # optional, default is shown here 
'add_project_members': True, # add the members of the project as creators to exported record 'language': 'eng', # specify the language 'publisher': '', # specify the publisher diff --git a/rdmo_zenodo/exports/publish_snapshot.py b/rdmo_zenodo/exports/publish_snapshot.py index 9d4a1cb..a3df3ba 100644 --- a/rdmo_zenodo/exports/publish_snapshot.py +++ b/rdmo_zenodo/exports/publish_snapshot.py @@ -89,7 +89,6 @@ def submit(self): return redirect('project', self.project.id) if form.is_valid(): - url = self.records_url # deposit url snapshot_id = form.cleaned_data['snapshot'] or None self.snapshot = get_or_create_snapshot(self.project, snapshot_id=snapshot_id) view_id = form.cleaned_data['view'] or None @@ -111,7 +110,7 @@ def submit(self): else: # else create new draft record data = self.get_post_data() - return self.post(self.request, url, data) + return self.post(self.request, self.records_url, data) else: return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) diff --git a/rdmo_zenodo/exports/utils.py b/rdmo_zenodo/exports/utils.py index 7eb9936..6c44b7e 100644 --- a/rdmo_zenodo/exports/utils.py +++ b/rdmo_zenodo/exports/utils.py @@ -1,3 +1,5 @@ +from urllib.parse import urlparse + from django.conf import settings from django.http import HttpResponseBadRequest from django.template import TemplateDoesNotExist, TemplateSyntaxError @@ -8,8 +10,7 @@ from rdmo.projects.models.value import Value from rdmo.projects.utils import get_value_path -DEFAULT_ATTRIBUTE_DOI_URI_PREFIX = "https://rdmorganiser.github.io/terms" -DEFAULT_ATTRIBUTE_DOI_URI_KEY = "project/metadata/publication/zenodo_id" +DEFAULT_RECORD_ATTRIBUTE_URI = "https://rdmorganiser.github.io/terms/project/metadata/publication/zenodo/record_id" def get_or_create_snapshot(project, snapshot_id=None): @@ -32,12 +33,11 @@ def get_or_create_snapshot(project, snapshot_id=None): def get_project_value_with_record_id(project): - uri_prefix = 
settings.ZENODO_PROVIDER.get("publish_record_id_attribute_prefix") or DEFAULT_ATTRIBUTE_DOI_URI_PREFIX - key = settings.ZENODO_PROVIDER.get("publish_record_id_attribute_key") or DEFAULT_ATTRIBUTE_DOI_URI_KEY - + record_uri = settings.ZENODO_PROVIDER.get("zenodo_record_id_uri") or DEFAULT_RECORD_ATTRIBUTE_URI + uri_prefix, key = split_attribute_uri(record_uri) record_id_attribute, _created = Attribute.objects.get_or_create( uri_prefix=uri_prefix, - key=key + key=key, ) if _created: record_id_attribute.comment = "This attribute was automatically generated by the rdmo_zenodo plugin." @@ -111,3 +111,20 @@ def get_concept_or_parent_id_from_payload(data): # in invenioRDM it is the parent.id field return data['parent']['id'] raise KeyError + +def split_attribute_uri(uri: str) -> tuple[str, str]: + + parsed = urlparse(uri) + parts = parsed.path.strip("/").split("/") + + if not parts: + raise ValueError("URI has no path segments") + + if parts[0] == "terms": + uri_prefix = f"{parsed.scheme}://{parsed.netloc}/terms" + key = "/".join(parts[1:]) + else: + uri_prefix = f"{parsed.scheme}://{parsed.netloc}" + key = "/".join(parts) + + return uri_prefix, key From 53dfc25b925d7ce6827de0982693f5c93c5c9447 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 9 Oct 2025 11:00:19 +0200 Subject: [PATCH 35/41] re-implement metadata for zenodo and invenio --- .../exports/{export_dataset.py => export.py} | 0 rdmo_zenodo/exports/metadata/base.py | 82 ------- rdmo_zenodo/exports/metadata/builder.py | 55 +++++ rdmo_zenodo/exports/metadata/context.py | 29 +++ rdmo_zenodo/exports/metadata/converter.py | 22 ++ rdmo_zenodo/exports/metadata/dataset.py | 27 --- rdmo_zenodo/exports/metadata/exceptions.py | 20 ++ rdmo_zenodo/exports/metadata/extractors.py | 151 +++++++++++++ rdmo_zenodo/exports/metadata/invenio.py | 211 ++++++++++++++++++ .../exports/metadata/mapper_invenio.py | 33 +++ rdmo_zenodo/exports/metadata/mapper_zenodo.py | 35 +++ rdmo_zenodo/exports/metadata/model.py | 42 ---- 
rdmo_zenodo/exports/metadata/snapshot.py | 37 --- rdmo_zenodo/exports/metadata/utils.py | 17 ++ rdmo_zenodo/exports/metadata/zenodo.py | 128 +++++++++++ .../{publish_snapshot.py => publish.py} | 0 16 files changed, 701 insertions(+), 188 deletions(-) rename rdmo_zenodo/exports/{export_dataset.py => export.py} (100%) delete mode 100644 rdmo_zenodo/exports/metadata/base.py create mode 100644 rdmo_zenodo/exports/metadata/builder.py create mode 100644 rdmo_zenodo/exports/metadata/context.py create mode 100644 rdmo_zenodo/exports/metadata/converter.py delete mode 100644 rdmo_zenodo/exports/metadata/dataset.py create mode 100644 rdmo_zenodo/exports/metadata/exceptions.py create mode 100644 rdmo_zenodo/exports/metadata/extractors.py create mode 100644 rdmo_zenodo/exports/metadata/invenio.py create mode 100644 rdmo_zenodo/exports/metadata/mapper_invenio.py create mode 100644 rdmo_zenodo/exports/metadata/mapper_zenodo.py delete mode 100644 rdmo_zenodo/exports/metadata/model.py delete mode 100644 rdmo_zenodo/exports/metadata/snapshot.py create mode 100644 rdmo_zenodo/exports/metadata/utils.py create mode 100644 rdmo_zenodo/exports/metadata/zenodo.py rename rdmo_zenodo/exports/{publish_snapshot.py => publish.py} (100%) diff --git a/rdmo_zenodo/exports/export_dataset.py b/rdmo_zenodo/exports/export.py similarity index 100% rename from rdmo_zenodo/exports/export_dataset.py rename to rdmo_zenodo/exports/export.py diff --git a/rdmo_zenodo/exports/metadata/base.py b/rdmo_zenodo/exports/metadata/base.py deleted file mode 100644 index 5590181..0000000 --- a/rdmo_zenodo/exports/metadata/base.py +++ /dev/null @@ -1,82 +0,0 @@ -from dataclasses import dataclass, field -from typing import Any - -from django.conf import settings -from django.core.exceptions import ObjectDoesNotExist - -from .model import ZenodoMetadata - -RIGHTS_URI_OPTIONS = { - 'dataset_license_types/71': 'cc-by-4.0', - 'dataset_license_types/73': 'cc-by-nc-4.0', - 'dataset_license_types/74': 'cc-by-nd-4.0', - 
'dataset_license_types/75': 'cc-by-sa-4.0', - 'dataset_license_types/cc0': 'cc-zero' -} -DEFAULT_SUBJECTS = ['Data Management Plan', 'DMP'] - - -@dataclass -class ZenodoMetadataBuilder: - title: str - description: str - rights_uri_paths: list[str] = field(default_factory=list) - keywords: list[str] = field(default_factory=list) - project_users: list[Any] = field(default_factory=list) - - def get_resource_type(self): - {"id": settings.ZENODO_PROVIDER.get("resource_type", self.resource_type)} - - def get_creators(self) -> list[dict[str, dict]]: - if not settings.ZENODO_PROVIDER.get('add_project_members'): - return [] - - creators = [] - for user in self.project_users: - person = { - "family_name": user.last_name, - "given_name": user.first_name, - "identifiers": self.get_user_identifiers(user), - "type": "personal" - } - creators.append({"person_or_org": person}) - return creators - - @staticmethod - def get_user_identifiers(user) -> list[dict[str, str]]: - # there may also be other providers that have uids in extra_data - try: - orcid = user.socialaccount_set.get(provider="orcid") - except (ObjectDoesNotExist, AttributeError): - return [] - else: - return [{"scheme": "orcid", "identifier": orcid.uid}] - - @staticmethod - def get_rights_from_uri_paths(rights_options) -> list[dict[str, str]]: - for uri_path in rights_options: - license_id = RIGHTS_URI_OPTIONS.get(uri_path) - if license_id: - return [{"id": license_id}] - return [] - - @staticmethod - def get_subjects_from_keywords(keywords) -> list[dict[str, str]]: - subjects = [{"subject": s} for s in DEFAULT_SUBJECTS] - for keyword in keywords: - subjects.append({"subject": keyword}) - return subjects - - @staticmethod - def get_languages() -> list[dict[str, str]]: - language = settings.ZENODO_PROVIDER.get("language") - if language: - return [{"id": language}] - else: - return [] - - def build_metadata(self) -> ZenodoMetadata: - raise NotImplementedError() - - def to_post_data(self, filter_empty=False): - return 
{'metadata': self.build_metadata().to_dict(filter_empty=filter_empty)} diff --git a/rdmo_zenodo/exports/metadata/builder.py b/rdmo_zenodo/exports/metadata/builder.py new file mode 100644 index 0000000..8a0078a --- /dev/null +++ b/rdmo_zenodo/exports/metadata/builder.py @@ -0,0 +1,55 @@ +import inspect +from typing import Callable + +from cattr import structure + +from rdmo_zenodo.exports.metadata.context import MetadataContext +from rdmo_zenodo.exports.metadata.converter import converter +from rdmo_zenodo.exports.metadata.exceptions import ExtractionError, SchemaValidationError +from rdmo_zenodo.exports.metadata.invenio import MetadataV6, RecordV6Payload +from rdmo_zenodo.exports.metadata.mapper_invenio import INVENIO_FIELD_MAPPER +from rdmo_zenodo.exports.metadata.mapper_zenodo import ZENODO_FIELD_MAPPER +from rdmo_zenodo.exports.metadata.zenodo import ZenodoDepositionPayload, ZenodoMetadata + + +def call_extractors_on_field_mapping(context: MetadataContext, fields: dict[str, Callable]): + extracted = {} + for name, getter in fields.items(): + # if the callable expects arguments, pass context; else call it directly + try: + sig = inspect.signature(getter) + if len(sig.parameters) == 0: + value = getter() + else: + value = getter(context) + except (TypeError, ValueError): + value = getter(context) + if value not in (None, "", [], {}): + extracted[name] = value + return extracted + + +def build_payload(context, backend: str): + if backend == "zenodo": + mapper, schema, payload_cls = ( + ZENODO_FIELD_MAPPER, ZenodoMetadata, ZenodoDepositionPayload + ) + elif backend == "invenio": + mapper, schema, payload_cls = ( + INVENIO_FIELD_MAPPER, MetadataV6, RecordV6Payload + ) + else: + raise ValueError(f"Unknown backend: {backend}") + try: + metadata_dict = call_extractors_on_field_mapping(context, mapper) + except (TypeError, ValueError) as e: + raise ExtractionError("Failed to extract data from RDMO project", details=str(e)) from e + + try: + metadata_obj = 
structure(metadata_dict, schema) + except (TypeError, ValueError) as e: + raise SchemaValidationError("Schema validation failed", e) from e + + payload_obj = payload_cls(metadata=metadata_obj) + payload = converter.unstructure(payload_obj) + return payload diff --git a/rdmo_zenodo/exports/metadata/context.py b/rdmo_zenodo/exports/metadata/context.py new file mode 100644 index 0000000..4559100 --- /dev/null +++ b/rdmo_zenodo/exports/metadata/context.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Callable + +from django.conf import settings + +from rdmo.projects.models import Project, Snapshot + + +@dataclass(frozen=True) +class MetadataContext: + project: Project | None + snapshot: Snapshot | None + set_index: int | None + get_values: Callable[..., list[Any]] + get_text: Callable[..., str | None] + zenodo_backend_type: str + view: Any | None = None + export_format: str | None = None + + @property + def project_members(self) -> list: + if ( + settings.ZENODO_PROVIDER.get("add_project_members") + and self.project is not None + ): + return list(self.project.user.all()) + return [] diff --git a/rdmo_zenodo/exports/metadata/converter.py b/rdmo_zenodo/exports/metadata/converter.py new file mode 100644 index 0000000..3a881b1 --- /dev/null +++ b/rdmo_zenodo/exports/metadata/converter.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from typing import Any + +import cattrs + +converter = cattrs.Converter() + +def strip_empty_dict(d: dict[str, Any]) -> dict[str, Any]: + """Remove None, empty strings, lists, and dicts from serialized output.""" + clean = {} + for k, v in d.items(): + if v in (None, "", [], {}): + continue + if isinstance(v, dict): + v = strip_empty_dict(v) + if not v: + continue + clean[k] = v + return clean + +converter.register_unstructure_hook(dict, strip_empty_dict) diff --git a/rdmo_zenodo/exports/metadata/dataset.py b/rdmo_zenodo/exports/metadata/dataset.py deleted file mode 
100644 index 5223874..0000000 --- a/rdmo_zenodo/exports/metadata/dataset.py +++ /dev/null @@ -1,27 +0,0 @@ -from dataclasses import dataclass - -from django.conf import settings - -from .base import ZenodoMetadataBuilder -from .model import ZenodoMetadata - - -@dataclass -class DatasetZenodoMetadataBuilder(ZenodoMetadataBuilder): - - resource_type = "dataset" - upload_type = "dataset" - - def build_metadata(self) -> ZenodoMetadata: - return ZenodoMetadata( - resource_type={"id": settings.ZENODO_PROVIDER.get("resource_type", self.resource_type)}, - title=self.title, - description=self.description, - rights=self.get_rights_from_uri_paths(self.rights_uri_paths), - creators=self.get_creators(), - subjects=self.get_subjects_from_keywords(self.keywords), - languages=self.get_languages(), - upload_type=settings.ZENODO_PROVIDER.get("upload_type", self.upload_type), - publisher=settings.ZENODO_PROVIDER.get("publisher"), - funding=settings.ZENODO_PROVIDER.get("funding") - ) diff --git a/rdmo_zenodo/exports/metadata/exceptions.py b/rdmo_zenodo/exports/metadata/exceptions.py new file mode 100644 index 0000000..a822f88 --- /dev/null +++ b/rdmo_zenodo/exports/metadata/exceptions.py @@ -0,0 +1,20 @@ + +class MetadataBuildError(Exception): + """Base class for errors raised during metadata composition.""" + + def __init__(self, message: str, field: str | None = None, details: str | None = None): + self.field = field + self.details = details + super().__init__(message) + + def __str__(self): + if self.details: + return f'{self.field}: {self.details}' + else: + return f'{self.field}' + +class SchemaValidationError(MetadataBuildError): + """Raised when schema (attrs) validation fails.""" + +class ExtractionError(MetadataBuildError): + """Raised when an extractor or mapper fails.""" diff --git a/rdmo_zenodo/exports/metadata/extractors.py b/rdmo_zenodo/exports/metadata/extractors.py new file mode 100644 index 0000000..52bf618 --- /dev/null +++ 
b/rdmo_zenodo/exports/metadata/extractors.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +from typing import Any + +from django.conf import settings +from django.core.exceptions import ObjectDoesNotExist +from django.utils import timezone + +from rdmo_zenodo.exports.metadata.context import MetadataContext + +RIGHTS_URI_OPTIONS = { + "dataset_license_types/71": "cc-by-4.0", + "dataset_license_types/73": "cc-by-nc-4.0", + "dataset_license_types/74": "cc-by-nd-4.0", + "dataset_license_types/75": "cc-by-sa-4.0", + "dataset_license_types/cc0": "cc-zero", +} + +DEFAULT_SUBJECTS = ["Data Management Plan", "DMP"] + + +def get_title_from_project(context: MetadataContext) -> str: + if context.project.title: + return f"Data Management Plan for project {context.project.title}." + return "Data Management Plan." + +def get_title_from_dataset(context: MetadataContext) -> str: + if context.set_index is None: + return "Dataset" + title = context.get_text("project/dataset/title", set_index=context.set_index) + if title: + return title + dataset_id = context.get_text("project/dataset/id", set_index=context.set_index) + if dataset_id: + return dataset_id + index = (int(context.set_index) + 1) if isinstance(context.set_index, int) else 1 + return f"Dataset #{index}" + +def get_title_from_snapshot(context: MetadataContext) -> str: + title = get_title_from_project(context) + if context.snapshot and context.snapshot.title: + title += f" - {context.snapshot.title}" + return title + +def get_title_from_context(context: MetadataContext) -> str: + title = "" + if context.snapshot: + title = get_title_from_snapshot(context) + if context.set_index is not None: + dataset_title = get_title_from_dataset(context) + if title: + title += f" - {dataset_title}" + else: + return dataset_title + return title + +def get_description_from_project(context: MetadataContext) -> str: + desc = get_title_from_project(context) + if context.snapshot is not None: + if context.snapshot.description: + desc 
+= "\n" + desc += context.snapshot.description + if context.set_index is not None: + desc += "\n" + desc += get_title_from_dataset(context) + if context.view and context.export_format: + desc += "\n" + desc += f"Exported to {context.export_format} with the {context.view.title} view." + + return desc + +# === from settings === # + +def get_access_right_from_settings(_,) -> str: + return settings.ZENODO_PROVIDER.get("access_right", "open") + +def get_upload_type_from_settings() -> str: + return settings.ZENODO_PROVIDER.get("upload_type", "dataset") + +def get_publication_type_from_settings() -> str | None: + if settings.ZENODO_PROVIDER.get("upload_type") == "publication": + return settings.ZENODO_PROVIDER.get("publication_type", "datamanagementplan") + return None + +def get_resource_type_from_settings() -> dict[str, str]: + return {"id": settings.ZENODO_PROVIDER.get("resource_type", "publication-datamanagementplan")} + +def get_language_from_settings() -> list: + if language := settings.ZENODO_PROVIDER.get("language"): + return [{"id": language}] + return [] + +def get_publisher_from_settings() -> str | None: + return settings.ZENODO_PROVIDER.get("publisher") + +def get_publication_date_from_today() -> str: + return timezone.localdate().isoformat() + +# === users === # + +def get_orcid_from_user(user: Any) -> str | None: + try: + orcid = user.socialaccount_set.get(provider="orcid") + return orcid + except (ObjectDoesNotExist, AttributeError): + return None + +def get_invenio_creator_from_user(user): + orcid = get_orcid_from_user(user) + identifiers = [{"scheme": "orcid", "identifier": orcid.uid}] if orcid else [] + return { + "family_name": user.last_name, + "given_name": user.first_name, + "identifiers": identifiers, + "type": "personal", + } + +def get_zenodo_creator_from_user(user): + return { + "name": f"{user.last_name}, {user.first_name}".strip(), + "orcid": get_orcid_from_user(user).uid, + "affiliation": None, + } + +def get_creators_from_context(context: 
MetadataContext) -> list[dict[str, Any]]: + creators = [] + if context.zenodo_backend_type == "zenodo": + user_func = get_zenodo_creator_from_user + elif context.zenodo_backend_type == "invenio": + user_func = get_invenio_creator_from_user + else: + raise ValueError(f"Unsupported backend type: {context.zenodo_backend_type}") + for user in context.project_members: + creators.append(user_func(user)) + return creators + +# === licenses, subjects, keywords === + +def get_license_id_from_context(context: MetadataContext) -> list[dict[str, str]]: + values = context.get_values("project/dataset/sharing/conditions", set_index=context.set_index) + for v in values: + if v.option and (license_id := RIGHTS_URI_OPTIONS.get(v.option.uri_path)): + return [{"id": license_id}] + return [] + +def get_keywords_from_context(context: MetadataContext) -> list[str]: + keywords = [v.text for v in context.get_values("project/research_question/keywords") if v.text] + return DEFAULT_SUBJECTS + keywords + +def get_subjects_from_keywords_and_context(context: MetadataContext) -> list[dict[str, str]]: + return [{"subject": s} for s in get_keywords_from_context(context)] diff --git a/rdmo_zenodo/exports/metadata/invenio.py b/rdmo_zenodo/exports/metadata/invenio.py new file mode 100644 index 0000000..0f3e141 --- /dev/null +++ b/rdmo_zenodo/exports/metadata/invenio.py @@ -0,0 +1,211 @@ +# invenio_v6_models_strict.py +from __future__ import annotations + +from typing import Any, Literal + +import attrs + +from rdmo_zenodo.exports.metadata.utils import is_edtf_l0_date, is_iso_date + +# ------------------------- core CV wrappers ------------------------- # + +@attrs.define +class ResourceType: + """metadata.resource_type: requires {'id': ''}.""" + id: str + + def __attrs_post_init__(self) -> None: + if not self.id: + raise ValueError("resource_type.id must be a non-empty string") + +@attrs.define +class Role: + """Controlled vocabulary wrapper for roles (creators/contributors, dates, etc.).""" + id: 
str + title: dict[str, str] | None = None # service may add i18n labels + +# ------------------------- identifiers & affiliations ------------------------- # + +@attrs.define +class GenericIdentifier: + scheme: str # e.g. 'orcid', 'isni', 'ror', 'doi', ... + identifier: str + +@attrs.define +class Affiliation: + id: str | None = None # CV id (preferred, if known) + name: str | None = None # free text fallback + + def __attrs_post_init__(self) -> None: + # One of id or name must be present + if not (self.id or self.name): + raise ValueError("affiliation requires either 'id' or 'name'") + +# ------------------------- person_or_org & party entries ------------------------- # + +PersonOrOrgType = Literal["personal", "organizational"] + +@attrs.define +class PersonOrOrg: + type: PersonOrOrgType + given_name: str | None = None + family_name: str | None = None + name: str | None = None + identifiers: list[GenericIdentifier] = attrs.field(factory=list) + + def __attrs_post_init__(self) -> None: + if self.type == "personal": + if not (self.given_name and self.family_name): + raise ValueError("personal person_or_org requires given_name and family_name") + if self.name is not None: + raise ValueError("personal person_or_org must not set 'name'") + else: # organizational + if not self.name: + raise ValueError("organizational person_or_org requires 'name'") + if self.given_name or self.family_name: + raise ValueError("organizational person_or_org must not set given/family names") + +@attrs.define +class Creator: + person_or_org: PersonOrOrg + role: Role | None = None # optional for creators + affiliations: list[Affiliation] = attrs.field(factory=list) + + def __attrs_post_init__(self) -> None: + # affiliations only if personal (per docs) + if self.person_or_org.type == "organizational" and self.affiliations: + raise ValueError("affiliations are only allowed for personal creators") + +@attrs.define +class Contributor: + person_or_org: PersonOrOrg + role: Role # required for 
contributors + affiliations: list[Affiliation] = attrs.field(factory=list) + + def __attrs_post_init__(self) -> None: + if self.person_or_org.type == "organizational" and self.affiliations: + raise ValueError("affiliations are only allowed for personal contributors") + +# ------------------------- rights, languages, subjects, dates ------------------------- # + +@attrs.define +class Rights: + # Either `id` (CV) or a free-text `title` (localized), but not both. + id: str | None = None + title: dict[str, str] | None = None + description: dict[str, str] | None = None + link: str | None = None + + def __attrs_post_init__(self) -> None: + if bool(self.id) == bool(self.title): + raise ValueError("rights: either 'id' or 'title' must be set, but not both") + +@attrs.define +class Language: + id: str # ISO-639-3 code, e.g. 'eng', 'dan' + +@attrs.define +class Subject: + id: str | None = None # CV id + subject: str | None = None # free keyword + + def __attrs_post_init__(self) -> None: + if bool(self.id) == bool(self.subject): + raise ValueError("subject: set exactly one of 'id' or 'subject'") + +@attrs.define +class DateEntry: + date: str = attrs.field(converter=is_edtf_l0_date) + type: Role + description: str | None = None + +# ------------------------- alternate/related identifiers ------------------------- # + +@attrs.define +class AlternateIdentifier: + identifier: str + scheme: str # CV scheme (doi, isbn, url, ...) 
+ +@attrs.define +class RelatedIdentifier: + identifier: str + scheme: str # CV scheme + relation_type: Role # {'id': ''} + resource_type: ResourceType | None = None # optional + +# ------------------------- funding (kept minimal; docs leave room for variations) ------------------------- # + +@attrs.define +class FundingRef: + funder: dict[str, Any] | None = None # commonly {'id': ''} + award: dict[str, Any] | None = None # commonly {'id': ''} or {'number': '...', 'title': {...}} + +# ------------------------- access ------------------------- # + +@attrs.define +class AccessEmbargo: + active: bool + until: str | None = attrs.field(default=None, converter=lambda v: is_iso_date(v) if v else None) + reason: str | None = None + + def __attrs_post_init__(self) -> None: + if self.active and not self.until: + raise ValueError("embargo.until (YYYY-MM-DD) is required when embargo.active is true") + +@attrs.define +class AccessBlock: + record: Literal["public", "restricted"] + files: Literal["public", "restricted"] + embargo: AccessEmbargo | None = None + +# ------------------------- metadata & top-level payload ------------------------- # + +@attrs.define +class MetadataV6: + resource_type: ResourceType + title: str + publication_date: str = attrs.field(converter=is_edtf_l0_date) + creators: list[Creator] = attrs.field(factory=list) + + description: str | None = None + additional_descriptions: list[dict[str, Any]] = attrs.field(factory=list) + additional_titles: list[dict[str, Any]] = attrs.field(factory=list) + + rights: list[Rights] = attrs.field(factory=list) + contributors: list[Contributor] = attrs.field(factory=list) + subjects: list[Subject] = attrs.field(factory=list) + languages: list[Language] = attrs.field(factory=list) + dates: list[DateEntry] = attrs.field(factory=list) + + version: str | None = None + publisher: str | None = None + + alternate_identifiers: list[AlternateIdentifier] = attrs.field(factory=list) + related_identifiers: list[RelatedIdentifier] = 
attrs.field(factory=list) + + sizes: list[str] = attrs.field(factory=list) + formats: list[str] = attrs.field(factory=list) + locations: list[dict[str, Any]] = attrs.field(factory=list) + funding: list[FundingRef] = attrs.field(factory=list) + references: list[str] = attrs.field(factory=list) + + def __attrs_post_init__(self) -> None: + # minimal required fields per docs + if not self.creators: + raise ValueError("metadata.creators requires at least one creator") + if not self.title: + raise ValueError("metadata.title is required") + if not self.resource_type or not self.resource_type.id: + raise ValueError("metadata.resource_type.id is required") + +@attrs.define +class RecordV6Payload: + metadata: MetadataV6 + access: AccessBlock + files: dict[str, Any] | None = None + pids: dict[str, Any] | None = None + + def to_dict(self) -> dict[str, Any]: + d = attrs.asdict(self, recurse=True) + # drop empties for cleaner payloads + return {k: v for k, v in d.items() if v not in (None, "", [], {})} diff --git a/rdmo_zenodo/exports/metadata/mapper_invenio.py b/rdmo_zenodo/exports/metadata/mapper_invenio.py new file mode 100644 index 0000000..20f3cd8 --- /dev/null +++ b/rdmo_zenodo/exports/metadata/mapper_invenio.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from typing import Any, Callable + +from rdmo_zenodo.exports.metadata.extractors import ( + get_creators_from_context, + get_description_from_project, + get_language_from_settings, + get_license_id_from_context, + get_publication_date_from_today, + get_publisher_from_settings, + get_resource_type_from_settings, + get_subjects_from_keywords_and_context, + get_title_from_context, +) + +FieldGetter = Callable[[Any], Any] + +INVENIO_FIELD_MAPPER: dict[str, FieldGetter] = { + # required core fields + "resource_type": get_resource_type_from_settings, + "title": get_title_from_context, + "publication_date": get_publication_date_from_today, + "description": get_description_from_project, + "creators": 
get_creators_from_context, + + # optional metadata + "subjects": get_subjects_from_keywords_and_context, + "languages": get_language_from_settings, + "rights": get_license_id_from_context, + "publisher": get_publisher_from_settings, +} + diff --git a/rdmo_zenodo/exports/metadata/mapper_zenodo.py b/rdmo_zenodo/exports/metadata/mapper_zenodo.py new file mode 100644 index 0000000..f9f875d --- /dev/null +++ b/rdmo_zenodo/exports/metadata/mapper_zenodo.py @@ -0,0 +1,35 @@ +# rdmo_zenodo/exports/metadata/mapper_zenodo.py +from __future__ import annotations + +from typing import Any, Callable + +from rdmo_zenodo.exports.metadata.extractors import ( + get_creators_from_context, + get_description_from_project, + get_keywords_from_context, + get_language_from_settings, + get_license_id_from_context, + get_publication_date_from_today, + get_publication_type_from_settings, + get_publisher_from_settings, + get_title_from_context, + get_upload_type_from_settings, +) + +FieldGetter = Callable[[Any], Any] + +ZENODO_FIELD_MAPPER: dict[str, FieldGetter] = { + # core required metadata + "upload_type": get_upload_type_from_settings, + "publication_type": get_publication_type_from_settings, + "publication_date": get_publication_date_from_today, + "title": get_title_from_context, + "description": get_description_from_project, + "creators": get_creators_from_context, + + # optional fields and extras + "keywords": get_keywords_from_context, + "language": get_language_from_settings, + "license": get_license_id_from_context, + "publisher": get_publisher_from_settings, +} diff --git a/rdmo_zenodo/exports/metadata/model.py b/rdmo_zenodo/exports/metadata/model.py deleted file mode 100644 index c56264d..0000000 --- a/rdmo_zenodo/exports/metadata/model.py +++ /dev/null @@ -1,42 +0,0 @@ -from dataclasses import dataclass, field -from typing import Optional - - -@dataclass -class ZenodoMetadata: - resource_type: dict[str, str] - title: str - description: str - creators: list[dict[str, dict]] - 
upload_type: Optional[str] = None - rights: Optional[list[dict[str, str]]] = None - subjects: Optional[list[dict[str, str]]] = field(default_factory=list) - languages: Optional[list[dict[str, str]]] = None - publisher: Optional[str] = None - funding: Optional[str] = None - publication_date: Optional[str] = None - publication_type: Optional[str] = None - - def to_dict(self, filter_empty: Optional[bool] = False) -> dict[str, dict]: - """Return dict suitable for POST to Zenodo.""" - metadata = { - "resource_type": self.resource_type, - "title": self.title, - "description": self.description, - "creators": self.creators, - "upload_type": self.upload_type, - "rights": self.rights, - "subjects": self.subjects, - "languages": self.languages, - "publisher": self.publisher, - "funding": self.funding, - "publication_date": self.publication_date, - "publication_type": self.publication_type, - } - if filter_empty: - return self.filter_empty(metadata) - - return metadata - - def filter_empty(self, metadata: dict[str, any]) -> dict[str, any]: - return {k: v for k, v in metadata.items() if v not in [None, '', [], {}]} diff --git a/rdmo_zenodo/exports/metadata/snapshot.py b/rdmo_zenodo/exports/metadata/snapshot.py deleted file mode 100644 index b2602eb..0000000 --- a/rdmo_zenodo/exports/metadata/snapshot.py +++ /dev/null @@ -1,37 +0,0 @@ -from dataclasses import dataclass -from typing import Optional - -from django.conf import settings -from django.utils import timezone - -from .base import ZenodoMetadataBuilder -from .model import ZenodoMetadata - - -@dataclass -class SnapshotZenodoMetadataBuilder(ZenodoMetadataBuilder): - - publication_date: Optional[str] = None - resource_type = "publication-datamanagementplan" - upload_type = "publication" - publication_type = "datamanagementplan" - - def build_metadata(self) -> ZenodoMetadata: - return ZenodoMetadata( - resource_type={"id": settings.ZENODO_PROVIDER.get("resource_type", self.resource_type)}, - title=self.title, - 
description=self.description, - rights=self.get_rights_from_uri_paths(self.rights_uri_paths), - creators=self.get_creators(), - subjects=self.get_subjects_from_keywords(self.keywords), - languages=self.get_languages(), - upload_type=settings.ZENODO_PROVIDER.get("upload_type", self.upload_type), - publication_type=self.get_publication_type(), - publisher=settings.ZENODO_PROVIDER.get("publisher"), - funding=settings.ZENODO_PROVIDER.get("funding"), - publication_date=self.publication_date or timezone.localdate().isoformat(), - ) - - def get_publication_type(self): - if self.upload_type == "publication": - return settings.ZENODO_PROVIDER.get("publication_type", self.publication_type) diff --git a/rdmo_zenodo/exports/metadata/utils.py b/rdmo_zenodo/exports/metadata/utils.py new file mode 100644 index 0000000..13fc5d3 --- /dev/null +++ b/rdmo_zenodo/exports/metadata/utils.py @@ -0,0 +1,17 @@ +import re + + +def is_edtf_l0_date(value: str) -> str: + """ + Accepts EDTF Level 0 "Date" or "Date Interval": + YYYY | YYYY-MM | YYYY-MM-DD + YYYY/YYYY | YYYY-MM/YYYY-MM | YYYY-MM-DD/YYYY-MM-DD, etc. 
+ """ + if not re.match(r"^\d{4}(-\d{2}(-\d{2})?)?(/\d{4}(-\d{2}(-\d{2})?)?)?$", value): + raise ValueError(f"Invalid EDTF Level 0 date {value!r}") + return value + +def is_iso_date(value: str) -> str: + if not re.match(r"^\d{4}-\d{2}-\d{2}$", value): + raise ValueError(f"Invalid ISO date {value!r}; must be YYYY-MM-DD") + return value diff --git a/rdmo_zenodo/exports/metadata/zenodo.py b/rdmo_zenodo/exports/metadata/zenodo.py new file mode 100644 index 0000000..f357bee --- /dev/null +++ b/rdmo_zenodo/exports/metadata/zenodo.py @@ -0,0 +1,128 @@ +# References, https://zenodraft.github.io/metadata-schema-zenodo/latest/schema.json +# https://developers.zenodo.org/#depositions +from __future__ import annotations + +from typing import Any, Literal + +import attrs + +from rdmo_zenodo.exports.metadata.utils import is_iso_date + +UploadType = Literal[ + "dataset", "image", "publication", "poster", "presentation", + "software", "lesson", "physicalobject", "other" +] + +PublicationType = Literal[ + "annotationcollection", "book", "section", "conferencepaper", "datamanagementplan", + "article", "patent", "preprint", "deliverable", "milestone", + "proposal", "report", "softwaredocumentation", "taxonomictreatment", + "technicalnote", "thesis", "workingpaper", "other" +] + +ImageType = Literal["figure", "plot", "drawing", "diagram", "photo", "other"] + +AccessRight = Literal["open", "embargoed", "restricted", "closed"] + +ContributorType = Literal[ + "ContactPerson", "DataCollector", "DataCurator", "DataManager", + "Distributor", "Editor", "HostingInstitution", "Producer", + "ProjectLeader", "ProjectManager", "ProjectMember", "RegistrationAgency", + "RegistrationAuthority", "RelatedPerson", "ResearchGroup", "RightsHolder", + "Sponsor", "Supervisor", "WorkPackageLeader", "Other", "Annotator" +] + + +@attrs.define +class Creator: + name: str # in the format Family name, Given names + affiliation: str | None = None + orcid: str | None = None + gnd : str | None = None + + def 
__attrs_post_init__(self) -> None: + if not self.name: + raise ValueError("creator.name is required") + +@attrs.define +class Contributor: + name: str + type: ContributorType + affiliation: str | None = None + orcid: str | None = None + gnd: str | None = None + +@attrs.define +class Identifier: + identifier: str + relation: str | None = None # e.g. "isSupplementTo" + scheme: str | None = None # "doi", "url", etc. + resource_type: str | None = None + +@attrs.define +class Grant: + id: str # e.g. "10.13039/501100000780::101122483" + +@attrs.define +class Community: + identifier: str # e.g. "zenodo-community-id" + +@attrs.define +class RelatedIdentifier: + identifier: str + relation: str + scheme: str | None = None + resource_type: str | None = None + + +@attrs.define +class ZenodoMetadata: + upload_type: UploadType + title: str + description: str + publication_date: str = attrs.field(converter=is_iso_date) + publication_type: str | None = None + creators: list[Creator] = attrs.field(factory=list) + contributors: list[Contributor] = attrs.field(factory=list) + keywords: list[str] = attrs.field(factory=list) + language: str | None = None # ISO 639-1 code + related_identifiers: list[RelatedIdentifier] = attrs.field(factory=list) + alternate_identifiers: list[Identifier] = attrs.field(factory=list) + grants: list[Grant] = attrs.field(factory=list) + references: list[str] = attrs.field(factory=list) + notes: str | None = None + communities: list[Community] = attrs.field(factory=list) + access_right: AccessRight = "open" + license: str | None = None # any SPDX ID or custom name + embargo_date: str | None = attrs.field(default=None, converter=lambda v: is_iso_date(v) if v else None) + access_conditions: str | None = None + publisher: str | None = None + version: str | None = None + + def __attrs_post_init__(self) -> None: + # upload type dependent constraints + if self.upload_type == "publication" and not self.publication_type: + raise ValueError("publication_type required 
when upload_type='publication'") + + # access_right constraints + if self.access_right in {"open", "embargoed"} and not self.license: + raise ValueError("license required when access_right is open or embargoed") + if self.access_right == "embargoed" and not self.embargo_date: + raise ValueError("embargo_date required when access_right='embargoed'") + if self.access_right == "restricted" and not self.access_conditions: + raise ValueError("access_conditions required when access_right='restricted'") + + # required minimal fields + if not self.creators: + raise ValueError("At least one creator required") + if not self.title or not self.description: + raise ValueError("Both title and description are required") + +@attrs.define +class ZenodoDepositionPayload: + metadata: ZenodoMetadata + + def to_dict(self) -> dict[str, Any]: + d = attrs.asdict(self, recurse=True) + # remove empties + return {k: v for k, v in d.items() if v not in (None, "", [], {})} diff --git a/rdmo_zenodo/exports/publish_snapshot.py b/rdmo_zenodo/exports/publish.py similarity index 100% rename from rdmo_zenodo/exports/publish_snapshot.py rename to rdmo_zenodo/exports/publish.py From 91a701f4f4c0ebb39c570014aacc85beeecb2f31 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 9 Oct 2025 18:52:49 +0200 Subject: [PATCH 36/41] fix export to zenodo --- rdmo_zenodo/exports/metadata/builder.py | 69 +++++++++++++------ rdmo_zenodo/exports/metadata/context.py | 2 +- rdmo_zenodo/exports/metadata/converter.py | 66 ++++++++++++++---- rdmo_zenodo/exports/metadata/exceptions.py | 8 +-- rdmo_zenodo/exports/metadata/extractors.py | 43 +++++++----- rdmo_zenodo/exports/metadata/invenio.py | 14 +++- .../exports/metadata/mapper_invenio.py | 6 +- rdmo_zenodo/exports/metadata/mapper_zenodo.py | 6 +- rdmo_zenodo/exports/metadata/zenodo.py | 13 ++-- 9 files changed, 161 insertions(+), 66 deletions(-) diff --git a/rdmo_zenodo/exports/metadata/builder.py b/rdmo_zenodo/exports/metadata/builder.py index 8a0078a..bf5d51a 
100644 --- a/rdmo_zenodo/exports/metadata/builder.py +++ b/rdmo_zenodo/exports/metadata/builder.py @@ -1,18 +1,24 @@ import inspect -from typing import Callable +from typing import Any, Callable -from cattr import structure +from attr import AttrsInstance, fields +from cattrs import ClassValidationError, transform_error from rdmo_zenodo.exports.metadata.context import MetadataContext from rdmo_zenodo.exports.metadata.converter import converter -from rdmo_zenodo.exports.metadata.exceptions import ExtractionError, SchemaValidationError +from rdmo_zenodo.exports.metadata.exceptions import SchemaValidationError from rdmo_zenodo.exports.metadata.invenio import MetadataV6, RecordV6Payload from rdmo_zenodo.exports.metadata.mapper_invenio import INVENIO_FIELD_MAPPER from rdmo_zenodo.exports.metadata.mapper_zenodo import ZENODO_FIELD_MAPPER from rdmo_zenodo.exports.metadata.zenodo import ZenodoDepositionPayload, ZenodoMetadata +BACKENDS = { + "zenodo": (ZENODO_FIELD_MAPPER, ZenodoMetadata, ZenodoDepositionPayload), + "invenio": (INVENIO_FIELD_MAPPER, MetadataV6, RecordV6Payload), +} -def call_extractors_on_field_mapping(context: MetadataContext, fields: dict[str, Callable]): +def extract_metadata(context: MetadataContext, fields: dict[str, Callable]): + # 1: extract metadata from rdmo project values extracted = {} for name, getter in fields.items(): # if the callable expects arguments, pass context; else call it directly @@ -29,27 +35,46 @@ def call_extractors_on_field_mapping(context: MetadataContext, fields: dict[str, return extracted -def build_payload(context, backend: str): - if backend == "zenodo": - mapper, schema, payload_cls = ( - ZENODO_FIELD_MAPPER, ZenodoMetadata, ZenodoDepositionPayload - ) - elif backend == "invenio": - mapper, schema, payload_cls = ( - INVENIO_FIELD_MAPPER, MetadataV6, RecordV6Payload +def validate_schema(metadata_dict: dict[str, Any], schema: AttrsInstance) -> Any: + # 2: validate metadata dict against attrs schema + + # 2.1 check for 
unknown keys + allowed = {f.name for f in fields(schema)} + unknown = set(metadata_dict) - allowed + if unknown: + raise SchemaValidationError( + f"Unexpected fields in metadata for {schema.__name__}", + details=", ".join(sorted(unknown)) ) - else: - raise ValueError(f"Unknown backend: {backend}") + # 2.2 structure from dict and return validated try: - metadata_dict = call_extractors_on_field_mapping(context, mapper) + return converter.structure(metadata_dict, schema) + except ClassValidationError as e: + raise SchemaValidationError( + "Schema validation failed", details=",".join(transform_error(e)) + ) from e except (TypeError, ValueError) as e: - raise ExtractionError("Failed to extract data from RDMO project", details=str(e)) from e + raise SchemaValidationError("Invalid metadata structure", details=str(e)) from e + + +def build_payload_object(metadata_obj: Any, payload_cls: type) -> Any: + # 3: build payload dataclass instance + return payload_cls(metadata=metadata_obj) + +def serialize_payload(payload_obj: Any) -> dict[str, Any]: + # 4: convert payload object to JSON-serializable dict + return converter.unstructure(payload_obj) + + +def build_payload(context: MetadataContext, backend: str) -> dict[str, Any]: + # main entrypoint: run the extraction → validation → serialization pipeline try: - metadata_obj = structure(metadata_dict, schema) - except (TypeError, ValueError) as e: - raise SchemaValidationError("Schema validation failed", e) from e + mapper, schema, payload_cls = BACKENDS[backend] + except KeyError: + raise ValueError(f"Unknown backend: {backend!r}") from None - payload_obj = payload_cls(metadata=metadata_obj) - payload = converter.unstructure(payload_obj) - return payload + metadata_dict = extract_metadata(context, mapper) + metadata_obj = validate_schema(metadata_dict, schema) + payload_obj = build_payload_object(metadata_obj, payload_cls) + return serialize_payload(payload_obj) diff --git a/rdmo_zenodo/exports/metadata/context.py 
b/rdmo_zenodo/exports/metadata/context.py index 4559100..6a23711 100644 --- a/rdmo_zenodo/exports/metadata/context.py +++ b/rdmo_zenodo/exports/metadata/context.py @@ -10,7 +10,7 @@ @dataclass(frozen=True) class MetadataContext: - project: Project | None + project: Project snapshot: Snapshot | None set_index: int | None get_values: Callable[..., list[Any]] diff --git a/rdmo_zenodo/exports/metadata/converter.py b/rdmo_zenodo/exports/metadata/converter.py index 3a881b1..1862f6c 100644 --- a/rdmo_zenodo/exports/metadata/converter.py +++ b/rdmo_zenodo/exports/metadata/converter.py @@ -2,21 +2,59 @@ from typing import Any +import attr import cattrs +from rdmo_zenodo.exports.metadata.invenio import Language, ResourceType, Rights, Role + +# Single shared converter instance converter = cattrs.Converter() -def strip_empty_dict(d: dict[str, Any]) -> dict[str, Any]: - """Remove None, empty strings, lists, and dicts from serialized output.""" - clean = {} - for k, v in d.items(): - if v in (None, "", [], {}): - continue - if isinstance(v, dict): - v = strip_empty_dict(v) - if not v: - continue - clean[k] = v - return clean - -converter.register_unstructure_hook(dict, strip_empty_dict) +_EMPTY = (None, "", [], {}) + + +def _strip_empty(value: Any) -> Any: + """Recursively drop None, '', [], {} anywhere in the structure.""" + if isinstance(value, dict): + # Clean nested first, then drop empty keys. + cleaned = {k: _strip_empty(v) for k, v in value.items()} + return {k: v for k, v in cleaned.items() if v not in _EMPTY} + if isinstance(value, list): + # Clean nested first, then drop empty items. + cleaned = [_strip_empty(v) for v in value] + return [v for v in cleaned if v not in _EMPTY] + return value + + +def _unstructure_attrs_and_strip(inst: Any) -> Any: + """ + Unstructure one attrs instance: + - Recursively unstructure every field via the converter (important!) 
+ - Strip empties from the resulting dict + """ + cls = type(inst) + # Use attr.fields to walk declared attrs fields + data = {f.name: converter.unstructure(getattr(inst, f.name)) for f in attr.fields(cls)} + return _strip_empty(data) + + +# Apply to ALL attrs-based classes (top-level and nested). +# Using attr.has as the predicate means: “if this is an @attrs class, use the hook”. +converter.register_unstructure_hook_factory( + attr.has, + lambda _cls: _unstructure_attrs_and_strip, +) + +def make_simple_id_hook(cls): + def _hook(value: Any, _: type) -> Any: + if isinstance(value, cls): + return value + if isinstance(value, str): + return cls(id=value) + if isinstance(value, dict): + return cls(**value) + raise TypeError(f"Cannot structure {value!r} as {cls.__name__}") + return _hook + +for simple_cls in (ResourceType, Language, Role, Rights): + converter.register_structure_hook(simple_cls, make_simple_id_hook(simple_cls)) diff --git a/rdmo_zenodo/exports/metadata/exceptions.py b/rdmo_zenodo/exports/metadata/exceptions.py index a822f88..59d1806 100644 --- a/rdmo_zenodo/exports/metadata/exceptions.py +++ b/rdmo_zenodo/exports/metadata/exceptions.py @@ -2,16 +2,16 @@ class MetadataBuildError(Exception): """Base class for errors raised during metadata composition.""" - def __init__(self, message: str, field: str | None = None, details: str | None = None): - self.field = field + def __init__(self, message: str, details: str | None = None): self.details = details + self.message = message super().__init__(message) def __str__(self): if self.details: - return f'{self.field}: {self.details}' + return f'{self.message}: {self.details}' else: - return f'{self.field}' + return f'{self.message}' class SchemaValidationError(MetadataBuildError): """Raised when schema (attrs) validation fails.""" diff --git a/rdmo_zenodo/exports/metadata/extractors.py b/rdmo_zenodo/exports/metadata/extractors.py index 52bf618..2504d7f 100644 --- a/rdmo_zenodo/exports/metadata/extractors.py +++ 
b/rdmo_zenodo/exports/metadata/extractors.py @@ -82,17 +82,23 @@ def get_publication_type_from_settings() -> str | None: return settings.ZENODO_PROVIDER.get("publication_type", "datamanagementplan") return None -def get_resource_type_from_settings() -> dict[str, str]: - return {"id": settings.ZENODO_PROVIDER.get("resource_type", "publication-datamanagementplan")} +def get_resource_type_from_settings_and_context(context) -> dict[str, str]: + default = "publication-datamanagementplan" + if context.set_index is not None: + default = "dataset" + return settings.ZENODO_PROVIDER.get("resource_type", default) -def get_language_from_settings() -> list: +def get_language_from_settings() -> str | None: if language := settings.ZENODO_PROVIDER.get("language"): - return [{"id": language}] - return [] + return language + return None def get_publisher_from_settings() -> str | None: return settings.ZENODO_PROVIDER.get("publisher") +def get_funding_from_settings() -> str | None: + return settings.ZENODO_PROVIDER.get("funding") + def get_publication_date_from_today() -> str: return timezone.localdate().isoformat() @@ -100,8 +106,7 @@ def get_publication_date_from_today() -> str: def get_orcid_from_user(user: Any) -> str | None: try: - orcid = user.socialaccount_set.get(provider="orcid") - return orcid + return user.socialaccount_set.get(provider="orcid") except (ObjectDoesNotExist, AttributeError): return None @@ -109,38 +114,44 @@ def get_invenio_creator_from_user(user): orcid = get_orcid_from_user(user) identifiers = [{"scheme": "orcid", "identifier": orcid.uid}] if orcid else [] return { - "family_name": user.last_name, - "given_name": user.first_name, - "identifiers": identifiers, - "type": "personal", + "person_or_org": { + "family_name": user.last_name, + "given_name": user.first_name, + "identifiers": identifiers, + "type": "personal", + } } def get_zenodo_creator_from_user(user): + orcid = get_orcid_from_user(user) return { "name": f"{user.last_name}, 
{user.first_name}".strip(), - "orcid": get_orcid_from_user(user).uid, + "orcid": orcid.uid if orcid else None, "affiliation": None, } def get_creators_from_context(context: MetadataContext) -> list[dict[str, Any]]: creators = [] if context.zenodo_backend_type == "zenodo": - user_func = get_zenodo_creator_from_user + get_creator = get_invenio_creator_from_user elif context.zenodo_backend_type == "invenio": - user_func = get_invenio_creator_from_user + get_creator = get_invenio_creator_from_user else: raise ValueError(f"Unsupported backend type: {context.zenodo_backend_type}") for user in context.project_members: - creators.append(user_func(user)) + creators.append(get_creator(user)) return creators # === licenses, subjects, keywords === def get_license_id_from_context(context: MetadataContext) -> list[dict[str, str]]: - values = context.get_values("project/dataset/sharing/conditions", set_index=context.set_index) + set_index = context.set_index if context.set_index is not None else 0 + values = context.get_values("project/dataset/sharing/conditions", set_index=set_index) for v in values: if v.option and (license_id := RIGHTS_URI_OPTIONS.get(v.option.uri_path)): return [{"id": license_id}] + if v.option.additional_input == "text" and v.text: + return [{"id": v.text}] return [] def get_keywords_from_context(context: MetadataContext) -> list[str]: diff --git a/rdmo_zenodo/exports/metadata/invenio.py b/rdmo_zenodo/exports/metadata/invenio.py index 0f3e141..b60eadb 100644 --- a/rdmo_zenodo/exports/metadata/invenio.py +++ b/rdmo_zenodo/exports/metadata/invenio.py @@ -134,10 +134,22 @@ class RelatedIdentifier: resource_type: ResourceType | None = None # optional # ------------------------- funding (kept minimal; docs leave room for variations) ------------------------- # +@attrs.define +class Funder: + id: str | None = None # commonly {'id': ''} or {'number': '...', 'title': {...}} + name: str | None = None # commonly {'id': ''} + +@attrs.define +class Award: + id: str | 
None = None + title: str | None = None + number: str | None = None + identifiers: list[GenericIdentifier] | None = None + @attrs.define class FundingRef: - funder: dict[str, Any] | None = None # commonly {'id': ''} + funder: Funder # commonly {'id': ''} award: dict[str, Any] | None = None # commonly {'id': ''} or {'number': '...', 'title': {...}} # ------------------------- access ------------------------- # diff --git a/rdmo_zenodo/exports/metadata/mapper_invenio.py b/rdmo_zenodo/exports/metadata/mapper_invenio.py index 20f3cd8..c6aa9f5 100644 --- a/rdmo_zenodo/exports/metadata/mapper_invenio.py +++ b/rdmo_zenodo/exports/metadata/mapper_invenio.py @@ -5,11 +5,12 @@ from rdmo_zenodo.exports.metadata.extractors import ( get_creators_from_context, get_description_from_project, + get_funding_from_settings, get_language_from_settings, get_license_id_from_context, get_publication_date_from_today, get_publisher_from_settings, - get_resource_type_from_settings, + get_resource_type_from_settings_and_context, get_subjects_from_keywords_and_context, get_title_from_context, ) @@ -18,13 +19,14 @@ INVENIO_FIELD_MAPPER: dict[str, FieldGetter] = { # required core fields - "resource_type": get_resource_type_from_settings, + "resource_type": get_resource_type_from_settings_and_context, "title": get_title_from_context, "publication_date": get_publication_date_from_today, "description": get_description_from_project, "creators": get_creators_from_context, # optional metadata + "funding": get_funding_from_settings, "subjects": get_subjects_from_keywords_and_context, "languages": get_language_from_settings, "rights": get_license_id_from_context, diff --git a/rdmo_zenodo/exports/metadata/mapper_zenodo.py b/rdmo_zenodo/exports/metadata/mapper_zenodo.py index f9f875d..8edf769 100644 --- a/rdmo_zenodo/exports/metadata/mapper_zenodo.py +++ b/rdmo_zenodo/exports/metadata/mapper_zenodo.py @@ -6,12 +6,14 @@ from rdmo_zenodo.exports.metadata.extractors import ( get_creators_from_context, 
get_description_from_project, + get_funding_from_settings, get_keywords_from_context, get_language_from_settings, get_license_id_from_context, get_publication_date_from_today, get_publication_type_from_settings, get_publisher_from_settings, + get_resource_type_from_settings_and_context, get_title_from_context, get_upload_type_from_settings, ) @@ -21,6 +23,7 @@ ZENODO_FIELD_MAPPER: dict[str, FieldGetter] = { # core required metadata "upload_type": get_upload_type_from_settings, + "resource_type": get_resource_type_from_settings_and_context, "publication_type": get_publication_type_from_settings, "publication_date": get_publication_date_from_today, "title": get_title_from_context, @@ -28,8 +31,9 @@ "creators": get_creators_from_context, # optional fields and extras + "funding": get_funding_from_settings, "keywords": get_keywords_from_context, - "language": get_language_from_settings, + "languages": get_language_from_settings, "license": get_license_id_from_context, "publisher": get_publisher_from_settings, } diff --git a/rdmo_zenodo/exports/metadata/zenodo.py b/rdmo_zenodo/exports/metadata/zenodo.py index f357bee..3219a88 100644 --- a/rdmo_zenodo/exports/metadata/zenodo.py +++ b/rdmo_zenodo/exports/metadata/zenodo.py @@ -6,6 +6,7 @@ import attrs +from rdmo_zenodo.exports.metadata.invenio import Creator, FundingRef, Language, ResourceType from rdmo_zenodo.exports.metadata.utils import is_iso_date UploadType = Literal[ @@ -34,7 +35,7 @@ @attrs.define -class Creator: +class ZenodoCreator: name: str # in the format Family name, Given names affiliation: str | None = None orcid: str | None = None @@ -77,17 +78,19 @@ class RelatedIdentifier: @attrs.define class ZenodoMetadata: - upload_type: UploadType + resource_type: ResourceType title: str - description: str publication_date: str = attrs.field(converter=is_iso_date) - publication_type: str | None = None creators: list[Creator] = attrs.field(factory=list) + upload_type: UploadType | None = None + description: str | None = 
None + publication_type: str | None = None contributors: list[Contributor] = attrs.field(factory=list) keywords: list[str] = attrs.field(factory=list) - language: str | None = None # ISO 639-1 code + languages: list[Language] = attrs.field(factory=list) related_identifiers: list[RelatedIdentifier] = attrs.field(factory=list) alternate_identifiers: list[Identifier] = attrs.field(factory=list) + funding: list[FundingRef] = attrs.field(factory=list) grants: list[Grant] = attrs.field(factory=list) references: list[str] = attrs.field(factory=list) notes: str | None = None From 5d4aa90c100836ca6d42b4a3501d59b6dc06f275 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 9 Oct 2025 18:53:20 +0200 Subject: [PATCH 37/41] docs: update README --- README.md | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index ffe575d..b88c283 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,7 @@ This plugin implements an [export provider](https://rdmo.readthedocs.io/en/latest/plugins/index.html#export-providers) for RDMO, which lets users push metadata from RDMO to Zenodo work packages. The plugin uses [OAUTH 2.0](https://oauth.net/2/), so that users use their respective accounts in both systems. It creates only the metadata in Zenodo, so that users need to upload the actual data on Zenodo themselfes. -Setup ------ +## Setup Install the plugin in your RDMO virtual environment using pip (directly from GitHub): @@ -30,7 +29,9 @@ When the translation method `_` was not yet imported in your `config/settings/lo from django.utils.translation import gettext_lazy as _ ``` -### Zenodo configuration +## Configuration + +### Register a *Developer application* for authentication A *Developer applications* has to be registered with Zenodo here: https://zenodo.org/account/settings/applications/. For development, you can also use the sandbox instance provided by Zenodo: https://sandbox.zenodo.org/account/settings/applications/. 
@@ -46,6 +47,7 @@ http://localhost:8000/services/oauth/zenodo/callback/ http://localhost:8000/services/oauth/zenodo-publish/callback/ ``` +### Configure the RDMO settings After registration, you are provided with a `client_id` and a `client_secret`, which need to be added to the RDMO settings in `config/settings/local.py`, along with some other optional entries: @@ -82,18 +84,25 @@ ZENODO_PROVIDER = { ``` The `resource_type` will be set by the specific export provider, e.g. as `'dataset'` or as `'publication-datamanagementplan'` for `zenodo-publish` export. -Usage ------ +## Usage The plugins appear as export options on the RDMO project overview. For a Zenodo backend, it was tested against https://sandbox.zenodo.org/. Analogous to Zenodo this plugin can also be used with InvenioRDM instances for which it was tested against https://inveniordm.web.cern.ch/. Currently, the following properties of the Zenodo data model are created from RDMO attributes: -| Zenodo field | RDMO attribute | -| ------------- | ---------------------------------------------------------------------------------| -| `title` | `project/dataset/title` or `project/dataset/id` or `f'Dataset #{set_index + 1}'` | -| `description` | `project/dataset/description` | -| `rights` | `project/dataset/sharing/conditions` | - +| Zenodo field | RDMO attribute | +|-----------------------|-----------------------------------------------------------------------------------------------------------------------| +| `title` | `project/dataset/title` or `project/dataset/id` or `f'Dataset #{set_index + 1}'` or `project.title` or `snapshot.title` | +| `description` | `project/dataset/description` | +| `license` or `rights` | `project/dataset/sharing/conditions` | +| `subjects` | `project/research_question/keywords` | +| `creators` | from `project.member` | In addition, several fields can be configured in the settings as shown above. 
+ +### Development +Information about the API schemas can be found at: +* https://inveniordm.docs.cern.ch/reference/metadata/#metadata +* https://github.com/inveniosoftware/invenio-rdm-records/tree/master/invenio_rdm_records/records/jsonschemas/records +* https://zenodraft.github.io/metadata-schema-zenodo/latest/schema.json +* https://developers.zenodo.org/#depositions From 59377ff7a1038b34379708b89551c3be732d5f6e Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 9 Oct 2025 18:55:08 +0200 Subject: [PATCH 38/41] update Provider classes and use get_metadata --- rdmo_zenodo/exports/__init__.py | 4 +-- rdmo_zenodo/exports/base.py | 31 +++++++++++++++++++- rdmo_zenodo/exports/export.py | 40 ++++++-------------------- rdmo_zenodo/exports/forms.py | 3 +- rdmo_zenodo/exports/publish.py | 50 +++++++++------------------------ rdmo_zenodo/exports/utils.py | 4 +-- 6 files changed, 56 insertions(+), 76 deletions(-) diff --git a/rdmo_zenodo/exports/__init__.py b/rdmo_zenodo/exports/__init__.py index ca69198..036cfa9 100644 --- a/rdmo_zenodo/exports/__init__.py +++ b/rdmo_zenodo/exports/__init__.py @@ -1,2 +1,2 @@ -from .export_dataset import ZenodoExportProvider as ZenodoExportProvider -from .publish_snapshot import ZenodoPublishProvider as ZenodoPublishProvider +from .export import ZenodoExportProvider as ZenodoExportProvider +from .publish import ZenodoPublishProvider as ZenodoPublishProvider diff --git a/rdmo_zenodo/exports/base.py b/rdmo_zenodo/exports/base.py index 5b24701..bb33500 100644 --- a/rdmo_zenodo/exports/base.py +++ b/rdmo_zenodo/exports/base.py @@ -6,6 +6,9 @@ from rdmo.projects.exports import Export from rdmo.services.providers import OauthProviderMixin +from rdmo_zenodo.exports.metadata.builder import build_payload +from rdmo_zenodo.exports.metadata.context import MetadataContext + logger = logging.getLogger(__name__) json_header = { @@ -30,6 +33,12 @@ def client_secret(self): def zenodo_url(self): return settings.ZENODO_PROVIDER.get('zenodo_url', 
'https://zenodo.org').strip('/') + @property + def zenodo_backend_type(self): + if 'zenodo' in self.zenodo_url: + return 'zenodo' + return 'invenio' + @property def authorize_url(self): return f'{self.zenodo_url}/oauth/authorize' @@ -46,6 +55,15 @@ def redirect_path(self): def authorization_header(self): return self.get_authorization_headers(self.get_from_session(self.request, 'access_token')) + @property + def authorization_scope(self): + scope = settings.ZENODO_PROVIDER.get('zenodo_auth_scope') + if scope: + return scope + if self.zenodo_backend_type == 'zenodo': + return 'deposit:write' + return 'user:email' + @property def authorized_binary_header(self): return {**binary_header, **self.authorization_header} @@ -86,7 +104,7 @@ def get_authorize_params(self, request, state): return { 'response_type': 'code', 'client_id': self.client_id, - 'scope': settings.ZENODO_PROVIDER.get('zenodo_auth_scope') or 'deposit:write', + 'scope': self.authorization_scope, 'redirect_uri': request.build_absolute_uri(self.redirect_path), 'state': state } @@ -99,3 +117,14 @@ def get_callback_data(self, request): 'redirect_uri': request.build_absolute_uri(self.redirect_path), 'code': request.GET.get('code') } + + def get_metadata(self, set_index=None): + context = MetadataContext( + project=self.project, + snapshot=self.snapshot, + set_index=set_index, + get_values=self.get_values, + get_text=self.get_text, + zenodo_backend_type=self.zenodo_backend_type, + ) + return build_payload(context, self.zenodo_backend_type) diff --git a/rdmo_zenodo/exports/export.py b/rdmo_zenodo/exports/export.py index a59eda5..acb6814 100644 --- a/rdmo_zenodo/exports/export.py +++ b/rdmo_zenodo/exports/export.py @@ -1,13 +1,11 @@ import logging -from django.conf import settings from django.shortcuts import redirect, render from django.utils.translation import gettext_lazy as _ -from rdmo_zenodo.exports.metadata.dataset import DatasetZenodoMetadataBuilder - from .base import BaseZenodoExportProvider from .forms 
import ZenodoDatasetForm +from .metadata.exceptions import MetadataBuildError logger = logging.getLogger(__name__) @@ -38,8 +36,12 @@ def submit(self): if form.is_valid(): url = self.records_url - data = self.get_post_data(form.cleaned_data['dataset']) - return self.post(self.request, url, data) + try: + payload = self.get_metadata(set_index=form.cleaned_data['dataset']) + except MetadataBuildError as e: + form.add_error(None, str(e)) + return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=400) + return self.post(self.request, url, payload) else: return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) @@ -53,30 +55,4 @@ def post_success(self, request, response): 'errors': [_('The URL of the new dataset could not be retrieved.')] }, status=200) - def get_post_data(self, set_index): - # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata - dataset_title = self.get_text("project/dataset/title", set_index=set_index) - title = ( - dataset_title or - self.get_text('project/dataset/id', set_index=set_index) or - f'Dataset #{int(set_index) + 1}' - ) - description = f"Data Management Plan for project {self.project.title}." 
- - if dataset_title: - description += f" {dataset_title}" - - metadata_builder = DatasetZenodoMetadataBuilder( - title=title, - description=description, - keywords=[ - i.text - for i in self.get_values("project/research_question/keywords") if i.text - ], - rights_uri_paths=[ - i.option.uri_path - for i in self.get_values("project/dataset/sharing/conditions", set_index=set_index) if i.option - ], - project_users=self.project.user.all() if settings.ZENODO_PROVIDER.get("add_project_members") else [], - ) - return metadata_builder.to_post_data(filter_empty=True) + diff --git a/rdmo_zenodo/exports/forms.py b/rdmo_zenodo/exports/forms.py index 60c97e8..0182ec4 100644 --- a/rdmo_zenodo/exports/forms.py +++ b/rdmo_zenodo/exports/forms.py @@ -34,7 +34,6 @@ def __init__(self, *args, **kwargs): snapshot_choices = kwargs.pop('snapshot_choices', []) view_choices = kwargs.pop("view_choices", []) super().__init__(*args, **kwargs) - snapshot_choices = [(None, _("Create new snapshot")), *snapshot_choices] - self.fields['snapshot'].choices = snapshot_choices + self.fields['snapshot'].choices = [(None, _("Create new snapshot")), *snapshot_choices] self.fields['snapshot'].initial = None self.fields['view'].choices = view_choices diff --git a/rdmo_zenodo/exports/publish.py b/rdmo_zenodo/exports/publish.py index a3df3ba..1e7402f 100644 --- a/rdmo_zenodo/exports/publish.py +++ b/rdmo_zenodo/exports/publish.py @@ -1,6 +1,5 @@ import logging -from django.conf import settings from django.shortcuts import redirect, render from django.utils.formats import localize from django.utils.text import slugify @@ -10,17 +9,16 @@ from rdmo.projects.models import Project -from rdmo_zenodo.exports.metadata.snapshot import SnapshotZenodoMetadataBuilder - from .base import BaseZenodoExportProvider from .forms import ZenodoSnapshotForm +from .metadata.exceptions import MetadataBuildError from .utils import ( clear_record_id_from_project_value, get_concept_or_parent_id_from_payload, get_or_create_snapshot, 
get_record_id_from_project_value, render_and_export_project_from_view, - set_record_id_on_project_value, + save_record_id_in_project_value, ) logger = logging.getLogger(__name__) @@ -109,8 +107,12 @@ def submit(self): return self.post(self.request, record_versions_url, {}) else: # else create new draft record - data = self.get_post_data() - return self.post(self.request, self.records_url, data) + try: + payload = self.get_metadata() + except MetadataBuildError as e: + form.add_error(None, str(e)) + return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=400) + return self.post(self.request, self.records_url, payload) else: return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) @@ -132,7 +134,7 @@ def validate_record_id_from_project_value_at_zenodo(self): logger.info(f"Record ID {record_id} is valid.") concept_record_id = get_concept_or_parent_id_from_payload(response.json()) - set_record_id_on_project_value(self.project, concept_record_id) + save_record_id_in_project_value(self.project, concept_record_id) versions_url = response.json().get('links', {}).get('versions') return versions_url elif response.status_code == 404: @@ -151,7 +153,7 @@ def post_success(self, request, response): if 'versions' in response.request.url and 'publication_date' not in response.json().get('metadata',{}): # metadata needs to be posted to the new version with a new request and response zenodo_api_url = response.json().get('links', {}).get('self') - data = self.get_post_data() + data = self.get_metadata() response = requests.put(zenodo_api_url, json=data, headers=self.authorized_json_header) logger.debug("PUT to %s", zenodo_api_url) @@ -163,12 +165,12 @@ def post_success(self, request, response): concept_record_id = get_concept_or_parent_id_from_payload(payload) files_url = payload.get('links', {}).get('files') - _data_commit_pdf_response = self.post_export_file_to_zenodo( + self.post_export_file_to_zenodo( record_id=record_id, 
files_url=files_url, ) - _publish_response = self.publish_draft_record(record_id=record_id) + self.publish_draft_record(record_id=record_id) - set_record_id_on_project_value(self.project, concept_record_id) + save_record_id_in_project_value(self.project, concept_record_id) return redirect(zenodo_url) else: @@ -222,29 +224,3 @@ def publish_draft_record(self, record_id=None): response = requests.post(publish_url, headers=self.authorization_header) logger.debug("POST to %s", publish_url) return response - - def get_post_data(self): - # see https://inveniordm.docs.cern.ch/reference/metadata/ for invenio metadata - if self.project is None or self.snapshot is None: - raise ValueError("Project and Snapshot are required to get post data.") - - title = f"{self.project.title} - Snapshot: {self.snapshot.title}" - description = f"Data Management Plan for project {self.project.title}." - if self.snapshot.description: - description += f" {self.snapshot.description}" - description += f" Exported to {self.export_format} with the {self.view.title} view." 
- - metadata_builder = SnapshotZenodoMetadataBuilder( - title=title, - description=description, - keywords=[ - i.text - for i in self.get_values("project/research_question/keywords") if i.text - ], - rights_uri_paths=[ - i.option.uri_path - for i in self.get_values("project/dataset/sharing/conditions") if i.option - ], - project_users=self.project.user.all() if settings.ZENODO_PROVIDER.get("add_project_members") else [], - ) - return metadata_builder.to_post_data(filter_empty=True) diff --git a/rdmo_zenodo/exports/utils.py b/rdmo_zenodo/exports/utils.py index 6c44b7e..4430b07 100644 --- a/rdmo_zenodo/exports/utils.py +++ b/rdmo_zenodo/exports/utils.py @@ -56,7 +56,7 @@ def get_record_id_from_project_value(project): return None -def set_record_id_on_project_value(project, record_id): +def save_record_id_in_project_value(project, record_id): if project is None or record_id is None: return @@ -75,7 +75,7 @@ def set_record_id_on_project_value(project, record_id): def clear_record_id_from_project_value(project): """Clear the record_id text from the project's values by setting it to an empty string.""" - set_record_id_on_project_value(project, '') + save_record_id_in_project_value(project, '') def render_and_export_project_from_view(project, snapshot, export_format, view): From bcfafd70b99e1a3d12ad2f9346310a24b6bd039b Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 9 Oct 2025 18:55:45 +0200 Subject: [PATCH 39/41] build: add attrs and cattrs to deps --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index f5e5f67..0da6357 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,8 @@ classifiers = [ ] dependencies = [ "rdmo", + "attrs", + "cattrs", ] dynamic = ["version"] From e2d07447f638da1704553628ead8c36db5e01fa6 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 13 Oct 2025 14:17:47 +0200 Subject: [PATCH 40/41] fix metadata schemas --- rdmo_zenodo/exports/metadata/builder.py | 48 +-- 
rdmo_zenodo/exports/metadata/context.py | 2 +- rdmo_zenodo/exports/metadata/converter.py | 57 ++-- rdmo_zenodo/exports/metadata/extractors.py | 22 +- rdmo_zenodo/exports/metadata/invenio.py | 281 ++++++++++++------ .../exports/metadata/mapper_invenio.py | 35 --- .../metadata/{mapper_zenodo.py => mappers.py} | 29 +- rdmo_zenodo/exports/metadata/zenodo.py | 11 +- 8 files changed, 273 insertions(+), 212 deletions(-) delete mode 100644 rdmo_zenodo/exports/metadata/mapper_invenio.py rename rdmo_zenodo/exports/metadata/{mapper_zenodo.py => mappers.py} (63%) diff --git a/rdmo_zenodo/exports/metadata/builder.py b/rdmo_zenodo/exports/metadata/builder.py index bf5d51a..f15156b 100644 --- a/rdmo_zenodo/exports/metadata/builder.py +++ b/rdmo_zenodo/exports/metadata/builder.py @@ -7,31 +7,27 @@ from rdmo_zenodo.exports.metadata.context import MetadataContext from rdmo_zenodo.exports.metadata.converter import converter from rdmo_zenodo.exports.metadata.exceptions import SchemaValidationError -from rdmo_zenodo.exports.metadata.invenio import MetadataV6, RecordV6Payload -from rdmo_zenodo.exports.metadata.mapper_invenio import INVENIO_FIELD_MAPPER -from rdmo_zenodo.exports.metadata.mapper_zenodo import ZENODO_FIELD_MAPPER +from rdmo_zenodo.exports.metadata.invenio import InvenioMetadataV6, InvenioRecordV6Payload +from rdmo_zenodo.exports.metadata.mappers import INVENIO_FIELD_MAPPER, ZENODO_FIELD_MAPPER from rdmo_zenodo.exports.metadata.zenodo import ZenodoDepositionPayload, ZenodoMetadata -BACKENDS = { +METADATA_METHODS = { "zenodo": (ZENODO_FIELD_MAPPER, ZenodoMetadata, ZenodoDepositionPayload), - "invenio": (INVENIO_FIELD_MAPPER, MetadataV6, RecordV6Payload), + "invenio": (INVENIO_FIELD_MAPPER, InvenioMetadataV6, InvenioRecordV6Payload), } def extract_metadata(context: MetadataContext, fields: dict[str, Callable]): # 1: extract metadata from rdmo project values extracted = {} for name, getter in fields.items(): - # if the callable expects arguments, pass context; else call it 
directly - try: - sig = inspect.signature(getter) - if len(sig.parameters) == 0: - value = getter() - else: - value = getter(context) - except (TypeError, ValueError): + sig = inspect.signature(getter) + if len(sig.parameters) == 0: + value = getter() + elif len(sig.parameters) == 1: value = getter(context) - if value not in (None, "", [], {}): - extracted[name] = value + else: + raise ValueError(f"Unsupported getter signature {sig}") + extracted[name] = value return extracted @@ -51,30 +47,12 @@ def validate_schema(metadata_dict: dict[str, Any], schema: AttrsInstance) -> Any return converter.structure(metadata_dict, schema) except ClassValidationError as e: raise SchemaValidationError( - "Schema validation failed", details=",".join(transform_error(e)) + f"Schema validation failed for {schema.__name__}", details=",".join(transform_error(e)) ) from e except (TypeError, ValueError) as e: raise SchemaValidationError("Invalid metadata structure", details=str(e)) from e -def build_payload_object(metadata_obj: Any, payload_cls: type) -> Any: - # 3: build payload dataclass instance - return payload_cls(metadata=metadata_obj) - - def serialize_payload(payload_obj: Any) -> dict[str, Any]: - # 4: convert payload object to JSON-serializable dict + # convert payload object to JSON-serializable dict return converter.unstructure(payload_obj) - - -def build_payload(context: MetadataContext, backend: str) -> dict[str, Any]: - # main entrypoint: run the extraction → validation → serialization pipeline - try: - mapper, schema, payload_cls = BACKENDS[backend] - except KeyError: - raise ValueError(f"Unknown backend: {backend!r}") from None - - metadata_dict = extract_metadata(context, mapper) - metadata_obj = validate_schema(metadata_dict, schema) - payload_obj = build_payload_object(metadata_obj, payload_cls) - return serialize_payload(payload_obj) diff --git a/rdmo_zenodo/exports/metadata/context.py b/rdmo_zenodo/exports/metadata/context.py index 6a23711..f0946b4 100644 --- 
a/rdmo_zenodo/exports/metadata/context.py +++ b/rdmo_zenodo/exports/metadata/context.py @@ -12,10 +12,10 @@ class MetadataContext: project: Project snapshot: Snapshot | None - set_index: int | None get_values: Callable[..., list[Any]] get_text: Callable[..., str | None] zenodo_backend_type: str + set_index: int | None = None view: Any | None = None export_format: str | None = None diff --git a/rdmo_zenodo/exports/metadata/converter.py b/rdmo_zenodo/exports/metadata/converter.py index 1862f6c..94470f0 100644 --- a/rdmo_zenodo/exports/metadata/converter.py +++ b/rdmo_zenodo/exports/metadata/converter.py @@ -1,60 +1,69 @@ from __future__ import annotations -from typing import Any +from typing import Any, get_args, get_origin import attr import cattrs -from rdmo_zenodo.exports.metadata.invenio import Language, ResourceType, Rights, Role +from rdmo_zenodo.exports.metadata.invenio import Language, ResourceType +from rdmo_zenodo.exports.metadata.zenodo import Community, Grant -# Single shared converter instance converter = cattrs.Converter() _EMPTY = (None, "", [], {}) -def _strip_empty(value: Any) -> Any: - """Recursively drop None, '', [], {} anywhere in the structure.""" +def strip_empty(value: Any) -> Any: if isinstance(value, dict): # Clean nested first, then drop empty keys. - cleaned = {k: _strip_empty(v) for k, v in value.items()} + cleaned = {k: strip_empty(v) for k, v in value.items()} return {k: v for k, v in cleaned.items() if v not in _EMPTY} if isinstance(value, list): # Clean nested first, then drop empty items. - cleaned = [_strip_empty(v) for v in value] + cleaned = [strip_empty(v) for v in value] return [v for v in cleaned if v not in _EMPTY] return value -def _unstructure_attrs_and_strip(inst: Any) -> Any: - """ - Unstructure one attrs instance: - - Recursively unstructure every field via the converter (important!) 
- - Strip empties from the resulting dict - """ +def unstructure_attrs_and_strip(inst: Any) -> Any: cls = type(inst) # Use attr.fields to walk declared attrs fields data = {f.name: converter.unstructure(getattr(inst, f.name)) for f in attr.fields(cls)} - return _strip_empty(data) + return strip_empty(data) # Apply to ALL attrs-based classes (top-level and nested). # Using attr.has as the predicate means: “if this is an @attrs class, use the hook”. converter.register_unstructure_hook_factory( attr.has, - lambda _cls: _unstructure_attrs_and_strip, + lambda _cls: unstructure_attrs_and_strip, ) -def make_simple_id_hook(cls): - def _hook(value: Any, _: type) -> Any: - if isinstance(value, cls): - return value +def register_from_string_hook(converter, cls): + converter.register_structure_hook( + cls, + lambda v, _: cls.from_string(v) if isinstance(v, str) else cls(**v) + ) + +for simple_cls in (Language, ResourceType, Grant, Community): + register_from_string_hook(converter, simple_cls) + +def make_list_hook(inner_cls): + def _hook(value: Any, _: Any): if isinstance(value, str): - return cls(id=value) + return [inner_cls.from_string(value)] if isinstance(value, dict): - return cls(**value) - raise TypeError(f"Cannot structure {value!r} as {cls.__name__}") + return [inner_cls(**value)] + if isinstance(value, list): + return [inner_cls.from_string(v) if isinstance(v, str) else inner_cls(**v) for v in value] + raise TypeError(f"Cannot structure {value!r} as list[{inner_cls.__name__}]") return _hook -for simple_cls in (ResourceType, Language, Role, Rights): - converter.register_structure_hook(simple_cls, make_simple_id_hook(simple_cls)) +def make_list_pred(inner_cls): + return lambda tp: get_origin(tp) is list and get_args(tp) == (inner_cls,) + +for simple_cls in (Language, Grant, Community): + converter.register_structure_hook_func( + make_list_pred(simple_cls), + make_list_hook(simple_cls), + ) diff --git a/rdmo_zenodo/exports/metadata/extractors.py 
b/rdmo_zenodo/exports/metadata/extractors.py index 2504d7f..84205d8 100644 --- a/rdmo_zenodo/exports/metadata/extractors.py +++ b/rdmo_zenodo/exports/metadata/extractors.py @@ -26,7 +26,7 @@ def get_title_from_project(context: MetadataContext) -> str: def get_title_from_dataset(context: MetadataContext) -> str: if context.set_index is None: - return "Dataset" + raise ValueError("Cannot extract title from dataset without set_index") title = context.get_text("project/dataset/title", set_index=context.set_index) if title: return title @@ -82,11 +82,12 @@ def get_publication_type_from_settings() -> str | None: return settings.ZENODO_PROVIDER.get("publication_type", "datamanagementplan") return None -def get_resource_type_from_settings_and_context(context) -> dict[str, str]: - default = "publication-datamanagementplan" +def get_resource_type_from_settings_and_context(context) -> str: + if resource_type := settings.ZENODO_PROVIDER.get("resource_type"): + return resource_type if context.set_index is not None: - default = "dataset" - return settings.ZENODO_PROVIDER.get("resource_type", default) + return "dataset" + return "publication-datamanagementplan" def get_language_from_settings() -> str | None: if language := settings.ZENODO_PROVIDER.get("language"): @@ -132,26 +133,21 @@ def get_zenodo_creator_from_user(user): def get_creators_from_context(context: MetadataContext) -> list[dict[str, Any]]: creators = [] - if context.zenodo_backend_type == "zenodo": - get_creator = get_invenio_creator_from_user - elif context.zenodo_backend_type == "invenio": - get_creator = get_invenio_creator_from_user - else: - raise ValueError(f"Unsupported backend type: {context.zenodo_backend_type}") + get_creator = get_invenio_creator_from_user for user in context.project_members: creators.append(get_creator(user)) return creators # === licenses, subjects, keywords === -def get_license_id_from_context(context: MetadataContext) -> list[dict[str, str]]: +def get_license_id_from_context(context: 
MetadataContext) -> list[dict[str, str]] | list[dict[str, dict[str,str]]]: set_index = context.set_index if context.set_index is not None else 0 values = context.get_values("project/dataset/sharing/conditions", set_index=set_index) for v in values: if v.option and (license_id := RIGHTS_URI_OPTIONS.get(v.option.uri_path)): return [{"id": license_id}] if v.option.additional_input == "text" and v.text: - return [{"id": v.text}] + return [{"title": {"en": v.text}}] return [] def get_keywords_from_context(context: MetadataContext) -> list[str]: diff --git a/rdmo_zenodo/exports/metadata/invenio.py b/rdmo_zenodo/exports/metadata/invenio.py index b60eadb..99e14da 100644 --- a/rdmo_zenodo/exports/metadata/invenio.py +++ b/rdmo_zenodo/exports/metadata/invenio.py @@ -1,4 +1,6 @@ -# invenio_v6_models_strict.py +# https://inveniordm.docs.cern.ch/reference/metadata/#metadata +# https://github.com/inveniosoftware/invenio-rdm-records/tree/master/invenio_rdm_records/records/jsonschemas/records + from __future__ import annotations from typing import Any, Literal @@ -7,29 +9,43 @@ from rdmo_zenodo.exports.metadata.utils import is_edtf_l0_date, is_iso_date -# ------------------------- core CV wrappers ------------------------- # +ISO639_1 = str +ISO639_3 = str # ISO-639-3 +IdentifierSchemes = Literal[ + 'ark', 'arxiv', 'ads', 'bibcode', 'crossreffunderid', 'doi', 'ean13', + 'eissn', 'grid', 'handle', 'igsn', 'isbn', 'issn', 'istc', 'lissn', + 'lsid', 'pmid', 'purl', 'upc', 'url', 'urn', 'w3id', 'other' +] + @attrs.define class ResourceType: - """metadata.resource_type: requires {'id': ''}.""" id: str + @classmethod + def from_string(cls, value: str) -> ResourceType: + return cls(id=value) + def __attrs_post_init__(self) -> None: if not self.id: raise ValueError("resource_type.id must be a non-empty string") @attrs.define -class Role: - """Controlled vocabulary wrapper for roles (creators/contributors, dates, etc.).""" - id: str - title: dict[str, str] | None = None # service may add 
i18n labels +class Creator: + person_or_org: PersonalPersonOrOrg | OrganizationalPersonOrOrg + role: Role | None = None # optional for creators + affiliations: list[Affiliation] = attrs.field(factory=list) -# ------------------------- identifiers & affiliations ------------------------- # + def __attrs_post_init__(self) -> None: + if ( + isinstance(self.person_or_org, OrganizationalPersonOrOrg) + and self.affiliations + ): + raise ValueError("affiliations are only allowed for personal creators") @attrs.define -class GenericIdentifier: - scheme: str # e.g. 'orcid', 'isni', 'ror', 'doi', ... - identifier: str +class Role: + id: str @attrs.define class Affiliation: @@ -41,68 +57,84 @@ def __attrs_post_init__(self) -> None: if not (self.id or self.name): raise ValueError("affiliation requires either 'id' or 'name'") -# ------------------------- person_or_org & party entries ------------------------- # - -PersonOrOrgType = Literal["personal", "organizational"] @attrs.define -class PersonOrOrg: - type: PersonOrOrgType - given_name: str | None = None - family_name: str | None = None - name: str | None = None - identifiers: list[GenericIdentifier] = attrs.field(factory=list) +class PersonalPersonOrOrg: + given_name: str + family_name: str + type: Literal["personal"] = "personal" + identifiers: list[CreatorIdentifier] = attrs.field(factory=list) def __attrs_post_init__(self) -> None: - if self.type == "personal": - if not (self.given_name and self.family_name): - raise ValueError("personal person_or_org requires given_name and family_name") - if self.name is not None: - raise ValueError("personal person_or_org must not set 'name'") - else: # organizational - if not self.name: - raise ValueError("organizational person_or_org requires 'name'") - if self.given_name or self.family_name: - raise ValueError("organizational person_or_org must not set given/family names") + if not (self.given_name and self.family_name): + raise ValueError("personal requires given_name and 
family_name") @attrs.define -class Creator: - person_or_org: PersonOrOrg - role: Role | None = None # optional for creators - affiliations: list[Affiliation] = attrs.field(factory=list) +class OrganizationalPersonOrOrg: + name: str + type: Literal["organizational"] = "organizational" + identifiers: list[AffiliationIdentifier] = attrs.field(factory=list) def __attrs_post_init__(self) -> None: - # affiliations only if personal (per docs) - if self.person_or_org.type == "organizational" and self.affiliations: - raise ValueError("affiliations are only allowed for personal creators") + if not self.name: + raise ValueError("organizational requires 'name'") @attrs.define -class Contributor: - person_or_org: PersonOrOrg - role: Role # required for contributors - affiliations: list[Affiliation] = attrs.field(factory=list) +class CreatorIdentifier: + scheme: Literal['orcid', 'gnd', 'isni', 'ror'] + identifier: str - def __attrs_post_init__(self) -> None: - if self.person_or_org.type == "organizational" and self.affiliations: - raise ValueError("affiliations are only allowed for personal contributors") +@attrs.define +class AffiliationIdentifier: + scheme: Literal['isni', 'ror'] + identifier: str + +@attrs.define +class AdditionTitles: + title: str + type: AdditionTitleType + lang: AdditionalLang | None = None -# ------------------------- rights, languages, subjects, dates ------------------------- # +@attrs.define +class AdditionalLang: + id: ISO639_3 + +@attrs.define +class AdditionTitleType: + id: Literal["alternative-title", "subtitle", "translated-title", "other"] + title: dict[ISO639_1, str] + +@attrs.define +class AdditionalDescriptions: + description: str # free-text + type: AdditionTitleType + lang: AdditionalLang | None = None + +@attrs.define +class AdditionDescriptionType: + id: Literal["abstract", "methods", "series-information", "table-of-contents", "technical-info", "other"] + title: dict[ISO639_1, str] @attrs.define class Rights: - # Either `id` (CV) or a 
free-text `title` (localized), but not both. - id: str | None = None - title: dict[str, str] | None = None - description: dict[str, str] | None = None + id: str | None = None # CV + title: dict[ISO639_1, str] | None = None # Localized human readable title + description: dict[ISO639_1, str] | None = None # Localized license description text link: str | None = None def __attrs_post_init__(self) -> None: - if bool(self.id) == bool(self.title): + if bool(self.id) and bool(self.title): raise ValueError("rights: either 'id' or 'title' must be set, but not both") @attrs.define -class Language: - id: str # ISO-639-3 code, e.g. 'eng', 'dan' +class Contributor: + person_or_org: PersonalPersonOrOrg | OrganizationalPersonOrOrg + role: Role # required for contributors + affiliations: list[Affiliation] = attrs.field(factory=list) + + def __attrs_post_init__(self) -> None: + if self.person_or_org.type == "organizational" and self.affiliations: + raise ValueError("affiliations are only allowed for personal contributors") @attrs.define class Subject: @@ -110,16 +142,32 @@ class Subject: subject: str | None = None # free keyword def __attrs_post_init__(self) -> None: - if bool(self.id) == bool(self.subject): + if self.id and self.subject: raise ValueError("subject: set exactly one of 'id' or 'subject'") @attrs.define -class DateEntry: +class Language: + id: ISO639_3 # ISO-639-3 code, e.g. 
'eng', 'dan' + + @classmethod + def from_string(cls, value: str) -> Language: + if len(value) != 3: + raise ValueError("language must be 3 characters long (ISO639-3)") + return cls(id=value) + +@attrs.define +class Date: date: str = attrs.field(converter=is_edtf_l0_date) - type: Role + type: DateRole description: str | None = None -# ------------------------- alternate/related identifiers ------------------------- # +@attrs.define +class DateRole: + id: Literal[ + 'accepted', 'available', 'collected', 'copyrighted', 'created', 'issued', + 'other', 'submitted', 'updated', 'valid', 'withdrawn' + ] + title: dict[ISO639_1, str] = None # only id needed on the REST API @attrs.define class AlternateIdentifier: @@ -129,67 +177,77 @@ class AlternateIdentifier: @attrs.define class RelatedIdentifier: identifier: str - scheme: str # CV scheme - relation_type: Role # {'id': ''} - resource_type: ResourceType | None = None # optional + scheme: IdentifierSchemes + relation_type: RelationType + resource_type: RelatedIdentifierResourceType | None = None -# ------------------------- funding (kept minimal; docs leave room for variations) ------------------------- # @attrs.define -class Funder: - id: str | None = None # commonly {'id': ''} or {'number': '...', 'title': {...}} - name: str | None = None # commonly {'id': ''} +class RelationType: + id: str # CV + title: dict[ISO639_1, str] | None = None @attrs.define -class Award: - id: str | None = None - title: str | None = None - number: str | None = None - identifiers: list[GenericIdentifier] | None = None - +class RelatedIdentifierResourceType: + id: str + title: dict[ISO639_1, str] @attrs.define class FundingRef: - funder: Funder # commonly {'id': ''} - award: dict[str, Any] | None = None # commonly {'id': ''} or {'number': '...', 'title': {...}} + funder: Funder + award: Award | None = None + +@attrs.define +class Funder: + id: str | None = None # from CV + name: str | None = None # free-text -# ------------------------- access 
------------------------- # + def __attrs_post_init__(self): + if self.id and self.name: + raise ValueError("funder: one of 'id' or 'name' must be set") @attrs.define -class AccessEmbargo: - active: bool - until: str | None = attrs.field(default=None, converter=lambda v: is_iso_date(v) if v else None) - reason: str | None = None +class Award: + id: str | None = None + title: dict[ISO639_1,str] | None = None + number: str | None = None + identifiers: list[AwardIdentifier] | None = None - def __attrs_post_init__(self) -> None: - if self.active and not self.until: - raise ValueError("embargo.until (YYYY-MM-DD) is required when embargo.active is true") + def __attrs_post_init__(self): + has_id = bool(self.id) + has_fallback = bool(self.title and self.number) + if not (has_id or has_fallback): + raise ValueError("award: one of 'id' or ('title' and 'number') must be set") @attrs.define -class AccessBlock: - record: Literal["public", "restricted"] - files: Literal["public", "restricted"] - embargo: AccessEmbargo | None = None +class AwardIdentifier: + scheme: IdentifierSchemes + identifier: str -# ------------------------- metadata & top-level payload ------------------------- # +@attrs.define +class References: + reference: str + scheme: IdentifierSchemes | None = None + identifier: str | None = None @attrs.define -class MetadataV6: +class InvenioMetadataV6: resource_type: ResourceType title: str publication_date: str = attrs.field(converter=is_edtf_l0_date) creators: list[Creator] = attrs.field(factory=list) - description: str | None = None - additional_descriptions: list[dict[str, Any]] = attrs.field(factory=list) - additional_titles: list[dict[str, Any]] = attrs.field(factory=list) + additional_titles: list[AdditionTitles] = attrs.field(factory=list) + description: str | None = None # may use certain HTML tags + additional_descriptions: list[AdditionalDescriptions] = attrs.field(factory=list) rights: list[Rights] = attrs.field(factory=list) + copyright: str | None = 
None # free-text contributors: list[Contributor] = attrs.field(factory=list) subjects: list[Subject] = attrs.field(factory=list) languages: list[Language] = attrs.field(factory=list) - dates: list[DateEntry] = attrs.field(factory=list) + dates: list[Date] = attrs.field(factory=list) - version: str | None = None + version: str | None = None # eg. semantic versioning publisher: str | None = None alternate_identifiers: list[AlternateIdentifier] = attrs.field(factory=list) @@ -211,11 +269,44 @@ def __attrs_post_init__(self) -> None: raise ValueError("metadata.resource_type.id is required") @attrs.define -class RecordV6Payload: - metadata: MetadataV6 - access: AccessBlock - files: dict[str, Any] | None = None - pids: dict[str, Any] | None = None +class Access: + record: Literal["public", "restricted"] = "public" + files: Literal["public", "restricted"] = "public" + embargo: AccessEmbargo | None = None + +@attrs.define +class AccessEmbargo: + active: bool + until: str | None = attrs.field(default=None, converter=lambda v: is_iso_date(v) if v else None) + reason: str | None = None + + def __attrs_post_init__(self) -> None: + if self.active and not self.until: + raise ValueError("embargo.until (YYYY-MM-DD) is required when embargo.active is true") + +@attrs.define +class FilesOptions: + enabled: bool # should (and can) files be attached to this record or not. 
+ default_preview: str | None = None + order: list[str] = attrs.field(factory=list) + +@attrs.define +class ExternalPID: + doi: DOI | None = None + +@attrs.define +class DOI: + identifier: str + provider: str + client: str | None = None + +@attrs.define +class InvenioRecordV6Payload: + metadata: InvenioMetadataV6 + access: Access = attrs.field(factory=Access) + files: FilesOptions | None = None + pids: ExternalPID | None = None + custom_fields: dict | None = None def to_dict(self) -> dict[str, Any]: d = attrs.asdict(self, recurse=True) diff --git a/rdmo_zenodo/exports/metadata/mapper_invenio.py b/rdmo_zenodo/exports/metadata/mapper_invenio.py deleted file mode 100644 index c6aa9f5..0000000 --- a/rdmo_zenodo/exports/metadata/mapper_invenio.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -from typing import Any, Callable - -from rdmo_zenodo.exports.metadata.extractors import ( - get_creators_from_context, - get_description_from_project, - get_funding_from_settings, - get_language_from_settings, - get_license_id_from_context, - get_publication_date_from_today, - get_publisher_from_settings, - get_resource_type_from_settings_and_context, - get_subjects_from_keywords_and_context, - get_title_from_context, -) - -FieldGetter = Callable[[Any], Any] - -INVENIO_FIELD_MAPPER: dict[str, FieldGetter] = { - # required core fields - "resource_type": get_resource_type_from_settings_and_context, - "title": get_title_from_context, - "publication_date": get_publication_date_from_today, - "description": get_description_from_project, - "creators": get_creators_from_context, - - # optional metadata - "funding": get_funding_from_settings, - "subjects": get_subjects_from_keywords_and_context, - "languages": get_language_from_settings, - "rights": get_license_id_from_context, - "publisher": get_publisher_from_settings, -} - diff --git a/rdmo_zenodo/exports/metadata/mapper_zenodo.py b/rdmo_zenodo/exports/metadata/mappers.py similarity index 63% rename from 
rdmo_zenodo/exports/metadata/mapper_zenodo.py rename to rdmo_zenodo/exports/metadata/mappers.py index 8edf769..4eccf26 100644 --- a/rdmo_zenodo/exports/metadata/mapper_zenodo.py +++ b/rdmo_zenodo/exports/metadata/mappers.py @@ -1,6 +1,3 @@ -# rdmo_zenodo/exports/metadata/mapper_zenodo.py -from __future__ import annotations - from typing import Any, Callable from rdmo_zenodo.exports.metadata.extractors import ( @@ -14,6 +11,7 @@ get_publication_type_from_settings, get_publisher_from_settings, get_resource_type_from_settings_and_context, + get_subjects_from_keywords_and_context, get_title_from_context, get_upload_type_from_settings, ) @@ -21,19 +19,34 @@ FieldGetter = Callable[[Any], Any] ZENODO_FIELD_MAPPER: dict[str, FieldGetter] = { - # core required metadata + # required fields "upload_type": get_upload_type_from_settings, - "resource_type": get_resource_type_from_settings_and_context, "publication_type": get_publication_type_from_settings, - "publication_date": get_publication_date_from_today, + "resource_type": get_resource_type_from_settings_and_context, "title": get_title_from_context, - "description": get_description_from_project, + "publication_date": get_publication_date_from_today, "creators": get_creators_from_context, + "description": get_description_from_project, - # optional fields and extras + # optional metadata fields "funding": get_funding_from_settings, "keywords": get_keywords_from_context, "languages": get_language_from_settings, "license": get_license_id_from_context, "publisher": get_publisher_from_settings, } +INVENIO_FIELD_MAPPER: dict[str, FieldGetter] = { + # required fields + "resource_type": get_resource_type_from_settings_and_context, + "title": get_title_from_context, + "publication_date": get_publication_date_from_today, + "creators": get_creators_from_context, + "description": get_description_from_project, + + # optional metadata fields + "funding": get_funding_from_settings, + "subjects": get_subjects_from_keywords_and_context, + 
"languages": get_language_from_settings, + "rights": get_license_id_from_context, + "publisher": get_publisher_from_settings, +} diff --git a/rdmo_zenodo/exports/metadata/zenodo.py b/rdmo_zenodo/exports/metadata/zenodo.py index 3219a88..2b1be99 100644 --- a/rdmo_zenodo/exports/metadata/zenodo.py +++ b/rdmo_zenodo/exports/metadata/zenodo.py @@ -1,5 +1,6 @@ -# References, https://zenodraft.github.io/metadata-schema-zenodo/latest/schema.json +# https://zenodraft.github.io/metadata-schema-zenodo/latest/schema.json # https://developers.zenodo.org/#depositions + from __future__ import annotations from typing import Any, Literal @@ -64,10 +65,18 @@ class Identifier: class Grant: id: str # e.g. "10.13039/501100000780::101122483" + @classmethod + def from_string(cls, value: str) -> Grant: + return cls(id=value) + @attrs.define class Community: identifier: str # e.g. "zenodo-community-id" + @classmethod + def from_string(cls, value: str) -> Community: + return cls(identifier=value) + @attrs.define class RelatedIdentifier: identifier: str From cd99917c127f9dd77583d3f38ce3bb64a2d0c9bc Mon Sep 17 00:00:00 2001 From: David Wallace Date: Mon, 13 Oct 2025 14:18:56 +0200 Subject: [PATCH 41/41] handle auth error and refactor get_metadata --- rdmo_zenodo/exports/base.py | 29 +++++++++++--- rdmo_zenodo/exports/export.py | 19 ++++----- rdmo_zenodo/exports/publish.py | 72 ++++++++++++++++++++++++---------- 3 files changed, 85 insertions(+), 35 deletions(-) diff --git a/rdmo_zenodo/exports/base.py b/rdmo_zenodo/exports/base.py index bb33500..380328d 100644 --- a/rdmo_zenodo/exports/base.py +++ b/rdmo_zenodo/exports/base.py @@ -6,7 +6,7 @@ from rdmo.projects.exports import Export from rdmo.services.providers import OauthProviderMixin -from rdmo_zenodo.exports.metadata.builder import build_payload +from rdmo_zenodo.exports.metadata.builder import METADATA_METHODS, extract_metadata, serialize_payload, validate_schema from rdmo_zenodo.exports.metadata.context import MetadataContext logger = 
logging.getLogger(__name__) @@ -57,8 +57,7 @@ def authorization_header(self): @property def authorization_scope(self): - scope = settings.ZENODO_PROVIDER.get('zenodo_auth_scope') - if scope: + if scope := settings.ZENODO_PROVIDER.get('zenodo_auth_scope'): return scope if self.zenodo_backend_type == 'zenodo': return 'deposit:write' @@ -118,8 +117,16 @@ def get_callback_data(self, request): 'code': request.GET.get('code') } - def get_metadata(self, set_index=None): - context = MetadataContext( + def post_with_retry(self, request, url, data): + response = self.post(request, url, data) + # Hacky way: in case of OAuth error (from e.g. 403), pop access_token and re-try + if 'OAuth' in response.content.decode(): + self.pop_from_session(request, 'access_token') + response = self.post(request, url, data) + return response + + def get_metadata_context(self, set_index=None): + return MetadataContext( project=self.project, snapshot=self.snapshot, set_index=set_index, @@ -127,4 +134,14 @@ def get_metadata(self, set_index=None): get_text=self.get_text, zenodo_backend_type=self.zenodo_backend_type, ) - return build_payload(context, self.zenodo_backend_type) + + def get_metadata(self, set_index=None): + + context = self.get_metadata_context(set_index=set_index) + + mapper, schema, payload_cls = METADATA_METHODS[self.zenodo_backend_type] + metadata_dict = extract_metadata(context, mapper) + metadata_obj = validate_schema(metadata_dict, schema) + + payload_obj = payload_cls(metadata=metadata_obj) + return serialize_payload(payload_obj) diff --git a/rdmo_zenodo/exports/export.py b/rdmo_zenodo/exports/export.py index acb6814..846489c 100644 --- a/rdmo_zenodo/exports/export.py +++ b/rdmo_zenodo/exports/export.py @@ -40,19 +40,20 @@ def submit(self): payload = self.get_metadata(set_index=form.cleaned_data['dataset']) except MetadataBuildError as e: form.add_error(None, str(e)) - return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=400) - return 
self.post(self.request, url, payload) + return render( + self.request, 'plugins/exports_zenodo.html', {'form': form}, status=400 + ) + return self.post_with_retry(self.request, url, payload) + else: return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) def post_success(self, request, response): - zenodo_url = response.json().get('links', {}).get('self_html') - if zenodo_url: + if zenodo_url := response.json().get('links', {}).get('self_html'): return redirect(zenodo_url) - else: - return render(request, 'core/error.html', { - 'title': _('ZENODO error'), - 'errors': [_('The URL of the new dataset could not be retrieved.')] - }, status=200) + return render(request, 'core/error.html', { + 'title': _('ZENODO error'), + 'errors': [_('The URL of the new dataset could not be retrieved.')] + }, status=200) diff --git a/rdmo_zenodo/exports/publish.py b/rdmo_zenodo/exports/publish.py index 1e7402f..ef7c579 100644 --- a/rdmo_zenodo/exports/publish.py +++ b/rdmo_zenodo/exports/publish.py @@ -1,5 +1,6 @@ import logging +from django.http import HttpResponseBadRequest from django.shortcuts import redirect, render from django.utils.formats import localize from django.utils.text import slugify @@ -99,26 +100,24 @@ def submit(self): self.store_in_session(self.request, 'view_id', self.view.id) self.store_in_session(self.request, 'export_format', self.export_format) - record_versions_url = self.validate_record_id_from_project_value_at_zenodo() - # TODO, currently the authentication can get stuck when trying out the dataset export - # first and this one afterwards, a 403 needs to be handled in the Export class. - if record_versions_url: + if record_versions_url := self.validate_record_id_from_project_value_at_zenodo(): # if record exists then post new version to zenodo, no data required - return self.post(self.request, record_versions_url, {}) + # a 403 post_with_retry handled in retry. 
+ return self.post_with_retry(self.request, record_versions_url, {}) else: # else create new draft record try: payload = self.get_metadata() except MetadataBuildError as e: form.add_error(None, str(e)) - return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=400) - return self.post(self.request, self.records_url, payload) + return render( + self.request, 'plugins/exports_zenodo.html', {'form': form}, status=400 + ) + return self.post_with_retry(self.request, self.records_url, payload) else: return render(self.request, 'plugins/exports_zenodo.html', {'form': form}, status=200) def validate_record_id_from_project_value_at_zenodo(self): - """Validate the Zenodo record_id stored in the project.""" - # Retrieve record_id from the project's stored values record_id = get_record_id_from_project_value(self.project) @@ -150,10 +149,17 @@ def post_success(self, request, response): # Retrieve project,snapshot,view and export_format from session self.get_from_session_and_set_on_self(request) self.request = request # and set request on self - if 'versions' in response.request.url and 'publication_date' not in response.json().get('metadata',{}): + if not response.json()['is_draft']: # and ... response.json()['status'] == ... 
# metadata needs to be posted to the new version with a new request and response zenodo_api_url = response.json().get('links', {}).get('self') - data = self.get_metadata() + try: + data = self.get_metadata() + except MetadataBuildError as e: + return render(request, 'core/error.html', { + 'title': _('Metadata error'), + 'errors': [_('Error in the metadata'), str(e)] + }, status=200) + response = requests.put(zenodo_api_url, json=data, headers=self.authorized_json_header) logger.debug("PUT to %s", zenodo_api_url) @@ -164,19 +170,44 @@ def post_success(self, request, response): record_id = payload.get('id') concept_record_id = get_concept_or_parent_id_from_payload(payload) files_url = payload.get('links', {}).get('files') - - self.post_export_file_to_zenodo( + export_response = self.post_export_file_to_zenodo( record_id=record_id, files_url=files_url, ) - self.publish_draft_record(record_id=record_id) + if 500 > export_response.status_code >= 400: + if isinstance(export_response, HttpResponseBadRequest): + if export_response.content.decode().startswith('Render to format failed.'): + message = 'Render to format failed. Try another view or format.' 
+ else: + message = export_response.content.decode() + + return render(request, 'core/error.html', { + 'title': _('Export error'), + 'errors': [_('The project could not be exported.'), message], + }, status=200) + + if export_response.url.startswith(self.zenodo_url): + return render(request, 'core/error.html', { + 'title': _('Export error'), + 'errors': [_('The project could not be uploaded.'), response.json().get('message')], + }, status=200) + + + publish_response = self.publish_draft_record(record_id=record_id) + if 500 > publish_response.status_code >= 400: + return render(request, 'core/error.html', { + 'title': _('Publish error'), + 'errors': [_('The project could not be published.'), + publish_response.json()['message'], + publish_response.json()['errors'], + ], + }, status=200) save_record_id_in_project_value(self.project, concept_record_id) - return redirect(zenodo_url) else: return render(request, 'core/error.html', { 'title': _('ZENODO error'), - 'errors': [_('The URL of the new dataset could not be retrieved.')] + 'errors': [_('The URL of the new publication could not be retrieved.')] }, status=200) def post_export_file_to_zenodo( @@ -191,8 +222,8 @@ def post_export_file_to_zenodo( self.project, self.snapshot, self.export_format, view=self.view ) if rdmo_render_response.status_code != 200: - logger.debug("Render failed: %s", rdmo_render_response.content.decode()) - return None + logger.error("Render failed: %s", rdmo_render_response.content.decode()) + return rdmo_render_response binary = rdmo_render_response.content export_filename = slugify(self.snapshot.title) @@ -203,7 +234,8 @@ def post_export_file_to_zenodo( entries = draft_file_post_response.json().get('entries', []) draft_file_entry = next(filter(lambda i: i["key"] == filename, entries), None) if draft_file_entry is None: - return None + breakpoint() + return draft_file_post_response content_url = draft_file_entry.get('links', {}).get('content') _data_content_response = requests.put(content_url, 
headers=self.authorized_binary_header, data=binary) @@ -222,5 +254,5 @@ def publish_draft_record(self, record_id=None): return None publish_url = self.record_publish_url(record_id) response = requests.post(publish_url, headers=self.authorization_header) - logger.debug("POST to %s", publish_url) + logger.debug("POST to %s with response ", publish_url, response) return response