From 9d849bebcc20749d456110d469a5ce3d585f88f4 Mon Sep 17 00:00:00 2001 From: Saksham Date: Thu, 18 Dec 2025 10:12:38 +0100 Subject: [PATCH 1/2] fix(redirector): Redirect to download for non-previewable files --- site/cds_rdm/legacy/redirector.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/site/cds_rdm/legacy/redirector.py b/site/cds_rdm/legacy/redirector.py index f60e0d7..a6185e4 100644 --- a/site/cds_rdm/legacy/redirector.py +++ b/site/cds_rdm/legacy/redirector.py @@ -8,6 +8,8 @@ """Redirector functions and rules.""" +from pathlib import Path + from flask import ( Blueprint, abort, @@ -58,9 +60,21 @@ def legacy_files_redirect(legacy_id, filename): record = get_record_by_version(parent_pid.pid_value, version) except PermissionDeniedError: return abort(403) + + file_path = Path(filename) + filename_ext = file_path.suffix[1:].lower() if file_path.suffix else "" + # Directly download files from redirected link to replicate the `allfiles-` behaviour from legacy if filename.startswith("allfiles-"): url_path = record["links"]["archive"] + # If the file is not previewable, redirect to the file download link instead + elif filename_ext != "" and filename_ext not in current_app.config["IIIF_FORMATS"]: + url_path = url_for( + "invenio_app_rdm_records.record_file_download", + pid_value=record["id"], + filename=filename, + **query_params, + ) else: url_path = url_for( "invenio_app_rdm_records.record_file_preview", From 1684558820e6413b9a9b76efc2b8eb2eaca2d41d Mon Sep 17 00:00:00 2001 From: Saksham Date: Thu, 18 Dec 2025 15:01:32 +0100 Subject: [PATCH 2/2] fix(redirector): Find file in older versions if no version provided --- site/cds_rdm/legacy/redirector.py | 31 +++++++++++++++++++------------ site/cds_rdm/legacy/resolver.py | 23 ++++++++++++++--------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/site/cds_rdm/legacy/redirector.py b/site/cds_rdm/legacy/redirector.py index a6185e4..42ec18b 100644 --- a/site/cds_rdm/legacy/redirector.py 
+++ b/site/cds_rdm/legacy/redirector.py
@@ -25,7 +25,11 @@
 from sqlalchemy.orm.exc import NoResultFound
 
 from .errors import VersionNotFound
-from .resolver import get_pid_by_legacy_recid, get_record_by_version
+from .resolver import (
+    get_pid_by_legacy_recid,
+    get_record_by_version,
+    get_record_versions,
+)
 
 HTTP_MOVED_PERMANENTLY = 301
 
@@ -58,23 +62,26 @@ def legacy_files_redirect(legacy_id, filename):
     version = query_params.pop("version", None)
     try:
         record = get_record_by_version(parent_pid.pid_value, version)
+        # Directly download files from redirected link to replicate the `allfiles-` behaviour from legacy
+        if filename.startswith("allfiles-"):
+            return redirect(record["links"]["archive"], HTTP_MOVED_PERMANENTLY)
+
+        # If no version is provided, walk the versions newest-first (keys are stringified indexes, so sort them numerically) and use the first one that contains the file
+        if version is None:
+            all_versions = get_record_versions(record["id"])
+            for version_index in sorted(all_versions, key=int, reverse=True):
+                record_version = all_versions[version_index]
+                if filename in record_version["files"]["entries"]:
+                    record = record_version
+                    break
     except PermissionDeniedError:
         return abort(403)
 
     file_path = Path(filename)
     filename_ext = file_path.suffix[1:].lower() if file_path.suffix else ""
-
-    # Directly download files from redirected link to replicate the `allfiles-` behaviour from legacy
-    if filename.startswith("allfiles-"):
-        url_path = record["links"]["archive"]
     # If the file is not previewable, redirect to the file download link instead
-    elif filename_ext != "" and filename_ext not in current_app.config["IIIF_FORMATS"]:
-        url_path = url_for(
-            "invenio_app_rdm_records.record_file_download",
-            pid_value=record["id"],
-            filename=filename,
-            **query_params,
-        )
+    if filename_ext != "" and filename_ext not in current_app.config["IIIF_FORMATS"]:
+        url_path = record["files"]["entries"][filename]["links"]["content"]
     else:
         url_path = url_for(
             "invenio_app_rdm_records.record_file_preview",
diff --git
a/site/cds_rdm/legacy/resolver.py b/site/cds_rdm/legacy/resolver.py
index 4413312..90b96bf 100644
--- a/site/cds_rdm/legacy/resolver.py
+++ b/site/cds_rdm/legacy/resolver.py
@@ -33,6 +33,17 @@ def get_pid_by_legacy_recid(legacy_recid):
     return parent_pid
 
 
+def get_record_versions(record_id):
+    """Get the versions of a record as a dict keyed by ``str(versions.index)``."""
+    # Scan the record's versions and index each hit by its stringified version number
+    search_result = current_rdm_records_service.scan_versions(
+        identity=g.identity,
+        id_=record_id,
+    )
+    record_versions = {str(hit["versions"]["index"]): hit for hit in search_result}
+    return record_versions
+
+
 def get_record_by_version(parent_pid_value, version):
     """Get record by parent pid value and version."""
     latest_record = current_rdm_records_service.read_latest(
@@ -40,14 +51,8 @@ def get_record_by_version(parent_pid_value, version):
     )
     if not version or version == "all" or latest_record["versions"]["index"] == version:
         return latest_record
-
-    # Use the version number to get the desired record pid value
-    hits = current_rdm_records_service.search_versions(
-        identity=g.identity,
-        id_=latest_record["id"],
-        extra_filter=dsl.Q("term", **{"versions.index": version}),
-    ).to_dict()["hits"]["hits"]
-    if not hits:
+    record_versions = get_record_versions(latest_record["id"])
+    if version not in record_versions.keys():
         # If record is not found, that means the version doesn't exist
         raise VersionNotFound(version=version, latest_record=latest_record)
-    return hits[0]
+    return record_versions[version]