From 294b7628d7fc7e86a33978e3b7f585fccb10f2ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?=
Date: Wed, 3 Dec 2025 15:50:28 +0100
Subject: [PATCH] subtitles: support srt files and convert to vtt

---
Review notes:
- _create_vtt_from_srt closes the VTT stream, logs failures instead of
  swallowing them, and builds the temporary path from a fixed file name
  (object keys may contain "/").
- _create_tags calls the helper directly; the helper no longer raises.
- Whitespace of context/removed lines and the hunk headers were rebuilt by
  hand and the index hashes were not refreshed; regenerate the patch with
  `git format-patch` before applying.

 cds/modules/deposit/ext.py                    | 72 +++++++++++++++++--
 .../cds_deposit/types/video/uploader.html     | 16 ++---
 .../cds_deposit/avc/components/cdsUploader.js | 14 ++--
 requirements.txt                              |  1 +
 4 files changed, 87 insertions(+), 16 deletions(-)

diff --git a/cds/modules/deposit/ext.py b/cds/modules/deposit/ext.py
index f4a9d6466..a9b62addb 100644
--- a/cds/modules/deposit/ext.py
+++ b/cds/modules/deposit/ext.py
@@ -26,14 +26,19 @@

 import re
 import mimetypes
+import logging
+import tempfile
+import os
+import shutil

 from invenio_base.signals import app_loaded
 from invenio_db import db
-from invenio_files_rest.models import ObjectVersionTag
+from invenio_files_rest.models import ObjectVersion, ObjectVersionTag
 from invenio_files_rest.signals import file_uploaded
 from invenio_files_rest.errors import InvalidKeyError
 from invenio_indexer.signals import before_record_index
 from invenio_records_files.utils import sorted_files_from_bucket
+from srt_to_vtt import srt_to_vtt

 from ..invenio_deposit.signals import post_action
 from .indexer import cdsdeposit_indexer_receiver
@@ -45,16 +50,72 @@
 )


+logger = logging.getLogger(__name__)
+
+
+def _create_vtt_from_srt(srt_obj):
+    """Create a VTT file from an SRT file.
+
+    The VTT object is stored in the same bucket, under the SRT key with its
+    extension replaced by ``.vtt``. Conversion is best-effort: failures are
+    logged and ``None`` is returned instead of raising to the caller.
+
+    :param srt_obj: ObjectVersion of the SRT file
+    :returns: ObjectVersion of the existing or newly created VTT file, or
+        None if the SRT has no file instance or the conversion failed
+    """
+    try:
+        vtt_key = srt_obj.key.rsplit(".", 1)[0] + ".vtt"
+
+        # Keep a VTT that is already in the bucket instead of replacing it
+        existing_vtt = ObjectVersion.get(srt_obj.bucket_id, vtt_key)
+        if existing_vtt:
+            return existing_vtt
+
+        # Nothing to convert without a stored file
+        if not srt_obj.file or not srt_obj.file.uri:
+            return None
+
+        # NOTE(review): the storage URI is handed to srt_to_vtt as a path;
+        # presumably that only works for local file storage -- confirm.
+        srt_path = srt_obj.file.uri
+        tmp_dir = tempfile.mkdtemp()
+        try:
+            # Fixed file name: an object key may contain "/" and must not
+            # be used to build a path inside the temporary directory
+            vtt_path = os.path.join(tmp_dir, "converted.vtt")
+            srt_to_vtt(srt_path, vtt_path)
+
+            # Close the stream once the object has been created
+            with open(vtt_path, "rb") as vtt_stream:
+                vtt_obj = ObjectVersion.create(
+                    bucket=srt_obj.bucket,
+                    key=vtt_key,
+                    stream=vtt_stream,
+                    size=os.path.getsize(vtt_path),
+                )
+        finally:
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+
+        _create_tags(vtt_obj)
+        return vtt_obj
+    except Exception:
+        # Log the failure instead of silently discarding it
+        logger.exception("Could not create a VTT file from %s", srt_obj.key)
+        return None
+
+
 def _create_tags(obj):
     """Create additional tags for file."""
     pattern_subtitle = re.compile(r".*_([a-zA-Z]{2})\.vtt$")
     pattern_poster = re.compile(r"^poster\.(jpg|png)$")
-    
+
     # Get the media_type and content_type(file ext)
     file_name = obj.key
     mimetypes.add_type("subtitle/vtt", ".vtt")
+    mimetypes.add_type("text/srt", ".srt")
     guessed_type = mimetypes.guess_type(file_name)[0]
-    if guessed_type is None: 
+    if guessed_type is None:
         raise InvalidKeyError(description=f"Unsupported File: {file_name}")

     media_type = guessed_type.split("/")[0]
@@ -73,7 +134,10 @@ def _create_tags(obj):
         # other tags
         ObjectVersionTag.create_or_update(obj, "content_type", "vtt")
         ObjectVersionTag.create_or_update(obj, "context_type", "subtitle")
-    # poster tag
+    elif file_ext == "srt":
+        # Best-effort: the helper logs failures and does not raise
+        _create_vtt_from_srt(obj)
+    # poster tag
     elif pattern_poster.match(file_name):
         ObjectVersionTag.create_or_update(obj, "context_type", "poster")

diff --git a/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html b/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html
index b1a588580..4a8b6b623 100644
--- a/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html
+++ b/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html
@@ -148,8 +148,8 @@
Tips and suggestions
ngf-model-options="{allowInvalid: false}" ngf-change="$ctrl.addFiles($newFiles, $invalidFiles)" ngf-select="" - ngf-pattern="'.vtt'" - ngf-accept="'.vtt'" + ngf-pattern="'.vtt,.srt'" + ngf-accept="'.vtt,.srt'" ngf-validate-fn="$ctrl.validateSubtitles($file)" ngf-max-size="500GB" > @@ -167,8 +167,8 @@
Tips and suggestions
ng-if="!$ctrl.cdsDepositCtrl.isPublished()" ngf-select="" ngf-change="$ctrl.addFiles($newFiles, $invalidFiles)" - ngf-pattern="'.vtt'" - ngf-accept="'text/vtt'" + ngf-pattern="'.vtt,.srt'" + ngf-accept="'text/vtt,.vtt,.srt'" ngf-validate-fn="$ctrl.validateSubtitles($file)" ngf-max-size="500GB" ngf-multiple="true" @@ -183,15 +183,15 @@
Tips and suggestions
ngf-model-options="{allowInvalid: false}" ngf-change="$ctrl.addFiles($newFiles, $invalidFiles)" ngf-select="" - ngf-pattern="'.vtt'" - ngf-accept="'.vtt'" + ngf-pattern="'.vtt,.srt'" + ngf-accept="'.vtt,.srt'" ngf-validate-fn="$ctrl.validateSubtitles($file)" - ngf-max-size="500GB">select .vtt files. + ngf-max-size="500GB">select .vtt or .srt files.
Tips and suggestions

diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js
index ce70be33d..97379e694 100644
--- a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js
+++ b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js
@@ -284,7 +284,12 @@ function cdsUploaderCtrl(

     // Filter out files without a valid MIME type or with zero size
     _files = _files.filter((file) => {
-      if (!file.type || file.type.trim() === "") {
+      // Allow SRT and VTT files even if they don't have a MIME type
+      var fileName = file.name.toLowerCase();
+      var isSubtitleFile =
+        fileName.endsWith(".vtt") || fileName.endsWith(".srt");
+
+      if ((!file.type || file.type.trim() === "") && !isSubtitleFile) {
         toaster.pop(
           "warning",
           "Invalid File Type",
@@ -544,13 +549,14 @@ function cdsUploaderCtrl(
   this.validateSubtitles = function (_file) {
     // Check if the filename matches the pattern and is a valid ISO language
     // i.e. jessica_jones-en.vtt
-    var match = _file.name.match(/(?:.+)[_|-]([a-zA-Z]{2}).vtt/) || [];
+    var match = _file.name.match(/(?:.+)[_-]([a-zA-Z]{2})\.(vtt|srt)$/) || [];
     return match.length > 1 && match[1] in isoLanguages;
   };

   this.validateAdditionalFiles = function (_file) {
-    // If it's a .vtt file, validate as subtitle
-    if (_file.name.toLowerCase().endsWith(".vtt")) {
+    // If it's a .vtt or .srt file, validate as subtitle
+    var fileName = _file.name.toLowerCase();
+    if (fileName.endsWith(".vtt") || fileName.endsWith(".srt")) {
       return this.validateSubtitles(_file);
     }
     // Accept other types
diff --git a/requirements.txt b/requirements.txt
index ecf4153ab..97be8e26f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -184,6 +184,7 @@ simplekv==0.14.1
 six==1.17.0
 soupsieve==2.6
 speaklater==1.3
+srt-to-vtt==1.0.0
 SQLAlchemy==1.4.54
 SQLAlchemy-Continuum==1.4.1
 SQLAlchemy-Utils==0.38.3