From b2f43285870fb12af167f0a02c91b93948c04687 Mon Sep 17 00:00:00 2001 From: Sebastian Annies Date: Wed, 5 Mar 2025 19:07:06 +0200 Subject: [PATCH] reinstate extended SRT --- pycaption/srt.py | 57 +++++++++++++++++++++++++++++------- tests/conftest.py | 5 ++-- tests/fixtures/webvtt.py | 36 +++++++++++++++++++++++ tests/test_srt_conversion.py | 7 +++++ 4 files changed, 93 insertions(+), 12 deletions(-) diff --git a/pycaption/srt.py b/pycaption/srt.py index f32224b3..359978f2 100644 --- a/pycaption/srt.py +++ b/pycaption/srt.py @@ -1,4 +1,4 @@ -import re +import os from copy import deepcopy from .base import ( @@ -6,6 +6,9 @@ ) from .exceptions import CaptionReadNoCaptions, InvalidInputError +import re +from PIL import Image, ImageFont, ImageDraw + class SRTReader(BaseReader): RE_HTML = re.compile(r'<[^>]+>') @@ -102,20 +105,29 @@ def _find_text_line(self, start_line, lines): class SRTWriter(BaseWriter): - def write(self, caption_set): + VALID_POSITION = ['top', 'bottom'] + + def write(self, caption_set, position='bottom'): + position = position.lower().strip() + if position not in SRTWriter.VALID_POSITION: + raise ValueError('Unknown position. Supported: {}'.format(','.join(SRTWriter.VALID_POSITION))) + + if position == 'top' and not all([self.video_width, self.video_height]): + raise ValueError('Top position requires video width and height.') + caption_set = deepcopy(caption_set) srt_captions = [] for lang in caption_set.get_languages(): srt_captions.append( - self._recreate_lang(caption_set.get_captions(lang)) + self._recreate_lang(caption_set.get_captions(lang), position) ) caption_content = 'MULTI-LANGUAGE SRT\n'.join(srt_captions) return caption_content - def _recreate_lang(self, captions): + def _recreate_lang(self, captions, position='bottom'): # Merge caption's that are on the exact same timestamp otherwise some # players will play them in reversed order, libass specifically which is # used quite a lot, including VLC and MPV. @@ -140,22 +152,47 @@ def _recreate_lang(self, captions): srt = '' count = 1 - for caption in captions: - srt += f'{count}\n' - - start = caption.format_start(msec_separator=',') - end = caption.format_end(msec_separator=',') + fnt = ImageFont.truetype(os.path.dirname(__file__) + '/NotoSansDisplay-Regular-Note-Math.ttf', 30) - srt += f'{start[:12]} --> {end[:12]}\n' + img = None + draw = None + if position == 'top': + img = Image.new('RGB', (self.video_width, self.video_height), (0, 255, 0)) + draw = ImageDraw.Draw(img) + for caption in captions: + # Generate the text new_content = '' for node in caption.nodes: new_content = self._recreate_line(new_content, node) # Eliminate excessive line breaks new_content = new_content.strip() + while '\n\n' in new_content: + new_content = new_content.replace('\n\n', '\n') + + srt += f'{count}\n' + start = caption.format_start(msec_separator=',') + end = caption.format_end(msec_separator=',') + if position == 'bottom': + # "bottom" is standard (no position info). + # Use the old behavior, output just the timestamp, no coordinates. + timestamp = '%s --> %s' % (start[:12], end[:12]) + elif position == 'top': + padding_top = 10 + l, t, r, b = draw.textbbox((0, 0), new_content, font=fnt) + l, t, r, b = draw.textbbox((self.video_width / 2 - r / 2, padding_top), new_content, font=fnt) + x1 = str(round(l)).zfill(3) + x2 = str(round(r)).zfill(3) + y1 = str(round(t)).zfill(3) + y2 = str(round(b)).zfill(3) + timestamp = '%s --> %s X1:%s X2:%s Y1:%s Y2:%s' % (start[:12], end[:12], x1, x2, y1, y2) + else: + raise ValueError('Unsupported position: %s' % position) + srt += f'{timestamp}\n' srt += f"{new_content}\n\n" + count += 1 return srt[:-1] # remove unwanted newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 92168b7e..44d477d2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -81,7 +81,7 @@ sample_srt_blank_lines, sample_srt_trailing_blanks, samples_srt_same_time, sample_srt_empty_cue_output, sample_srt_timestamps_without_microseconds, - sample_srt_arabic + sample_srt_arabic, ) from tests.fixtures.translated_scc import ( # noqa: F401 sample_translated_scc_custom_brackets, sample_translated_scc_success, @@ -101,5 +101,6 @@ sample_webvtt_from_scc_properly_writes_newlines_output, sample_webvtt_last_cue_zero_start, sample_webvtt_empty_cue, sample_webvtt_multi_lang_en, sample_webvtt_multi_lang_de, - sample_webvtt_empty_cue_output, sample_webvtt_timestamps + sample_webvtt_empty_cue_output, sample_webvtt_timestamps, + sample_srt_top ) diff --git a/tests/fixtures/webvtt.py b/tests/fixtures/webvtt.py index c8ce9ea8..f49ce242 100644 --- a/tests/fixtures/webvtt.py +++ b/tests/fixtures/webvtt.py @@ -1,5 +1,41 @@ import pytest +@pytest.fixture(scope="session") +def sample_srt_top(): + return """1 +00:00:09,209 --> 00:00:12,312 X1:262 X2:458 Y1:020 Y2:051 +( clock ticking ) + +2 +00:00:14,848 --> 00:00:17,000 X1:233 X2:487 Y1:021 Y2:125 +MAN: +When we think +♪ ...say bow, wow, ♪ + +3 +00:00:17,000 --> 00:00:18,752 X1:162 X2:558 Y1:020 Y2:044 +we have this vision of Einstein + +4 +00:00:18,752 --> 00:00:20,887 X1:208 X2:512 Y1:020 Y2:081 +as an old, wrinkly man +with white hair. + +5 +00:00:20,887 --> 00:00:26,760 X1:190 X2:530 Y1:021 Y2:118 +MAN 2: +E equals m c-squared is +not about an old Einstein. + +6 +00:00:26,760 --> 00:00:32,200 X1:147 X2:573 Y1:021 Y2:081 +MAN 2: +It's all about an eternal Einstein. + +7 +00:00:32,200 --> 00:00:36,200 X1:187 X2:533 Y1:021 Y2:044 + +""" @pytest.fixture(scope="session") def sample_webvtt(): diff --git a/tests/test_srt_conversion.py b/tests/test_srt_conversion.py index e95d4b84..ef495cfe 100644 --- a/tests/test_srt_conversion.py +++ b/tests/test_srt_conversion.py @@ -64,3 +64,10 @@ def test_webvtt_to_srt_conversion(self, sample_srt, sample_webvtt): assert isinstance(results, str) self.assert_srt_equals(sample_srt, results) + + def test_webvtt_to_srt_conversion_pos_top(self, sample_srt_top, sample_webvtt): + caption_set = WebVTTReader().read(sample_webvtt) + results = SRTWriter(video_width=720, video_height=480 ).write(caption_set, position='top') + + assert isinstance(results, str) + self.assert_srt_equals(sample_srt_top, results)