castlabs · sannies · Mar 5, 2025 · Mar 5, 2025
diff --git a/pycaption/srt.py b/pycaption/srt.py
@@ -1,11 +1,14 @@
-import re
+import os
 from copy import deepcopy
 
 from .base import (
     BaseReader, BaseWriter, CaptionSet, CaptionList, Caption, CaptionNode,
 )
 from .exceptions import CaptionReadNoCaptions, InvalidInputError
 
+import re
+from PIL import Image, ImageFont, ImageDraw
+
 
 class SRTReader(BaseReader):
     RE_HTML = re.compile(r'<[^>]+>')
@@ -102,20 +105,29 @@ def _find_text_line(self, start_line, lines):
 
 
 class SRTWriter(BaseWriter):
-    def write(self, caption_set):
+    VALID_POSITION = ['top', 'bottom']
+
+    def write(self, caption_set, position='bottom'):
+        """Return caption_set as SRT text; raise ValueError for an unusable position."""
+        position = position.lower().strip()
+        if position not in SRTWriter.VALID_POSITION:
+            raise ValueError('Unknown position. Supported: {}'.format(','.join(SRTWriter.VALID_POSITION)))
+        if position == 'top' and not all([self.video_width, self.video_height]):
+            raise ValueError('Top position requires video width and height.')
+
         caption_set = deepcopy(caption_set)
 
         srt_captions = []
 
         for lang in caption_set.get_languages():
             srt_captions.append(
-                self._recreate_lang(caption_set.get_captions(lang))
+                self._recreate_lang(caption_set.get_captions(lang), position)
             )
 
         caption_content = 'MULTI-LANGUAGE SRT\n'.join(srt_captions)
         return caption_content
 
-    def _recreate_lang(self, captions):
+    def _recreate_lang(self, captions, position='bottom'):
         # Merge caption's that are on the exact same timestamp otherwise some
         # players will play them in reversed order, libass specifically which is
         # used quite a lot, including VLC and MPV.
@@ -140,22 +152,61 @@ def _recreate_lang(self, captions):
         srt = ''
         count = 1
 
-        for caption in captions:
-            srt += f'{count}\n'
-
-            start = caption.format_start(msec_separator=',')
-            end = caption.format_end(msec_separator=',')
+        # Text measurement is only needed for 'top'; the default 'bottom'
+        # output must not depend on the bundled font being loadable.
+        fnt = None
 
-            srt += f'{start[:12]} --> {end[:12]}\n'
+        draw = None
+        if position == 'top':
+            font_path = os.path.join(
+                os.path.dirname(__file__),
+                'NotoSansDisplay-Regular-Note-Math.ttf',
+            )
+            fnt = ImageFont.truetype(font_path, 30)
+            # textbbox() only measures text, so a minimal canvas is sufficient.
+            draw = ImageDraw.Draw(Image.new('RGB', (1, 1)))
 
+        for caption in captions:
+            # Generate the text
             new_content = ''
             for node in caption.nodes:
                 new_content = self._recreate_line(new_content, node)
 
             # Eliminate excessive line breaks
             new_content = new_content.strip()
+            while '\n\n' in new_content:
+                new_content = new_content.replace('\n\n', '\n')
+
+            srt += f'{count}\n'
 
+            start = caption.format_start(msec_separator=',')
+            end = caption.format_end(msec_separator=',')
+            if position == 'bottom':
+                # "bottom" is standard (no position info).
+                # Use the old behavior, output just the timestamp, no coordinates.
+                timestamp = f'{start[:12]} --> {end[:12]}'
+            elif position == 'top':
+                padding_top = 10
+                # First pass: measure at the origin to get the right-hand extent.
+                _, _, text_right, _ = draw.textbbox((0, 0), new_content, font=fnt)
+                # Second pass: box of the text centred horizontally in the frame.
+                left, top, right, bottom = draw.textbbox(
+                    (self.video_width / 2 - text_right / 2, padding_top),
+                    new_content, font=fnt,
+                )
+                x1 = str(round(left)).zfill(3)
+                x2 = str(round(right)).zfill(3)
+                y1 = str(round(top)).zfill(3)
+                y2 = str(round(bottom)).zfill(3)
+                timestamp = (
+                    f'{start[:12]} --> {end[:12]} '
+                    f'X1:{x1} X2:{x2} Y1:{y1} Y2:{y2}'
+                )
+            else:
+                raise ValueError('Unsupported position: %s' % position)
+            srt += f'{timestamp}\n'
             srt += f"{new_content}\n\n"
+
             count += 1
 
         return srt[:-1]  # remove unwanted newline at end of file

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -81,7 +81,7 @@
     sample_srt_blank_lines, sample_srt_trailing_blanks,
     samples_srt_same_time, sample_srt_empty_cue_output,
     sample_srt_timestamps_without_microseconds,
-    sample_srt_arabic
+    sample_srt_arabic,
 )
 from tests.fixtures.translated_scc import (  # noqa: F401
     sample_translated_scc_custom_brackets, sample_translated_scc_success,
@@ -101,5 +101,6 @@
     sample_webvtt_from_scc_properly_writes_newlines_output,
     sample_webvtt_last_cue_zero_start, sample_webvtt_empty_cue,
     sample_webvtt_multi_lang_en, sample_webvtt_multi_lang_de,
-    sample_webvtt_empty_cue_output, sample_webvtt_timestamps
+    sample_webvtt_empty_cue_output, sample_webvtt_timestamps,
+    sample_srt_top
 )
diff --git a/tests/fixtures/webvtt.py b/tests/fixtures/webvtt.py
@@ -1,5 +1,42 @@
 import pytest
 
+@pytest.fixture(scope="session")
+def sample_srt_top():
+    """Expected SRTWriter output for position='top' on a 720x480 frame."""
+    return """1
+00:00:09,209 --> 00:00:12,312 X1:262 X2:458 Y1:020 Y2:051
+( clock ticking )
+
+2
+00:00:14,848 --> 00:00:17,000 X1:233 X2:487 Y1:021 Y2:125
+MAN: 
+When we think 
+♪ ...say bow, wow, ♪
+
+3
+00:00:17,000 --> 00:00:18,752 X1:162 X2:558 Y1:020 Y2:044
+we have this vision of Einstein
+
+4
+00:00:18,752 --> 00:00:20,887 X1:208 X2:512 Y1:020 Y2:081
+as an old, wrinkly man 
+with white hair.
+
+5
+00:00:20,887 --> 00:00:26,760 X1:190 X2:530 Y1:021 Y2:118
+MAN 2: 
+E equals m c-squared is 
+not about an old Einstein.
+
+6
+00:00:26,760 --> 00:00:32,200 X1:147 X2:573 Y1:021 Y2:081
+MAN 2: 
+It's all about an eternal Einstein.
+
+7
+00:00:32,200 --> 00:00:36,200 X1:187 X2:533 Y1:021 Y2:044
+<LAUGHING & WHOOPS!>
+"""
 
 @pytest.fixture(scope="session")
 def sample_webvtt():

diff --git a/tests/test_srt_conversion.py b/tests/test_srt_conversion.py
@@ -64,3 +64,11 @@ def test_webvtt_to_srt_conversion(self, sample_srt, sample_webvtt):
 
         assert isinstance(results, str)
         self.assert_srt_equals(sample_srt, results)
+
+    def test_webvtt_to_srt_conversion_pos_top(self, sample_srt_top, sample_webvtt):
+        """Writing with position='top' yields the coordinate-annotated SRT fixture."""
+        caption_set = WebVTTReader().read(sample_webvtt)
+        results = SRTWriter(video_width=720, video_height=480).write(caption_set, position='top')
+
+        assert isinstance(results, str)
+        self.assert_srt_equals(sample_srt_top, results)