Support Filtergraph output format #31

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

sannies merged 15 commits into main from support-ass

Jan 12, 2026

pycaption/__init__.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -7,6 +7,7 @@ @@
     from .microdvd import MicroDVDReader, MicroDVDWriter
     from .sami import SAMIReader, SAMIWriter
     from .scenarist import ScenaristDVDWriter
+    from .filtergraph import FiltergraphWriter
     from .srt import SRTReader, SRTWriter
     from .scc import SCCReader, SCCWriter
     from .scc.translator import translate_scc
@@ Expand All / @@ -22,8 +23,8 @@ @@
         'MicroDVDWriter', 'SAMIReader', 'SAMIWriter', 'SRTReader', 'SRTWriter',
         'SCCReader', 'SCCWriter', 'translate_scc', 'WebVTTReader', 'WebVTTWriter',
         'CaptionReadError', 'CaptionReadNoCaptions', 'CaptionReadSyntaxError',
-        'detect_format', 'CaptionNode', 'Caption', 'CaptionList', 'CaptionSet', 'ScenaristDVDWriter'
-        'TranscriptWriter'
+        'detect_format', 'CaptionNode', 'Caption', 'CaptionList', 'CaptionSet',
+        'ScenaristDVDWriter', 'FiltergraphWriter', 'TranscriptWriter'
     ]
     SUPPORTED_READERS = (
@@ Expand Down @@

pycaption/filtergraph.py

-Original file line number
+Diff line change
@@ -0,0 +1,130 @@
+    import tempfile
+    import zipfile
+    from io import BytesIO
+    from pycaption.base import CaptionSet
+    from pycaption.subtitler_image_based import SubtitleImageBasedWriter
+    class FiltergraphWriter(SubtitleImageBasedWriter):
+        """
+        FFmpeg filtergraph writer for image-based subtitles.
+        Generates PNG subtitle images and an FFmpeg filtergraph that can be used
+        to create a transparent WebM video with subtitle overlays.
+        By default, generates Full HD (1920x1080) images. The filtergraph uses
+        the overlay filter with timing to display each subtitle at the correct time.
+        Uses PNG format for images with 4-color indexed palette for optimal
+        compression (~6 KB per Full HD image).
+        """
+        def __init__(self, relativize=True, video_width=1920, video_height=1080,
+                     fit_to_screen=True, frame_rate=25, output_dir=None):
+            """
+            Initialize the filtergraph writer.
+            :param relativize: Convert absolute positioning to percentages
+            :param video_width: Width of generated subtitle images (default: 1920 for Full HD)
+            :param video_height: Height of generated subtitle images (default: 1080 for Full HD)
+            :param fit_to_screen: Ensure captions fit within screen bounds
+            :param frame_rate: Frame rate for timing calculations
+            """
+            if output_dir is None:
+                self.output_dir = 'embedded_subs'
+            else:
+                self.output_dir = output_dir
+            super().__init__(relativize, video_width, video_height, fit_to_screen, frame_rate)
+        def save_image(self, tmp_dir, index, img):
+            """Save RGBA image as PNG with transparency."""
+            img.save(
+                tmp_dir + '/subtitle%04d.png' % index,
+                optimize=True,
+                compress_level=9
+            )
+        def format_ts_seconds(self, value):
+            """
+            Format timestamp as seconds with 3 decimal places for FFmpeg.
+            :param value: Time in microseconds
+            :return: Seconds as float string
+            """
+            return f"{value / 1_000_000:.3f}"
+        def write(
+                self,
+                caption_set: CaptionSet,
+                position='bottom',
+                avoid_same_next_start_prev_end=False,
+                align='center'
+        ):
+            """
+            Write captions as PNG images with an FFmpeg filtergraph for creating
+            a transparent WebM video overlay.
+            Returns a ZIP file containing:
+            - PNG subtitle images in the specified image_dir
+            - filtergraph.txt: FFmpeg filter_complex script
+            :param caption_set: CaptionSet containing the captions to write
+            :param position: Position of subtitles ('top', 'bottom', 'source')
+            :param avoid_same_next_start_prev_end: Adjust timing to avoid overlaps
+            :param align: Text alignment ('left', 'center', 'right')
+            :return: ZIP file contents as bytes
+            """
+            lang = caption_set.get_languages().pop()
+            caps = caption_set.get_captions(lang)
+            buf = BytesIO()
+            with tempfile.TemporaryDirectory() as tmpDir:
+                caps_final, overlapping = self.write_images(
+                    caps, lang, tmpDir, position, align, avoid_same_next_start_prev_end
+                )
+                # Calculate total duration (last end time)
+                max_end = max(cap_list[0].end for cap_list in caps_final)
+                duration_seconds = max_end / 1_000_000 + 1  # Add 1 second buffer
+                # Build FFmpeg filtergraph
+                # Start with transparent base
+                filter_parts = []
+                filter_parts.append(
+                    f"color=c=black@0:s={self.video_width}x{self.video_height}:d={duration_seconds:.3f},format=yuva444p[base]"
+                )
+                # Load each image (paths relative to where ffmpeg is run)
+                for i in range(1, len(caps_final) + 1):
+                    filter_parts.append(
+                        f"movie={self.output_dir}/subtitle{i:04d}.png,format=yuva444p[s{i}]"
+                    )
+                # Chain overlays
+                prev_label = "base"
+                for i, cap_list in enumerate(caps_final, 1):
+                    start_sec = self.format_ts_seconds(cap_list[0].start)
+                    end_sec = self.format_ts_seconds(cap_list[0].end)
+                    next_label = f"v{i}" if i < len(caps_final) else "out"
+                    filter_parts.append(
+                        f"[{prev_label}][s{i}]overlay=x=0:y=0:enable='between(t,{start_sec},{end_sec})':format=auto[{next_label}]"
+                    )
+                    prev_label = next_label
+                filtergraph = ";\n".join(filter_parts)
+                # Create ZIP archive
+                with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
+                    # Add images
+                    for i in range(1, len(caps_final) + 1):
+                        img_path = tmpDir + '/subtitle%04d.png' % i
+                        zf.write(img_path, f'{self.output_dir}/subtitle{i:04d}.png')
+                    # Add filtergraph
+                    zf.writestr(f'{self.output_dir}/filtergraph.txt', filtergraph)
+            buf.seek(0)
+            return buf.read()

pycaption/scenarist.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -4,6 +4,8 @@ @@
     from datetime import timedelta
     from io import BytesIO
+    from PIL import Image
     from pycaption.base import CaptionSet
     from pycaption.subtitler_image_based import SubtitleImageBasedWriter
@@ Expand Down Expand Up / @@ -73,12 +75,22 @@ class ScenaristDVDWriter(SubtitleImageBasedWriter): @@
         tiff_compression = None
+        # DVD subtitle palette colors
+        paColor = (255, 255, 255)  # letter body (white)
+        e1Color = (190, 190, 190)  # antialiasing color (gray)
+        e2Color = (0, 0, 0)  # border color (black)
+        bgColor = (0, 255, 0)  # background color (green - will be transparent on DVD)
         def __init__(self, relativize=True, video_width=720, video_height=480, fit_to_screen=True, tape_type='NON_DROP',
                      frame_rate=25, compat=False):
             super().__init__(relativize, video_width, video_height, fit_to_screen, frame_rate)
             self.tape_type = tape_type
             self.frame_rate = frame_rate
+            # Create palette image for quantization (4 colors only - smaller output)
+            self.palette_image = Image.new("P", (1, 1))
+            self.palette_image.putpalette([*self.paColor, *self.e1Color, *self.e2Color, *self.bgColor])
             if compat:
                 self.color = '(1 2 3 4)'
                 self.contrast = '(15 15 15 0)'
@@ Expand All @@
                 self.contrast = '(7 7 7 7)'
         def save_image(self, tmp_dir, index, img):
-            img.save(tmp_dir + '/subtitle%04d.tif' % index, compression=self.tiff_compression)
+            """
+            Convert RGBA to paletted image for DVD subtitles and save it as TIFF.
+            :param tmp_dir: Directory the ``subtitleNNNN.tif`` file is written into
+            :param index: Image number used in the file name
+            :param img: Image whose fourth band (alpha) is used as the paste mask
+            """
+            # Replace transparent pixels with green background
+            background = Image.new('RGB', img.size, self.bgColor)
+            background.paste(img, mask=img.split()[3])  # Use alpha channel as mask
+            # Quantize to 4-color palette (dither=0: no dithering)
+            img_quant = background.quantize(palette=self.palette_image, dither=0)
+            img_quant.save(tmp_dir + '/subtitle%04d.tif' % index, compression=self.tiff_compression)
         def write(
                 self,
@@ Expand Down @@

pycaption/subtitler_image_based.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -30,22 +30,14 @@ def get_sst_pixel_display_params(video_width, video_height):
  
    class SubtitleImageBasedWriter(BaseWriter):

        VALID_POSITION = ['top', 'bottom', 'source']

        paColor = (255, 255, 255)  # letter body

        e1Color = (190, 190, 190)  # antialiasing color

        e2Color = (0, 0, 0)  # border color

        bgColor = (0, 255, 0)  # background color

        palette_image = Image.new("P", (1, 1))

        palette_image.putpalette([*paColor, *e1Color, *e2Color, *bgColor] + [0, 0, 0] * 252)

        # Default colors for RGBA rendering

        fontColor = (255, 255, 255)  # white text

        borderColor = (0, 0, 0)  # black border

        def __init__(self, relativize=True, video_width=720, video_height=480, fit_to_screen=True, frame_rate=25):

            super().__init__(relativize, video_width, video_height, fit_to_screen)

            self.palette = [self.paColor, self.e1Color, self.e2Color, self.bgColor]

            self.frame_rate = frame_rate

            palette_image = Image.new("P", (1, 1))

            palette_image.putpalette([*self.paColor, *self.e1Color, *self.e2Color, *self.bgColor] + [0, 0, 0] * 252)

            self.font_langs = {

                Language.get('en'): {'fontfile': f"{os.path.dirname(__file__)}/NotoSansDisplay-Regular-Note-Math.ttf"},

                Language.get('ru'): {'fontfile': f"{os.path.dirname(__file__)}/NotoSansDisplay-Regular-Note-Math.ttf"},

    @@ -217,21 +209,21 @@ def write_images(
  
            if missing_glyphs:

                raise ValueError(f'Selected font was missing glyphs: {" ".join(missing_glyphs.keys())}')

            font_size = int(self.video_width * 0.05 * 0.6)  # rough estimate but should work

            min_font_px = 16

            font_size = max(min_font_px, int(self.video_width * 0.05 * 0.6))  # rough estimate but should work

            fnt = ImageFont.truetype(fnt, font_size)

            index = 1

            for i, cap_list in enumerate(caps_final):

                img = Image.new('RGB', (self.video_width, self.video_height), self.bgColor)

                # Create RGBA image with transparent background

                img = Image.new('RGBA', (self.video_width, self.video_height), (0, 0, 0, 0))

                draw = ImageDraw.Draw(img)

                self.printLine(draw, cap_list, fnt, position, align)

                # quantize the image to our palette

                img_quant = img.quantize(palette=self.palette_image, dither=0)

                self.save_image(tmpDir, index, img_quant)

                # Pass RGBA image to subclass - each subclass converts as needed

                self.save_image(tmpDir, index, img)

                index = index + 1

    @@ -285,25 +277,25 @@ def printLine(self, draw: ImageDraw, caption_list: Caption, fnt: ImageFont, posi
  
                    else:

                        raise ValueError('Unknown "position": {}'.format(position))

                borderColor = self.e2Color

                fontColor = self.paColor

                border = (*self.borderColor, 255)  # Add alpha for RGBA

                font = (*self.fontColor, 255)  # Add alpha for RGBA

                for adj in range(2):

                    # move right

                    draw.text((x - adj, y), text, font=fnt, fill=borderColor, align=align)

                    draw.text((x - adj, y), text, font=fnt, fill=border, align=align)

                    # move left

                    draw.text((x + adj, y), text, font=fnt, fill=borderColor, align=align)

                    draw.text((x + adj, y), text, font=fnt, fill=border, align=align)

                    # move up

                    draw.text((x, y + adj), text, font=fnt, fill=borderColor, align=align)

                    draw.text((x, y + adj), text, font=fnt, fill=border, align=align)

                    # move down

                    draw.text((x, y - adj), text, font=fnt, fill=borderColor, align=align)

                    draw.text((x, y - adj), text, font=fnt, fill=border, align=align)

                # diagonal left up

                    draw.text((x - adj, y + adj), text, font=fnt, fill=borderColor, align=align)

                    draw.text((x - adj, y + adj), text, font=fnt, fill=border, align=align)

                # diagonal right up

                    draw.text((x + adj, y + adj), text, font=fnt, fill=borderColor, align=align)

                    draw.text((x + adj, y + adj), text, font=fnt, fill=border, align=align)

                # diagonal left down

                    draw.text((x - adj, y - adj), text, font=fnt, fill=borderColor, align=align)

                    draw.text((x - adj, y - adj), text, font=fnt, fill=border, align=align)

                # diagonal right down

                    draw.text((x + adj, y - adj), text, font=fnt, fill=borderColor, align=align)

                    draw.text((x + adj, y - adj), text, font=fnt, fill=border, align=align)

                draw.text((x, y), text, font=fnt, fill=fontColor, align=align)

                draw.text((x, y), text, font=fnt, fill=font, align=align)

                lines_written += 1

pycaption/ttml_background.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -45,15 +45,31 @@ @@
     class TTMLBackgroundWriter(SubtitleImageBasedWriter):
+        # Palette colors for TTML background images
+        paColor = (255, 255, 255)  # letter body (white)
+        e1Color = (190, 190, 190)  # antialiasing color (gray)
+        e2Color = (0, 0, 0)  # border color (black)
+        bgColor = (0, 255, 0)  # background color (green - index 3 = transparent)
         def __init__(self, relativize=True, video_width=720, video_height=480, fit_to_screen=True, tape_type='NON_DROP',
                      frame_rate=25, compat=False):
+            # Sets up the base image writer, then stores tape_type and frame_rate
+            # and builds the 4-color palette image used by save_image.
+            # NOTE(review): `compat` is accepted but not read in this method - confirm it is intentional.
             super().__init__(relativize, video_width, video_height, fit_to_screen, frame_rate)
             self.tape_type = tape_type
             self.frame_rate = frame_rate
+            # Create palette image for quantization (4 colors only - smaller output)
+            self.palette_image = Image.new("P", (1, 1))
+            self.palette_image.putpalette([*self.paColor, *self.e1Color, *self.e2Color, *self.bgColor])
         def save_image(self, tmp_dir, index, img):
-            # Now save with transparency
-            img.save(tmp_dir + '/subtitle%04d.png' % index, transparency=3)
+            """
+            Convert RGBA to paletted PNG with transparency.
+            :param tmp_dir: Directory the ``subtitleNNNN.png`` file is written into
+            :param index: Image number used in the file name
+            :param img: Image whose fourth band (alpha) is used as the paste mask
+            """
+            # Replace transparent pixels with green background
+            background = Image.new('RGB', img.size, self.bgColor)
+            background.paste(img, mask=img.split()[3])  # Use alpha channel as mask
+            # Quantize to 4-color palette (dither=0: no dithering)
+            img_quant = background.quantize(palette=self.palette_image, dither=0)
+            # Palette index 3 is bgColor (fourth palette entry), saved as the transparent color
+            img_quant.save(tmp_dir + '/subtitle%04d.png' % index, transparency=3)
         def to_ttml_timestamp(self, ms: int) -> str:
             hours = ms // 3_600_000
@@ Expand Down @@

test.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -4,6 +4,7 @@ @@
     srtReader = SRTReader()
-    c = srtReader.read(content=open("a.srt", "rb").read().decode('UTF-8-SIG'), lang='zh-Hans')
+    c = srtReader.read(content=open("cookoff-1080p-h264-tidpix.srt", "rb").read().decode('UTF-8-SIG'), lang='zh-Hans')
     w = ScenaristDVDWriter()
-    w.write(c)
+    open("cookoff-1080p-h264-tidpix.zip", "wb").write(w.write(c))

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Support Filtergraph output format #31

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Uh oh!