Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pycaption/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .microdvd import MicroDVDReader, MicroDVDWriter
from .sami import SAMIReader, SAMIWriter
from .scenarist import ScenaristDVDWriter
from .filtergraph import FiltergraphWriter
from .srt import SRTReader, SRTWriter
from .scc import SCCReader, SCCWriter
from .scc.translator import translate_scc
Expand All @@ -22,8 +23,8 @@
'MicroDVDWriter', 'SAMIReader', 'SAMIWriter', 'SRTReader', 'SRTWriter',
'SCCReader', 'SCCWriter', 'translate_scc', 'WebVTTReader', 'WebVTTWriter',
'CaptionReadError', 'CaptionReadNoCaptions', 'CaptionReadSyntaxError',
'detect_format', 'CaptionNode', 'Caption', 'CaptionList', 'CaptionSet', 'ScenaristDVDWriter'
'TranscriptWriter'
'detect_format', 'CaptionNode', 'Caption', 'CaptionList', 'CaptionSet',
'ScenaristDVDWriter', 'FiltergraphWriter', 'TranscriptWriter'
]

SUPPORTED_READERS = (
Expand Down
130 changes: 130 additions & 0 deletions pycaption/filtergraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import tempfile
import zipfile
from io import BytesIO

from pycaption.base import CaptionSet
from pycaption.subtitler_image_based import SubtitleImageBasedWriter


class FiltergraphWriter(SubtitleImageBasedWriter):
    """
    FFmpeg filtergraph writer for image-based subtitles.

    Generates one PNG image per subtitle plus an FFmpeg ``filter_complex``
    script that overlays each image on a transparent canvas for the time
    span of its caption. The script's final output pad is labelled ``[out]``.

    By default the canvas (and therefore every image) is Full HD
    (1920x1080).

    Images are saved exactly as they are handed to :meth:`save_image`
    (no palette quantization happens in this class), using Pillow's
    ``optimize`` flag and the highest zlib compression level.
    """

    def __init__(self, relativize=True, video_width=1920, video_height=1080,
                 fit_to_screen=True, frame_rate=25, output_dir=None):
        """
        Initialize the filtergraph writer.

        :param relativize: Convert absolute positioning to percentages
        :param video_width: Width of generated subtitle images (default: 1920 for Full HD)
        :param video_height: Height of generated subtitle images (default: 1080 for Full HD)
        :param fit_to_screen: Ensure captions fit within screen bounds
        :param frame_rate: Frame rate for timing calculations
        :param output_dir: Directory name used both as the folder inside the
            returned ZIP archive and as the path prefix of the images in the
            filtergraph. Defaults to ``'embedded_subs'`` when ``None``.
        """
        self.output_dir = 'embedded_subs' if output_dir is None else output_dir
        super().__init__(relativize, video_width, video_height, fit_to_screen, frame_rate)

    def _image_name(self, index):
        """Return the file name of the subtitle image with the given 1-based index."""
        # Single source of truth for the name: it must be identical when the
        # image is saved, referenced from the filtergraph and added to the ZIP.
        return 'subtitle%04d.png' % index

    def save_image(self, tmp_dir, index, img):
        """
        Save one subtitle image as a PNG file inside ``tmp_dir``.

        :param tmp_dir: Directory the image is written to
        :param index: 1-based image number, used to build the file name
        :param img: Image object exposing a Pillow-style ``save`` method
        """
        img.save(
            tmp_dir + '/' + self._image_name(index),
            optimize=True,
            compress_level=9
        )

    def format_ts_seconds(self, value):
        """
        Format timestamp as seconds with 3 decimal places for FFmpeg.

        :param value: Time in microseconds
        :return: Seconds as a string, e.g. ``"1.500"``
        """
        return f"{value / 1_000_000:.3f}"

    def _build_filtergraph(self, caps_final):
        """
        Build the FFmpeg filter_complex script for the given caption groups.

        :param caps_final: Sequence of caption lists; the first caption of
            each list supplies the start/end time (in microseconds) of the
            corresponding image
        :return: The filtergraph as a string, filters separated by ``";\\n"``
        """
        count = len(caps_final)

        # Canvas lasts until the last caption ends, plus a 1 second buffer.
        # ``default=0`` keeps an empty caption list from raising ValueError.
        max_end = max((cap_list[0].end for cap_list in caps_final), default=0)
        duration_seconds = max_end / 1_000_000 + 1

        # With no captions there is no overlay chain, so the transparent
        # canvas itself has to carry the final "out" label.
        base_label = "base" if count else "out"
        filter_parts = [
            f"color=c=black@0:s={self.video_width}x{self.video_height}"
            f":d={duration_seconds:.3f},format=yuva444p[{base_label}]"
        ]

        # Load each image (paths relative to where ffmpeg is run).
        # NOTE(review): output_dir is inserted verbatim; characters that are
        # special in filtergraph syntax would need escaping - confirm callers
        # only pass plain directory names.
        filter_parts.extend(
            f"movie={self.output_dir}/{self._image_name(i)},format=yuva444p[s{i}]"
            for i in range(1, count + 1)
        )

        # Chain overlays: each step draws image i on top of the previous
        # result, only while t is inside the caption's time span.
        prev_label = base_label
        for i, cap_list in enumerate(caps_final, 1):
            start_sec = self.format_ts_seconds(cap_list[0].start)
            end_sec = self.format_ts_seconds(cap_list[0].end)
            next_label = f"v{i}" if i < count else "out"
            filter_parts.append(
                f"[{prev_label}][s{i}]overlay=x=0:y=0:"
                f"enable='between(t,{start_sec},{end_sec})':format=auto[{next_label}]"
            )
            prev_label = next_label

        return ";\n".join(filter_parts)

    def write(
            self,
            caption_set: CaptionSet,
            position='bottom',
            avoid_same_next_start_prev_end=False,
            align='center'
    ):
        """
        Write captions as PNG images with an FFmpeg filtergraph for creating
        a transparent video overlay.

        Returns a ZIP file containing, all under ``self.output_dir``:
        - ``subtitleNNNN.png``: one image per caption group (1-based, zero padded)
        - ``filtergraph.txt``: FFmpeg filter_complex script

        Only one language of the caption set is written: the last entry
        returned by ``caption_set.get_languages()``.

        :param caption_set: CaptionSet containing the captions to write
        :param position: Position of subtitles ('top', 'bottom', 'source')
        :param avoid_same_next_start_prev_end: Adjust timing to avoid overlaps
        :param align: Text alignment ('left', 'center', 'right')
        :return: ZIP file contents as bytes
        """
        lang = caption_set.get_languages().pop()
        caps = caption_set.get_captions(lang)

        buf = BytesIO()
        with tempfile.TemporaryDirectory() as tmp_dir:
            # The second return value (overlap information) is not needed here.
            caps_final, _overlapping = self.write_images(
                caps, lang, tmp_dir, position, align, avoid_same_next_start_prev_end
            )

            filtergraph = self._build_filtergraph(caps_final)

            # The archive must be built while the temporary directory still
            # exists, because the images are read back from it.
            with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
                for i in range(1, len(caps_final) + 1):
                    name = self._image_name(i)
                    zf.write(tmp_dir + '/' + name, f'{self.output_dir}/{name}')

                zf.writestr(f'{self.output_dir}/filtergraph.txt', filtergraph)

        return buf.getvalue()
21 changes: 20 additions & 1 deletion pycaption/scenarist.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from datetime import timedelta
from io import BytesIO

from PIL import Image

from pycaption.base import CaptionSet
from pycaption.subtitler_image_based import SubtitleImageBasedWriter

Expand Down Expand Up @@ -73,12 +75,22 @@ class ScenaristDVDWriter(SubtitleImageBasedWriter):

tiff_compression = None

# DVD subtitle palette colors
paColor = (255, 255, 255) # letter body (white)
e1Color = (190, 190, 190) # antialiasing color (gray)
e2Color = (0, 0, 0) # border color (black)
bgColor = (0, 255, 0) # background color (green - will be transparent on DVD)

def __init__(self, relativize=True, video_width=720, video_height=480, fit_to_screen=True, tape_type='NON_DROP',
frame_rate=25, compat=False):
super().__init__(relativize, video_width, video_height, fit_to_screen, frame_rate)
self.tape_type = tape_type
self.frame_rate = frame_rate

# Create palette image for quantization (4 colors only - smaller output)
self.palette_image = Image.new("P", (1, 1))
self.palette_image.putpalette([*self.paColor, *self.e1Color, *self.e2Color, *self.bgColor])

if compat:
self.color = '(1 2 3 4)'
self.contrast = '(15 15 15 0)'
Expand All @@ -87,7 +99,14 @@ def __init__(self, relativize=True, video_width=720, video_height=480, fit_to_sc
self.contrast = '(7 7 7 7)'

def save_image(self, tmp_dir, index, img):
img.save(tmp_dir + '/subtitle%04d.tif' % index, compression=self.tiff_compression)
"""Convert RGBA to paletted image for DVD subtitles."""
# Replace transparent pixels with green background
background = Image.new('RGB', img.size, self.bgColor)
background.paste(img, mask=img.split()[3]) # Use alpha channel as mask

# Quantize to 4-color palette
img_quant = background.quantize(palette=self.palette_image, dither=0)
img_quant.save(tmp_dir + '/subtitle%04d.tif' % index, compression=self.tiff_compression)

def write(
self,
Expand Down
48 changes: 20 additions & 28 deletions pycaption/subtitler_image_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,14 @@ def get_sst_pixel_display_params(video_width, video_height):
class SubtitleImageBasedWriter(BaseWriter):
VALID_POSITION = ['top', 'bottom', 'source']

paColor = (255, 255, 255) # letter body
e1Color = (190, 190, 190) # antialiasing color
e2Color = (0, 0, 0) # border color
bgColor = (0, 255, 0) # background color

palette_image = Image.new("P", (1, 1))
palette_image.putpalette([*paColor, *e1Color, *e2Color, *bgColor] + [0, 0, 0] * 252)
# Default colors for RGBA rendering
fontColor = (255, 255, 255) # white text
borderColor = (0, 0, 0) # black border

def __init__(self, relativize=True, video_width=720, video_height=480, fit_to_screen=True, frame_rate=25):
super().__init__(relativize, video_width, video_height, fit_to_screen)
self.palette = [self.paColor, self.e1Color, self.e2Color, self.bgColor]
self.frame_rate = frame_rate

palette_image = Image.new("P", (1, 1))
palette_image.putpalette([*self.paColor, *self.e1Color, *self.e2Color, *self.bgColor] + [0, 0, 0] * 252)

self.font_langs = {
Language.get('en'): {'fontfile': f"{os.path.dirname(__file__)}/NotoSansDisplay-Regular-Note-Math.ttf"},
Language.get('ru'): {'fontfile': f"{os.path.dirname(__file__)}/NotoSansDisplay-Regular-Note-Math.ttf"},
Expand Down Expand Up @@ -217,21 +209,21 @@ def write_images(
if missing_glyphs:
raise ValueError(f'Selected font was missing glyphs: {" ".join(missing_glyphs.keys())}')

font_size = int(self.video_width * 0.05 * 0.6) # rough estimate but should work
min_font_px = 16
font_size = max(min_font_px, int(self.video_width * 0.05 * 0.6)) # rough estimate but should work

fnt = ImageFont.truetype(fnt, font_size)
index = 1

for i, cap_list in enumerate(caps_final):

img = Image.new('RGB', (self.video_width, self.video_height), self.bgColor)
# Create RGBA image with transparent background
img = Image.new('RGBA', (self.video_width, self.video_height), (0, 0, 0, 0))
draw = ImageDraw.Draw(img)
self.printLine(draw, cap_list, fnt, position, align)

# quantize the image to our palette
img_quant = img.quantize(palette=self.palette_image, dither=0)
self.save_image(tmpDir, index, img_quant)

# Pass RGBA image to subclass - each subclass converts as needed
self.save_image(tmpDir, index, img)

index = index + 1

Expand Down Expand Up @@ -285,25 +277,25 @@ def printLine(self, draw: ImageDraw, caption_list: Caption, fnt: ImageFont, posi
else:
raise ValueError('Unknown "position": {}'.format(position))

borderColor = self.e2Color
fontColor = self.paColor
border = (*self.borderColor, 255) # Add alpha for RGBA
font = (*self.fontColor, 255) # Add alpha for RGBA
for adj in range(2):
# move right
draw.text((x - adj, y), text, font=fnt, fill=borderColor, align=align)
draw.text((x - adj, y), text, font=fnt, fill=border, align=align)
# move left
draw.text((x + adj, y), text, font=fnt, fill=borderColor, align=align)
draw.text((x + adj, y), text, font=fnt, fill=border, align=align)
# move up
draw.text((x, y + adj), text, font=fnt, fill=borderColor, align=align)
draw.text((x, y + adj), text, font=fnt, fill=border, align=align)
# move down
draw.text((x, y - adj), text, font=fnt, fill=borderColor, align=align)
draw.text((x, y - adj), text, font=fnt, fill=border, align=align)
# diagonal left up
draw.text((x - adj, y + adj), text, font=fnt, fill=borderColor, align=align)
draw.text((x - adj, y + adj), text, font=fnt, fill=border, align=align)
# diagonal right up
draw.text((x + adj, y + adj), text, font=fnt, fill=borderColor, align=align)
draw.text((x + adj, y + adj), text, font=fnt, fill=border, align=align)
# diagonal left down
draw.text((x - adj, y - adj), text, font=fnt, fill=borderColor, align=align)
draw.text((x - adj, y - adj), text, font=fnt, fill=border, align=align)
# diagonal right down
draw.text((x + adj, y - adj), text, font=fnt, fill=borderColor, align=align)
draw.text((x + adj, y - adj), text, font=fnt, fill=border, align=align)

draw.text((x, y), text, font=fnt, fill=fontColor, align=align)
draw.text((x, y), text, font=fnt, fill=font, align=align)
lines_written += 1
20 changes: 18 additions & 2 deletions pycaption/ttml_background.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,31 @@

class TTMLBackgroundWriter(SubtitleImageBasedWriter):

# Palette colors for TTML background images
paColor = (255, 255, 255) # letter body (white)
e1Color = (190, 190, 190) # antialiasing color (gray)
e2Color = (0, 0, 0) # border color (black)
bgColor = (0, 255, 0) # background color (green - index 3 = transparent)

def __init__(self, relativize=True, video_width=720, video_height=480, fit_to_screen=True, tape_type='NON_DROP',
frame_rate=25, compat=False):
super().__init__(relativize, video_width, video_height, fit_to_screen, frame_rate)
self.tape_type = tape_type
self.frame_rate = frame_rate

# Create palette image for quantization (4 colors only - smaller output)
self.palette_image = Image.new("P", (1, 1))
self.palette_image.putpalette([*self.paColor, *self.e1Color, *self.e2Color, *self.bgColor])

def save_image(self, tmp_dir, index, img):
# Now save with transparency
img.save(tmp_dir + '/subtitle%04d.png' % index, transparency=3)
"""Convert RGBA to paletted PNG with transparency."""
# Replace transparent pixels with green background
background = Image.new('RGB', img.size, self.bgColor)
background.paste(img, mask=img.split()[3]) # Use alpha channel as mask

# Quantize to 4-color palette
img_quant = background.quantize(palette=self.palette_image, dither=0)
img_quant.save(tmp_dir + '/subtitle%04d.png' % index, transparency=3)

def to_ttml_timestamp(self, ms: int) -> str:
hours = ms // 3_600_000
Expand Down
5 changes: 3 additions & 2 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@


srtReader = SRTReader()
c = srtReader.read(content=open("a.srt", "rb").read().decode('UTF-8-SIG'), lang='zh-Hans')
c = srtReader.read(content=open("cookoff-1080p-h264-tidpix.srt", "rb").read().decode('UTF-8-SIG'), lang='zh-Hans')
w = ScenaristDVDWriter()
w.write(c)
open("cookoff-1080p-h264-tidpix.zip", "wb").write(w.write(c))

Loading
Loading