Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions pycaption/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import re
from collections import defaultdict
from datetime import timedelta
from numbers import Number
Expand Down Expand Up @@ -306,6 +307,9 @@ class CaptionSet:
by all the children.
"""

RE_HTML_STRIP = re.compile(r"<[^>]+>")
RE_ASS_STRIP = re.compile(r"{[^}]+}")

def __init__(self, captions, styles={}, layout_info=None):
"""
:param captions: A dictionary of the format {'language': CaptionList}
Expand Down Expand Up @@ -377,6 +381,40 @@ def adjust_caption_timing(self, offset=0, rate_skew=1.0):
out_captions.append(caption)
self.set_captions(lang, out_captions)

def strip_html_tags(self):
    """
    Remove HTML-style tags from the text of every caption, in place.

    For each language in the set, every node whose type is
    ``CaptionNode.TEXT`` has all matches of ``RE_HTML_STRIP`` deleted from
    its content. The captions are then stored back, in their original
    order, as a fresh ``CaptionList``.
    """
    for language in self.get_languages():
        existing = self.get_captions(language)
        rebuilt = CaptionList()
        for entry in existing:
            # Only text nodes carry content; other node types are left untouched.
            text_nodes = (
                item for item in entry.nodes
                if item.type_ == CaptionNode.TEXT
            )
            for text_node in text_nodes:
                cleaned = self.RE_HTML_STRIP.sub("", text_node.content)
                text_node.content = cleaned
            rebuilt.append(entry)
        self.set_captions(language, rebuilt)

def strip_ass_tags(self):
    """
    Remove brace-delimited (ASS-style) tags from every caption, in place.

    For each language in the set, every node whose type is
    ``CaptionNode.TEXT`` has all matches of ``RE_ASS_STRIP`` deleted from
    its content. The captions are then stored back, in their original
    order, as a fresh ``CaptionList``.
    """
    for language in self.get_languages():
        existing = self.get_captions(language)
        rebuilt = CaptionList()
        for entry in existing:
            # Only text nodes carry content; other node types are left untouched.
            text_nodes = (
                item for item in entry.nodes
                if item.type_ == CaptionNode.TEXT
            )
            for text_node in text_nodes:
                cleaned = self.RE_ASS_STRIP.sub("", text_node.content)
                text_node.content = cleaned
            rebuilt.append(entry)
        self.set_captions(language, rebuilt)

def remove_empty_captions(self):
    """
    Drop captions that contain no non-blank text.

    For each language in the set, a caption is kept only if at least one
    of its nodes is of type ``CaptionNode.TEXT`` and has content that is
    non-empty after stripping whitespace. The surviving captions are
    stored back, in their original order, as a fresh ``CaptionList``.
    """
    for language in self.get_languages():
        existing = self.get_captions(language)
        kept = CaptionList()
        for entry in existing:
            # Every node is inspected (no early exit) before deciding.
            has_visible_text = False
            for item in entry.nodes:
                if item.type_ == CaptionNode.TEXT and item.content.strip():
                    has_visible_text = True
            if has_visible_text:
                kept.append(entry)
        self.set_captions(language, kept)


# Functions
def merge_concurrent_captions(caption_set):
Expand Down
10 changes: 10 additions & 0 deletions pycaption/srt.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@

import re
from PIL import Image, ImageFont, ImageDraw
import warnings

# Runs at import time: installs a filter with the 'once' action for
# DeprecationWarning, so the deprecation notices issued in this module are
# reported once rather than on every call.
warnings.simplefilter('once', DeprecationWarning)

class SRTReader(BaseReader):
RE_HTML = re.compile(r'<[^>]+>')
Expand All @@ -22,6 +24,14 @@ def detect(self, content):
return False

def read(self, content, lang='en-US', strip_html=False, strip_ass_tags=False):
if strip_html:
warnings.warn("Using strip_html in the read function is deprecated. "
"Use CaptionSet.strip_html_tags() instead", DeprecationWarning)

if strip_ass_tags:
warnings.warn("Using strip_ass_tags in the read function is deprecated. "
"Use CaptionSet.strip_ass_tags() instead", DeprecationWarning)

if not isinstance(content, str):
raise InvalidInputError('The content is not a unicode string.')

Expand Down
Loading