From 5336615375c66000e63a492d9f82a6cb99cf1d4d Mon Sep 17 00:00:00 2001 From: Seth Falco Date: Fri, 21 Jul 2023 14:50:00 +0100 Subject: [PATCH] fix: remove html tags in overviews --- resources/lib/item_functions.py | 4 +- resources/lib/plainhtmlparser.py | 123 +++++++++++++++++++++++++++++++ resources/lib/utils.py | 14 ++++ 3 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 resources/lib/plainhtmlparser.py diff --git a/resources/lib/item_functions.py b/resources/lib/item_functions.py index 45dd2e1c..d1188a2e 100644 --- a/resources/lib/item_functions.py +++ b/resources/lib/item_functions.py @@ -10,7 +10,7 @@ import xbmcgui from .utils import ( - datetime_from_string, get_art_url, image_url, get_current_datetime + datetime_from_string, get_art_url, image_url, get_current_datetime, plainify_html ) from .lazylogger import LazyLogger @@ -300,7 +300,7 @@ def extract_item_info(item, gui_options): item_details.resume_time = int(reasonable_ticks / 10000) item_details.series_name = item.get("SeriesName", '') - item_details.plot = item.get("Overview", '') + item_details.plot = plainify_html(item.get("Overview", '')) runtime = item.get("RunTimeTicks") if item_details.is_folder is False and runtime: diff --git a/resources/lib/plainhtmlparser.py b/resources/lib/plainhtmlparser.py new file mode 100644 index 00000000..3c9ee36c --- /dev/null +++ b/resources/lib/plainhtmlparser.py @@ -0,0 +1,123 @@ +from html.parser import HTMLParser + + +class PlainHTMLParser(HTMLParser): + ''' + HTMLParser implementation that strips HTML tags, preserving the content. + This is not intended to interpret HTML, nor output sanitized and secure HTML + that's safe to use in a web browser. + + This parses a string that may contain HTML, and removes HTML tags, and content + that isn't intended for users to read, such as