Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 24 additions & 11 deletions html2text/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ def __init__(
# Stack of abbreviations to write later
self.abbr_list = {} # type: Dict[str, str]
self.baseurl = baseurl
self.stressed = False
self.start_stressed = False
self.within_stressed_cnt = 0
self.preceding_stressed = False
self.preceding_data = ""
self.current_tag = ""
Expand Down Expand Up @@ -407,6 +408,16 @@ def handle_tag(
self.blockquote -= 1
self.p()

def enter_stressed():
    """Update the emphasis-tracking state for the tag being handled.

    Invoked for both opening and closing emphasis, strong and strike
    tags. Takes no arguments: ``start`` and ``self`` are read from the
    enclosing ``handle_tag`` scope.
    """
    if start:
        # Opening tag: flag that the next text chunk is the first one
        # inside the markup, and record one more level of nesting.
        self.start_stressed = True
        self.within_stressed_cnt += 1
    else:
        # Closing tag: leave one nesting level. Clamp at zero so a stray
        # closing tag in unbalanced HTML cannot drive the counter
        # negative; a negative value is truthy and would make the
        # ``within_stressed_cnt`` check in handle_data treat all later
        # text as if it were still inside stressed markup.
        self.within_stressed_cnt = max(0, self.within_stressed_cnt - 1)
        self.start_stressed = False
        self.preceding_stressed = True


if tag in ["em", "i", "u"] and not self.ignore_emphasis:
# Separate with a space if we immediately follow an alphanumeric
# character, since otherwise Markdown won't render the emphasis
Expand All @@ -425,8 +436,7 @@ def handle_tag(
emphasis = self.emphasis_mark

self.o(emphasis)
if start:
self.stressed = True
enter_stressed()

if tag in ["strong", "b"] and not self.ignore_emphasis:
# Separate with space if we immediately follow an * character, since
Expand All @@ -444,8 +454,7 @@ def handle_tag(
strong = self.strong_mark

self.o(strong)
if start:
self.stressed = True
enter_stressed()

if tag in ["del", "strike", "s"]:
if start and self.preceding_data and self.preceding_data[-1] == "~":
Expand All @@ -455,8 +464,7 @@ def handle_tag(
strike = "~~"

self.o(strike)
if start:
self.stressed = True
enter_stressed()

if self.google_doc:
if not self.inheader:
Expand Down Expand Up @@ -852,10 +860,15 @@ def handle_data(self, data: str, entity_char: bool = False) -> None:
# LEFT-TO-RIGHT MARK.
return

if self.stressed:
data = data.strip()
self.stressed = False
self.preceding_stressed = True
stripped_data = data.strip()
if self.start_stressed:
data = stripped_data
self.start_stressed = False
elif self.within_stressed_cnt:
if stripped_data:
data = " " + stripped_data
else:
data = stripped_data
elif self.preceding_stressed:
if (
re.match(r"[^][(){}\s.!?]", data[0])
Expand Down
5 changes: 5 additions & 0 deletions test/inline_within_stressed.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<p><b>TEXT <a href="http://test.org">LINK</a> </b></p>

<p><b>TE<i>X</i>T </b></p>

<p><b>TE<i>XT</i> </b></p>
6 changes: 6 additions & 0 deletions test/inline_within_stressed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
**TEXT[ LINK](http://test.org)**

**TE _X_ T**

**TE _XT_**