diff --git a/html2text/__init__.py b/html2text/__init__.py index 7e1a279..a5f9a6d 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -130,7 +130,8 @@ def __init__( # Stack of abbreviations to write later self.abbr_list = {} # type: Dict[str, str] self.baseurl = baseurl - self.stressed = False + self.start_stressed = False + self.within_stressed_cnt = 0 self.preceding_stressed = False self.preceding_data = "" self.current_tag = "" @@ -407,6 +408,16 @@ def handle_tag( self.blockquote -= 1 self.p() + def enter_stressed(): + if start: + self.start_stressed = True + self.within_stressed_cnt += 1 + else: + self.within_stressed_cnt -= 1 + self.start_stressed = False + self.preceding_stressed = True + + if tag in ["em", "i", "u"] and not self.ignore_emphasis: # Separate with a space if we immediately follow an alphanumeric # character, since otherwise Markdown won't render the emphasis @@ -425,8 +436,7 @@ def handle_tag( emphasis = self.emphasis_mark self.o(emphasis) - if start: - self.stressed = True + enter_stressed() if tag in ["strong", "b"] and not self.ignore_emphasis: # Separate with space if we immediately follow an * character, since @@ -444,8 +454,7 @@ def handle_tag( strong = self.strong_mark self.o(strong) - if start: - self.stressed = True + enter_stressed() if tag in ["del", "strike", "s"]: if start and self.preceding_data and self.preceding_data[-1] == "~": @@ -455,8 +464,7 @@ def handle_tag( strike = "~~" self.o(strike) - if start: - self.stressed = True + enter_stressed() if self.google_doc: if not self.inheader: @@ -852,10 +860,15 @@ def handle_data(self, data: str, entity_char: bool = False) -> None: # LEFT-TO-RIGHT MARK. return - if self.stressed: - data = data.strip() - self.stressed = False - self.preceding_stressed = True + stripped_data = data.strip() + if self.start_stressed: + data = stripped_data + self.start_stressed = False + elif self.within_stressed_cnt: + if stripped_data: + data = " " + stripped_data + else: + data = stripped_data elif self.preceding_stressed: if ( re.match(r"[^][(){}\s.!?]", data[0]) diff --git a/test/inline_within_stressed.html b/test/inline_within_stressed.html new file mode 100644 index 0000000..e7c1e36 --- /dev/null +++ b/test/inline_within_stressed.html @@ -0,0 +1,5 @@ +

TEXT LINK

+ +

TEXT

+ +

TEXT

\ No newline at end of file diff --git a/test/inline_within_stressed.md b/test/inline_within_stressed.md new file mode 100644 index 0000000..9a73358 --- /dev/null +++ b/test/inline_within_stressed.md @@ -0,0 +1,6 @@ +**TEXT[ LINK](http://test.org)** + +**TE _X_ T** + +**TE _XT_** +