From 139e5756dcbfa60adcb27afeb2ab7a48f4540154 Mon Sep 17 00:00:00 2001 From: zhangboyi <1907830620@qq.com> Date: Sun, 13 Mar 2022 15:10:03 +0000 Subject: [PATCH 1/2] fix: The export format is incorrect when the table tag contains < p > or < br > #378 --- .gitpod.yml | 8 ++++++++ html2text/__init__.py | 23 +++++++++++++++++------ 2 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 .gitpod.yml diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 0000000..14ecd7b --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,8 @@ +# This configuration file was automatically generated by Gitpod. +# Please adjust to your needs (see https://www.gitpod.io/docs/config-gitpod-file) +# and commit this file to your remote git repository to share the goodness with others. + +tasks: + - init: pip install . + + diff --git a/html2text/__init__.py b/html2text/__init__.py index c59ae16..57bb36d 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -53,6 +53,7 @@ def __init__( self.split_next_td = False self.td_count = 0 self.table_start = False + self.table_label_start = False self.unicode_snob = config.UNICODE_SNOB # covered in cli self.escape_snob = config.ESCAPE_SNOB # covered in cli self.links_each_paragraph = config.LINKS_EACH_PARAGRAPH @@ -366,18 +367,26 @@ def handle_tag( self.p() else: self.soft_br() - elif self.astack: + elif self.astack and tag == "div" : pass + elif self.table_label_start and tag == "p" and start: + pass + elif self.table_label_start and tag == "p" and not start: + self.o("
") else: self.p() if tag == "br" and start: - if self.astack: - self.space = True - elif self.blockquote > 0: - self.o(" \n> ") + if start: + if self.blockquote > 0: + self.o(" \n> ") + elif self.table_label_start: + pass + else: + self.o(" \n") else: - self.o(" \n") + if self.table_label_start: + self.o("
") if tag == "hr" and start: self.p() @@ -683,10 +692,12 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None: if tag == "table": if start: self.table_start = True + self.table_label_start = True if self.pad_tables: self.o("<" + config.TABLE_MARKER_FOR_PAD + ">") self.o(" \n") else: + self.table_label_start = False if self.pad_tables: # add break in case the table is empty or its 1 row table self.soft_br() From 5962504bc6edc37ae28ac8e198aebc3f09aa7c76 Mon Sep 17 00:00:00 2001 From: zhangboyi <40347463+zhangboyi@users.noreply.github.com> Date: Mon, 14 Mar 2022 09:31:34 +0800 Subject: [PATCH 2/2] Delete .gitpod.yml --- .gitpod.yml | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 .gitpod.yml diff --git a/.gitpod.yml b/.gitpod.yml deleted file mode 100644 index 14ecd7b..0000000 --- a/.gitpod.yml +++ /dev/null @@ -1,8 +0,0 @@ -# This configuration file was automatically generated by Gitpod. -# Please adjust to your needs (see https://www.gitpod.io/docs/config-gitpod-file) -# and commit this file to your remote git repository to share the goodness with others. - -tasks: - - init: pip install . - -