From 698393fc16d6424f85b7dee602fbea9d9aa86208 Mon Sep 17 00:00:00 2001 From: skelmis Date: Sat, 14 Jun 2025 17:31:26 +1200 Subject: [PATCH 1/5] feat: implement partial table of contents Co-authored-by: scottzach1 --- src/skelmis/docx/text/paragraph.py | 142 ++++++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 1 deletion(-) diff --git a/src/skelmis/docx/text/paragraph.py b/src/skelmis/docx/text/paragraph.py index f9a4253..fe1310c 100644 --- a/src/skelmis/docx/text/paragraph.py +++ b/src/skelmis/docx/text/paragraph.py @@ -5,16 +5,18 @@ from typing import TYPE_CHECKING, Iterator, List, cast, Literal from skelmis.docx.enum.style import WD_STYLE_TYPE +from skelmis.docx.opc.oxml import BaseOxmlElement from skelmis.docx.oxml import OxmlElement from skelmis.docx.oxml.ns import qn from skelmis.docx.oxml.text.run import CT_R -from skelmis.docx.shared import StoryChild +from skelmis.docx.shared import StoryChild, Inches, Cm, Length from skelmis.docx.styles.style import ParagraphStyle from skelmis.docx.text.hyperlink import Hyperlink from skelmis.docx.text.pagebreak import RenderedPageBreak from skelmis.docx.text.parfmt import ParagraphFormat from skelmis.docx.text.run import Run from skelmis.docx.opc.constants import RELATIONSHIP_TYPE +from skelmis.docx.enum.text import WD_TAB_LEADER, WD_TAB_ALIGNMENT if TYPE_CHECKING: import skelmis.docx.types as t @@ -30,6 +32,144 @@ def __init__(self, p: CT_P, parent: t.ProvidesStoryPart): super(Paragraph, self).__init__(parent) self._p = self._element = p + def insert_table_of_contents( + self, + *, + levels: int = 3, + starting_level: int = 1, + styles: Literal[ + "Current", + "Simple", + "Online", + "Standard", + "Modern", + "Classic", + ] = "Current", + format_table_as_links: bool = True, + show_page_numbers: bool = True, + fill_space_with: WD_TAB_LEADER = WD_TAB_LEADER.SPACES, + toc_width: Length | None = None, + hide_tab_leader_and_page_numbers_in_web_layout_view: bool = False, + ): + r""" + Insert a blank table of 
contents. + + :param levels: Number of headings to include. Default is 3. + :param starting_level: Starting heading level, useful if you want to only do say second heading levels and up. Default is 1. + :param toc_width: The width of the table of contents. This essentially tells Word how much space between the ToC content and the page number. The default value is usually fine. + :param fill_space_with: How to fill the remaining space on a line. Referred to technically as the tab stops. + :param hide_tab_leader_and_page_numbers_in_web_layout_view: Hides tab leader and page numbers in Web layout view. + + Derived from the following comment: https://github.com/python-openxml/python-docx/issues/36#issuecomment-2739396561 + + Raw XML to insert: + ```xml + + + + + + + + + + + TOC \o "1-3" \h \z \u + + + + + + + + + + + + Right click to update TOC + + + + + + + + + + + ``` + """ + # + # Pos here is how wide to make the tabs. + # I.e. if we want to make this the width of the page + # then we need to set the tab stop to the max page width or close + # If too wide then it will spill over lines + if toc_width is not None or fill_space_with != WD_TAB_LEADER.SPACES: + # We don't always add this as by default word with auto + # calculate the width which is a better way to do it if defaults are provided + if toc_width is None: + # Best guess at expected 'default' width + # noinspection PyUnresolvedReferences,PyProtectedMember + base_section = self._parent._parent.sections[0] + toc_width = base_section.page_width - ( + base_section.right_margin + base_section.left_margin + ) + + self.paragraph_format.tab_stops.add_tab_stop( + toc_width, WD_TAB_ALIGNMENT.RIGHT, fill_space_with + ) + + # noinspection PyListCreation + items: list[list[BaseOxmlElement]] = [] + + # + items += [[OxmlElement("w:fldChar", attrs={qn("w:fldCharType"): "begin"})]] + + # TOC \o "1-3" \h \z \u + items += [[OxmlElement("w:instrText", attrs={qn("xml:space"): "preserve"})]] + + # MERGEFORMAT switches are as 
defined here: http://officeopenxml.com/WPtableOfContents.php + format_table_as_links: str = "\\h" if format_table_as_links is True else "" + z_flag = "\\z" if hide_tab_leader_and_page_numbers_in_web_layout_view is True else "" + items[-1][ + 0 + ].text = f' TOC \\o "{starting_level}-{levels}" {format_table_as_links} {z_flag} \\u ' + # + items += [[OxmlElement("w:fldChar", attrs={qn("w:fldCharType"): "separate"})]] + # + # + # + # + # + # + # No table of contents entries found. + items += [[OxmlElement("w:rPr"), OxmlElement("w:t")]] + items[-1][0].append(OxmlElement("w:b")) + items[-1][0].append(OxmlElement("w:bCs")) + items[-1][0].append(OxmlElement("w:noProof")) + items[-1][0].append(OxmlElement("w:lang", attrs={qn("w:val"): "en-US"})) + items[-1][1].text = "Right click to update TOC" + + # + # + # + # + # + # + items += [ + [OxmlElement("w:rPr"), OxmlElement("w:fldChar", attrs={qn("w:fldCharType"): "end"})] + ] + items[-1][0].append(OxmlElement("w:b")) + items[-1][0].append(OxmlElement("w:bCs")) + items[-1][0].append(OxmlElement("w:noProof")) + + for run_contents in items: + run = self.add_run() + + for item in run_contents: + # noinspection PyProtectedMember + run._r.append(item) + def add_external_hyperlink( self, url: str, From 1e6d44410c93ca18a9817c7450d914bd016d22e2 Mon Sep 17 00:00:00 2001 From: skelmis Date: Sat, 14 Jun 2025 17:31:26 +1200 Subject: [PATCH 2/5] feat: implement partial table of contents Co-authored-by: scottzach1 --- src/skelmis/docx/text/paragraph.py | 142 ++++++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 1 deletion(-) diff --git a/src/skelmis/docx/text/paragraph.py b/src/skelmis/docx/text/paragraph.py index f9a4253..fe1310c 100644 --- a/src/skelmis/docx/text/paragraph.py +++ b/src/skelmis/docx/text/paragraph.py @@ -5,16 +5,18 @@ from typing import TYPE_CHECKING, Iterator, List, cast, Literal from skelmis.docx.enum.style import WD_STYLE_TYPE +from skelmis.docx.opc.oxml import BaseOxmlElement from skelmis.docx.oxml import 
OxmlElement from skelmis.docx.oxml.ns import qn from skelmis.docx.oxml.text.run import CT_R -from skelmis.docx.shared import StoryChild +from skelmis.docx.shared import StoryChild, Inches, Cm, Length from skelmis.docx.styles.style import ParagraphStyle from skelmis.docx.text.hyperlink import Hyperlink from skelmis.docx.text.pagebreak import RenderedPageBreak from skelmis.docx.text.parfmt import ParagraphFormat from skelmis.docx.text.run import Run from skelmis.docx.opc.constants import RELATIONSHIP_TYPE +from skelmis.docx.enum.text import WD_TAB_LEADER, WD_TAB_ALIGNMENT if TYPE_CHECKING: import skelmis.docx.types as t @@ -30,6 +32,144 @@ def __init__(self, p: CT_P, parent: t.ProvidesStoryPart): super(Paragraph, self).__init__(parent) self._p = self._element = p + def insert_table_of_contents( + self, + *, + levels: int = 3, + starting_level: int = 1, + styles: Literal[ + "Current", + "Simple", + "Online", + "Standard", + "Modern", + "Classic", + ] = "Current", + format_table_as_links: bool = True, + show_page_numbers: bool = True, + fill_space_with: WD_TAB_LEADER = WD_TAB_LEADER.SPACES, + toc_width: Length | None = None, + hide_tab_leader_and_page_numbers_in_web_layout_view: bool = False, + ): + r""" + Insert a blank table of contents. + + :param levels: Number of headings to include. Default is 3. + :param starting_level: Starting heading level, useful if you want to only do say second heading levels and up. Default is 1. + :param toc_width: The width of the table of contents. This essentially tells Word how much space between the ToC content and the page number. The default value is usually fine. + :param fill_space_with: How to fill the remaining space on a line. Referred to technically as the tab stops. + :param hide_tab_leader_and_page_numbers_in_web_layout_view: Hides tab leader and page numbers in Web layout view. 
+ + Derived from the following comment: https://github.com/python-openxml/python-docx/issues/36#issuecomment-2739396561 + + Raw XML to insert: + ```xml + + + + + + + + + + + TOC \o "1-3" \h \z \u + + + + + + + + + + + + Right click to update TOC + + + + + + + + + + + ``` + """ + # + # Pos here is how wide to make the tabs. + # I.e. if we want to make this the width of the page + # then we need to set the tab stop to the max page width or close + # If too wide then it will spill over lines + if toc_width is not None or fill_space_with != WD_TAB_LEADER.SPACES: + # We don't always add this as by default word with auto + # calculate the width which is a better way to do it if defaults are provided + if toc_width is None: + # Best guess at expected 'default' width + # noinspection PyUnresolvedReferences,PyProtectedMember + base_section = self._parent._parent.sections[0] + toc_width = base_section.page_width - ( + base_section.right_margin + base_section.left_margin + ) + + self.paragraph_format.tab_stops.add_tab_stop( + toc_width, WD_TAB_ALIGNMENT.RIGHT, fill_space_with + ) + + # noinspection PyListCreation + items: list[list[BaseOxmlElement]] = [] + + # + items += [[OxmlElement("w:fldChar", attrs={qn("w:fldCharType"): "begin"})]] + + # TOC \o "1-3" \h \z \u + items += [[OxmlElement("w:instrText", attrs={qn("xml:space"): "preserve"})]] + + # MERGEFORMAT switches are as defined here: http://officeopenxml.com/WPtableOfContents.php + format_table_as_links: str = "\\h" if format_table_as_links is True else "" + z_flag = "\\z" if hide_tab_leader_and_page_numbers_in_web_layout_view is True else "" + items[-1][ + 0 + ].text = f' TOC \\o "{starting_level}-{levels}" {format_table_as_links} {z_flag} \\u ' + # + items += [[OxmlElement("w:fldChar", attrs={qn("w:fldCharType"): "separate"})]] + # + # + # + # + # + # + # No table of contents entries found. 
+ items += [[OxmlElement("w:rPr"), OxmlElement("w:t")]] + items[-1][0].append(OxmlElement("w:b")) + items[-1][0].append(OxmlElement("w:bCs")) + items[-1][0].append(OxmlElement("w:noProof")) + items[-1][0].append(OxmlElement("w:lang", attrs={qn("w:val"): "en-US"})) + items[-1][1].text = "Right click to update TOC" + + # + # + # + # + # + # + items += [ + [OxmlElement("w:rPr"), OxmlElement("w:fldChar", attrs={qn("w:fldCharType"): "end"})] + ] + items[-1][0].append(OxmlElement("w:b")) + items[-1][0].append(OxmlElement("w:bCs")) + items[-1][0].append(OxmlElement("w:noProof")) + + for run_contents in items: + run = self.add_run() + + for item in run_contents: + # noinspection PyProtectedMember + run._r.append(item) + def add_external_hyperlink( self, url: str, From 84461443b2c515af05565cdcc0270d85fd6d4bce Mon Sep 17 00:00:00 2001 From: skelmis Date: Sun, 15 Jun 2025 14:40:39 +1200 Subject: [PATCH 3/5] feat: finish implementing the toc --- src/skelmis/docx/text/paragraph.py | 35 ++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/skelmis/docx/text/paragraph.py b/src/skelmis/docx/text/paragraph.py index fe1310c..1b06175 100644 --- a/src/skelmis/docx/text/paragraph.py +++ b/src/skelmis/docx/text/paragraph.py @@ -2,6 +2,7 @@ from __future__ import annotations +import io from typing import TYPE_CHECKING, Iterator, List, cast, Literal from skelmis.docx.enum.style import WD_STYLE_TYPE @@ -37,16 +38,10 @@ def insert_table_of_contents( *, levels: int = 3, starting_level: int = 1, - styles: Literal[ - "Current", - "Simple", - "Online", - "Standard", - "Modern", - "Classic", - ] = "Current", + styles: list[tuple[int, str]] | None = None, format_table_as_links: bool = True, show_page_numbers: bool = True, + hide_page_numbers_for_heading_range: str | None = None, fill_space_with: WD_TAB_LEADER = WD_TAB_LEADER.SPACES, toc_width: Length | None = None, hide_tab_leader_and_page_numbers_in_web_layout_view: bool = False, @@ -56,9 +51,13 @@ def 
insert_table_of_contents( :param levels: Number of headings to include. Default is 3. :param starting_level: Starting heading level, useful if you want to only do say second heading levels and up. Default is 1. + :param format_table_as_links: If true, ToC entries are hyperlinks to within the document. :param toc_width: The width of the table of contents. This essentially tells Word how much space between the ToC content and the page number. The default value is usually fine. + :param show_page_numbers: If false, don't include page numbers within the toc. + :param hide_page_numbers_for_heading_range: Only show page numbers for headings outside of this range. Format is ``start-end``, e.g. 2-5 to only show page numbers for first level headings. ``show_page_numbers`` must be ``True`` for this setting to work. :param fill_space_with: How to fill the remaining space on a line. Referred to technically as the tab stops. :param hide_tab_leader_and_page_numbers_in_web_layout_view: Hides tab leader and page numbers in Web layout view. + :param styles: The paragraph styles to use instead of the built-in ones. Format is list[tuple[int(HeadingLevel), str(StyleName)]]. N.b this field follows the spec, but is not tested for correctness currently. Derived from the following comment: https://github.com/python-openxml/python-docx/issues/36#issuecomment-2739396561 @@ -131,9 +130,27 @@ def insert_table_of_contents( # MERGEFORMAT switches are as defined here: http://officeopenxml.com/WPtableOfContents.php format_table_as_links: str = "\\h" if format_table_as_links is True else "" z_flag = "\\z" if hide_tab_leader_and_page_numbers_in_web_layout_view is True else "" + n_flag = "" if show_page_numbers is True else "\\n" + if hide_page_numbers_for_heading_range is not None: + if show_page_numbers is False: + raise ValueError( + "hide_page_numbers_for_heading_range " + "and show_page_numbers are mutually exclusive options." 
+ ) + + n_flag = f"\\n {hide_page_numbers_for_heading_range}" + + t_flag: str = "" + if styles is not None: + entries = [] + for heading, style in sorted(styles, key=lambda s: s[0]): + entries.append(f"{style},{heading}") + + t_flag = f"\\t \"{','.join(entries)}\"" + items[-1][ 0 - ].text = f' TOC \\o "{starting_level}-{levels}" {format_table_as_links} {z_flag} \\u ' + ].text = f' TOC \\o "{starting_level}-{levels}" {format_table_as_links} {t_flag} {z_flag} {n_flag} \\u ' # items += [[OxmlElement("w:fldChar", attrs={qn("w:fldCharType"): "separate"})]] # From cf99b9a4ac9885ef914efa875f2d7d7422a3520f Mon Sep 17 00:00:00 2001 From: skelmis Date: Sun, 15 Jun 2025 14:41:45 +1200 Subject: [PATCH 4/5] Add ToC to README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d045ab3..3cea60b 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Key differences at a glance: - Supporting the ability to transform word documents into PDF's ([1](https://skelmis-docx.readthedocs.io/en/latest/api/utility.html#docx.utility.document_to_pdf)) - Horizontal rules + paragraph bounding boxes / borders ([1](https://skelmis-docx.readthedocs.io/en/latest/api/text.html#docx.text.paragraph.Paragraph.insert_horizontal_rule), [2](https://skelmis-docx.readthedocs.io/en/latest/api/text.html#docx.text.paragraph.Paragraph.draw_paragraph_border)) - External hyperlinks ([1](https://skelmis-docx.readthedocs.io/en/latest/api/text.html#docx.text.paragraph.Paragraph.add_external_hyperlink)) +- The ability to insert a customisable Table of Contents (ToC) ([1](https://skelmis-docx.readthedocs.io/en/latest/api/text.html#docx.text.paragraph.Paragraph.insert_table_of_contents)) ## Installation From 77c089b32c1688d0be7e7b3c0c4d5fe00f5d04c2 Mon Sep 17 00:00:00 2001 From: skelmis Date: Sun, 15 Jun 2025 14:42:35 +1200 Subject: [PATCH 5/5] Remove unneeded imports --- src/skelmis/docx/text/paragraph.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff 
--git a/src/skelmis/docx/text/paragraph.py b/src/skelmis/docx/text/paragraph.py index 1b06175..21e28a4 100644 --- a/src/skelmis/docx/text/paragraph.py +++ b/src/skelmis/docx/text/paragraph.py @@ -2,22 +2,21 @@ from __future__ import annotations -import io -from typing import TYPE_CHECKING, Iterator, List, cast, Literal +from typing import TYPE_CHECKING, Iterator, List, cast from skelmis.docx.enum.style import WD_STYLE_TYPE +from skelmis.docx.enum.text import WD_TAB_LEADER, WD_TAB_ALIGNMENT +from skelmis.docx.opc.constants import RELATIONSHIP_TYPE from skelmis.docx.opc.oxml import BaseOxmlElement from skelmis.docx.oxml import OxmlElement from skelmis.docx.oxml.ns import qn from skelmis.docx.oxml.text.run import CT_R -from skelmis.docx.shared import StoryChild, Inches, Cm, Length +from skelmis.docx.shared import StoryChild, Length from skelmis.docx.styles.style import ParagraphStyle from skelmis.docx.text.hyperlink import Hyperlink from skelmis.docx.text.pagebreak import RenderedPageBreak from skelmis.docx.text.parfmt import ParagraphFormat from skelmis.docx.text.run import Run -from skelmis.docx.opc.constants import RELATIONSHIP_TYPE -from skelmis.docx.enum.text import WD_TAB_LEADER, WD_TAB_ALIGNMENT if TYPE_CHECKING: import skelmis.docx.types as t