From 563da3ee9865253ce72f2b5353a258b4340eb266 Mon Sep 17 00:00:00 2001 From: Zac Scott Date: Thu, 27 Feb 2025 08:53:22 +1300 Subject: [PATCH 1/4] feat(toc): add multi-platform support --- .../docx/utility/{ => macros}/Module1.xba | 2 +- src/skelmis/docx/utility/to_pdf.py | 151 +++++++++++++----- 2 files changed, 115 insertions(+), 38 deletions(-) rename src/skelmis/docx/utility/{ => macros}/Module1.xba (99%) diff --git a/src/skelmis/docx/utility/Module1.xba b/src/skelmis/docx/utility/macros/Module1.xba similarity index 99% rename from src/skelmis/docx/utility/Module1.xba rename to src/skelmis/docx/utility/macros/Module1.xba index f0920b1..fd33db6 100644 --- a/src/skelmis/docx/utility/Module1.xba +++ b/src/skelmis/docx/utility/macros/Module1.xba @@ -22,6 +22,6 @@ Sub UpdateTOC(path As String) doc.store() doc.close(True) -end sub +End Sub diff --git a/src/skelmis/docx/utility/to_pdf.py b/src/skelmis/docx/utility/to_pdf.py index bf6f910..1a7f5be 100644 --- a/src/skelmis/docx/utility/to_pdf.py +++ b/src/skelmis/docx/utility/to_pdf.py @@ -1,34 +1,32 @@ +import hashlib import json import logging -import secrets +import os import shutil import subprocess import sys +import tempfile +import warnings +from collections.abc import Callable from pathlib import Path log = logging.getLogger(__name__) -def _update_toc_linux(docx_file: Path) -> None: - """TOC bindings for linux""" - # This method hangs if item is already open, so we cheat a little here - tmp_file = str(docx_file) + f".{secrets.token_hex(4)}.docx" - tmp_file = Path(tmp_file) - shutil.copy(docx_file, tmp_file) - - # Source: https://github.com/python-openxml/python-docx/issues/1207#issuecomment-1924053420 - subprocess.call( - [ - "libreoffice", - "--headless", - f"macro:///Standard.Module1.UpdateTOC({str(tmp_file)})", - ], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) +def get_sha1(path: Path) -> str | None: + """ + Get the SHA1 checksum of the file at `path`. + """ + if not path.exists(): + return None - shutil.copy(tmp_file, docx_file) - tmp_file.unlink() + sha1sum = hashlib.sha1() + with open(path, "rb") as src: + block = src.read(2 ** 16) + while len(block) != 0: + sha1sum.update(block) + block = src.read(2 ** 16) + return sha1sum.hexdigest().lower() def _create_pdf_windows(docx_file: Path) -> None: @@ -110,36 +108,115 @@ def stderr_results(process): sys.exit(1) -def export_libre_macro( - macro_folder: Path = Path("~/.config/libreoffice/4/user/basic/Standard"), -) -> None: - """Automatically moves the LibreOffice macro file to `macro_folder`. +def export_libre_macro(macro_folder: Path | None = None) -> None: + """ + Automatically moves the LibreOffice macro file to `macro_folder`. Warning, this overrides Module1.xba - :py:class:`Path` is where your macros live + :py:class:`Path` is where your macros live (leave None to let package choose location) """ - macro_folder = macro_folder.expanduser() - module_file = Path(__file__).parent.absolute().resolve() / "Module1.xba" - shutil.copy(module_file, macro_folder) + if macro_folder is None: + platform_paths = { + "win32": Path(os.path.expandvars("%APPDATA%"), "LibreOffice/4/user/basic/Standard"), + "linux": Path("~/.config/libreoffice/4/user/basic/Standard").expanduser(), + "darwin": Path("~/Library/Application Support/LibreOffice/4/user/basic/Standard").expanduser() + } + + try: + macro_folder = platform_paths[sys.platform] + except KeyError as e: + raise ValueError(f"Unsupported platform: {sys.platform}") from e + expect_macro_sha1 = "539afdb97c8fb21a0cd08143d6a531d7d683df21" -def update_toc(docx_file: Path | str) -> None: - """Update a TOC within a word document. + target_macro_path = macro_folder / "Module1.xba" + target_macro_sha1 = get_sha1(target_macro_path) - If you are on linux, please call `export_libre_macro` first. + if expect_macro_sha1 == target_macro_sha1: + return # No changes required + + stored_macro_path = Path(__file__).parent / "macros/Module1.xba" + stored_macro_sha1 = get_sha1(stored_macro_path) + + if expect_macro_sha1 != stored_macro_sha1: + raise ValueError( + f"Unexpected SHA1 checksum for stored macro: {stored_macro_path.name}" + f" (expected={expect_macro_sha1}, actual={stored_macro_sha1}" + ) + + log.info(f"Overwriting macro at location {target_macro_path}") + shutil.copy(stored_macro_path, target_macro_path) + + +def update_toc(docx_file: Path | str) -> None: """ - if isinstance(docx_file, str): - docx_file = Path(docx_file) + Update the table of contents and indexes within a Word document. - docx_file = docx_file.absolute().resolve() + https://github.com/python-openxml/python-docx/issues/1207#issuecomment-1924053420 + """ + docx_file = Path(docx_file).absolute().resolve() + callback: Callable[[Path], ...] if sys.platform == "linux": - _update_toc_linux(docx_file) + callback = _update_toc_linux elif sys.platform == "win32": - raise ValueError("Windows is not yet implemented yet.") + callback = _update_toc_windows + elif sys.platform == "darwin": + callback = _update_toc_macos else: - raise ValueError(f"{sys.platform} is not implemented") + raise ValueError(f"Unsupported platform: {sys.platform}") + + with tempfile.TemporaryDirectory() as temp_dir: # https://stackoverflow.com/questions/23212435 + temp_path = Path(temp_dir, "temp.docx") + + shutil.copy(docx_file, temp_path) + callback(temp_path) + shutil.copy(temp_path, docx_file) + + +def _update_toc_linux(docx_file: Path) -> None: + """ + Helper method for Linux (Call UpdateTOC binding on filepath)) + """ + subprocess.call( + [ + "libreoffice", + "--headless", + f"macro:///Standard.Module1.UpdateTOC({docx_file})", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + +def _update_toc_windows(docx_file: Path) -> None: + """ + Helper method for Windows (Call UpdateTOC binding on filepath) + """ + subprocess.call( + [ + "C:\\Program Files\\LibreOffice\\program\\soffice.exe", + f"macro:///Standard.Module1.UpdateTOC({docx_file})", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + +def _update_toc_macos(docx_file: Path) -> None: + """ + Helper method for macOS (Call UpdateTOC binding on filepath) + """ + subprocess.call( + [ + "/Applications/LibreOffice.app/Contents/MacOS/soffice", + "--headless", + f"macro:///Standard.Module1.UpdateTOC({docx_file})", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) def document_to_pdf(docx_file: Path | str) -> None: From f22b7b8e59763c8058b053634baa423a629634e6 Mon Sep 17 00:00:00 2001 From: Zac Scott Date: Thu, 27 Feb 2025 12:44:45 +1300 Subject: [PATCH 2/4] fix(toc): line endings for injected macro --- src/skelmis/docx/utility/to_pdf.py | 42 ++++++------------------------ 1 file changed, 8 insertions(+), 34 deletions(-) diff --git a/src/skelmis/docx/utility/to_pdf.py b/src/skelmis/docx/utility/to_pdf.py index 1a7f5be..249d8ec 100644 --- a/src/skelmis/docx/utility/to_pdf.py +++ b/src/skelmis/docx/utility/to_pdf.py @@ -1,4 +1,3 @@ -import hashlib import json import logging import os @@ -6,29 +5,12 @@ import subprocess import sys import tempfile -import warnings from collections.abc import Callable from pathlib import Path log = logging.getLogger(__name__) -def get_sha1(path: Path) -> str | None: - """ - Get the SHA1 checksum of the file at `path`. - """ - if not path.exists(): - return None - - sha1sum = hashlib.sha1() - with open(path, "rb") as src: - block = src.read(2 ** 16) - while len(block) != 0: - sha1sum.update(block) - block = src.read(2 ** 16) - return sha1sum.hexdigest().lower() - - def _create_pdf_windows(docx_file: Path) -> None: import win32com.client @@ -128,25 +110,17 @@ def export_libre_macro(macro_folder: Path | None = None) -> None: except KeyError as e: raise ValueError(f"Unsupported platform: {sys.platform}") from e - expect_macro_sha1 = "539afdb97c8fb21a0cd08143d6a531d7d683df21" - + source_macro_path = Path(__file__).parent / "macros/Module1.xba" target_macro_path = macro_folder / "Module1.xba" - target_macro_sha1 = get_sha1(target_macro_path) - - if expect_macro_sha1 == target_macro_sha1: - return # No changes required - stored_macro_path = Path(__file__).parent / "macros/Module1.xba" - stored_macro_sha1 = get_sha1(stored_macro_path) + source_macro_contents = os.linesep.join(source_macro_path.read_text().splitlines()) # ensure correct line endings + target_macro_contents = target_macro_path.read_text() - if expect_macro_sha1 != stored_macro_sha1: - raise ValueError( - f"Unexpected SHA1 checksum for stored macro: {stored_macro_path.name}" - f" (expected={expect_macro_sha1}, actual={stored_macro_sha1}" - ) - - log.info(f"Overwriting macro at location {target_macro_path}") - shutil.copy(stored_macro_path, target_macro_path) + if source_macro_contents != target_macro_contents: + log.info(f"Overwriting LibreOffice macro at location {target_macro_path}") + target_macro_path.write_text(source_macro_contents) + else: + log.info("LibreOffice macro up to date") def update_toc(docx_file: Path | str) -> None: From 3c469218695c8fb60fed9e1416c2669611aeecf8 Mon Sep 17 00:00:00 2001 From: Zac Scott Date: Thu, 27 Feb 2025 15:31:31 +1300 Subject: [PATCH 3/4] style: remove bare except clause https://docs.astral.sh/ruff/rules/bare-except/ --- src/skelmis/docx/utility/to_pdf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/skelmis/docx/utility/to_pdf.py b/src/skelmis/docx/utility/to_pdf.py index 249d8ec..430c36e 100644 --- a/src/skelmis/docx/utility/to_pdf.py +++ b/src/skelmis/docx/utility/to_pdf.py @@ -22,8 +22,6 @@ def _create_pdf_windows(docx_file: Path) -> None: doc = word.Documents.Open(str(docx_filepath)) try: doc.SaveAs(str(pdf_filepath), FileFormat=wdFormatPDF) - except: - raise finally: doc.Close(0) From dff7e4dc465889f9a4b08f9cdb37bebfd24d0fa7 Mon Sep 17 00:00:00 2001 From: Zac Scott Date: Thu, 27 Feb 2025 09:38:10 +1300 Subject: [PATCH 4/4] temp: comment out [project.urls] --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6ee5949..c3e5fb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,9 +7,9 @@ license = "MIT" readme = "README.md" packages = [{include = "skelmis/docx", from="src"}] -[project.urls] -Homepage = "https://github.com/skelmis/python-docx" -Documentation = "https://skelmis-docx.readthedocs.io/en/latest/" +#[project.urls] +#Homepage = "https://github.com/skelmis/python-docx" +#Documentation = "https://skelmis-docx.readthedocs.io/en/latest/" [tool.poetry.dependencies] python = "^3.10"