From c076dc0cf6c61d2571464929ae85263b8f1c2f2c Mon Sep 17 00:00:00 2001 From: Oliver Jack Date: Mon, 16 Mar 2026 22:30:17 +0100 Subject: [PATCH 1/2] feat: add artifacts_dir parameter to export_to_markdown and export_to_html Signed-off-by: Oliver Jack --- docling_core/types/doc/document.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 6b6b4132..0b0dcb1a 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -5777,6 +5777,7 @@ def export_to_markdown( include_annotations: bool = True, mark_annotations: bool = False, compact_tables: bool = False, + artifacts_dir: Optional[Union[str, Path]] = None, *, use_legacy_annotations: Optional[bool] = None, # deprecated allowed_meta_names: Optional[set[str]] = None, @@ -5829,6 +5830,10 @@ def export_to_markdown( :type mark_annotations: bool = False :param compact_tables: bool: Whether to use compact table format without column padding. (Default value = False). :type compact_tables: bool = False + :param artifacts_dir: Optional directory path where images will be saved when using + ImageRefMode.REFERENCED. If provided, images are automatically saved to this + directory and referenced in the markdown output. (Default value = None). + :type artifacts_dir: Optional[Union[str, Path]] = None :param use_legacy_annotations: bool: Deprecated; legacy annotations considered only when meta not present. :type use_legacy_annotations: Optional[bool] = None :param mark_meta: bool: Whether to mark meta in the export @@ -5854,8 +5859,16 @@ def export_to_markdown( DeprecationWarning, ) + # Handle image saving when artifacts_dir is provided + doc = self + if artifacts_dir is not None and image_mode == ImageRefMode.REFERENCED: + doc = self._with_pictures_refs( + image_dir=Path(artifacts_dir), + page_no=page_no, + ) + serializer = MarkdownDocSerializer( - doc=self, + doc=doc, params=MarkdownParams( labels=my_labels, layers=my_layers, @@ -6047,6 +6060,7 @@ def export_to_html( included_content_layers: Optional[set[ContentLayer]] = None, split_page_view: bool = False, include_annotations: bool = True, + artifacts_dir: Optional[Union[str, Path]] = None, ) -> str: r"""Serialize to HTML.""" from docling_core.transforms.serializer.html import ( @@ -6080,8 +6094,16 @@ def export_to_html( if html_head == "null": params.html_head = None + # Handle image saving when artifacts_dir is provided + doc = self + if artifacts_dir is not None and image_mode == ImageRefMode.REFERENCED: + doc = self._with_pictures_refs( + image_dir=Path(artifacts_dir), + page_no=page_no, + ) + serializer = HTMLDocSerializer( - doc=self, + doc=doc, params=params, ) ser_res = serializer.serialize() From 99ccd9c6b2b38704e72aa95357e578a2aa7afdb1 Mon Sep 17 00:00:00 2001 From: Oliver Jack Date: Fri, 27 Mar 2026 17:00:56 +0100 Subject: [PATCH 2/2] feat: rename artifacts_dir to image_dir for consistency Signed-off-by: Oliver Jack --- docling_core/types/doc/document.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 4d76ea7d..490151cc 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -5794,7 +5794,7 @@ def export_to_markdown( include_annotations: bool = True, mark_annotations: bool = False, compact_tables: bool = False, - artifacts_dir: Optional[Union[str, Path]] = None, + image_dir: Optional[Union[str, Path]] = None, traverse_pictures: bool = False, *, use_legacy_annotations: Optional[bool] = None, # deprecated @@ -5848,10 +5848,10 @@ def export_to_markdown( :type mark_annotations: bool = False :param compact_tables: bool: Whether to use compact table format without column padding. (Default value = False). :type compact_tables: bool = False - :param artifacts_dir: Optional directory path where images will be saved when using + :param image_dir: Optional directory path where images will be saved when using ImageRefMode.REFERENCED. If provided, images are automatically saved to this directory and referenced in the markdown output. (Default value = None). - :type artifacts_dir: Optional[Union[str, Path]] = None + :type image_dir: Optional[Union[str, Path]] = None :param traverse_pictures: bool: Whether to traverse into picture items and serialize their text children. Must be set to True for scanned/image-based PDFs processed with full-page OCR, where the layout model places all OCR @@ -5882,11 +5882,11 @@ def export_to_markdown( DeprecationWarning, ) - # Handle image saving when artifacts_dir is provided + # Handle image saving when image_dir is provided doc = self - if artifacts_dir is not None and image_mode == ImageRefMode.REFERENCED: + if image_dir is not None and image_mode == ImageRefMode.REFERENCED: doc = self._with_pictures_refs( - image_dir=Path(artifacts_dir), + image_dir=Path(image_dir), page_no=page_no, ) @@ -6091,7 +6091,7 @@ def export_to_html( included_content_layers: Optional[set[ContentLayer]] = None, split_page_view: bool = False, include_annotations: bool = True, - artifacts_dir: Optional[Union[str, Path]] = None, + image_dir: Optional[Union[str, Path]] = None, ) -> str: r"""Serialize to HTML.""" from docling_core.transforms.serializer.html import ( @@ -6125,11 +6125,11 @@ def export_to_html( if html_head == "null": params.html_head = None - # Handle image saving when artifacts_dir is provided + # Handle image saving when image_dir is provided doc = self - if artifacts_dir is not None and image_mode == ImageRefMode.REFERENCED: + if image_dir is not None and image_mode == ImageRefMode.REFERENCED: doc = self._with_pictures_refs( - image_dir=Path(artifacts_dir), + image_dir=Path(image_dir), page_no=page_no, )