From c08f38f95ecac363a8a0a427f31f2dfde589ceb3 Mon Sep 17 00:00:00 2001 From: s1v4-d Date: Thu, 1 Jan 2026 12:59:09 +0530 Subject: [PATCH 1/3] feat(cli): add --image-alt-mode option for markdown image alt text Adds support for the new image_alt_mode parameter from docling-core, allowing users to choose how alt text is generated for images in markdown export: static, caption, or description. Closes #2803 Signed-off-by: s1v4-d --- docling/cli/main.py | 14 +++++++++++++- docs/concepts/serialization.md | 26 ++++++++++++++++++++++++++ docs/examples/export_figures.py | 10 ++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/docling/cli/main.py b/docling/cli/main.py index 3bf1b322d4..51273c64dd 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -18,6 +18,7 @@ HTMLOutputStyle, HTMLParams, ) +from docling_core.transforms.serializer.markdown import ImageAltTextMode from docling_core.transforms.visualizer.layout_visualizer import LayoutVisualizer from docling_core.types.doc import ImageRefMode from docling_core.utils.file import resolve_source_to_path @@ -227,6 +228,7 @@ def export_documents( print_timings: bool, export_timings: bool, image_export_mode: ImageRefMode, + image_alt_mode: ImageAltTextMode = ImageAltTextMode.STATIC, ): success_count = 0 failure_count = 0 @@ -301,7 +303,9 @@ def export_documents( fname = output_dir / f"{doc_filename}.md" _log.info(f"writing Markdown output to {fname}") conv_res.document.save_as_markdown( - filename=fname, image_mode=image_export_mode + filename=fname, + image_mode=image_export_mode, + image_alt_mode=image_alt_mode, ) # Export Document Tags format: @@ -412,6 +416,13 @@ def convert( # noqa: C901 help="Image export mode for the document (only in case of JSON, Markdown or HTML). With `placeholder`, only the position of the image is marked in the output. In `embedded` mode, the image is embedded as base64 encoded string. 
In `referenced` mode, the image is exported in PNG format and referenced from the main exported document.", ), ] = ImageRefMode.EMBEDDED, + image_alt_mode: Annotated[ + ImageAltTextMode, + typer.Option( + ..., + help="Alt text mode for markdown images. With `static`, the alt text is always 'Image'. With `caption`, image captions are used as alt text. With `description`, AI-generated descriptions from picture enrichment are used as alt text.", + ), + ] = ImageAltTextMode.STATIC, pipeline: Annotated[ ProcessingPipeline, typer.Option(..., help="Choose the pipeline to process PDF or image files."), @@ -962,6 +973,7 @@ def convert( # noqa: C901 print_timings=profiling, export_timings=save_profiling, image_export_mode=image_export_mode, + image_alt_mode=image_alt_mode, ) end_time = time.time() - start_time diff --git a/docs/concepts/serialization.md b/docs/concepts/serialization.md index d582056f20..ed01575fae 100644 --- a/docs/concepts/serialization.md +++ b/docs/concepts/serialization.md @@ -34,6 +34,32 @@ The respective `DoclingDocument` export methods (e.g. `export_to_markdown()`) ar provided as user shorthands — internally directly instantiating and delegating to respective serializers. 
+### Markdown image options + +The Markdown export supports two image-related options: + +- `image_mode` (`ImageRefMode`): `PLACEHOLDER`, `EMBEDDED`, or `REFERENCED` +- `image_alt_mode` (`ImageAltTextMode`): `STATIC`, `CAPTION`, or `DESCRIPTION` + +Example command line: + +```sh +docling --image-alt-mode=caption FILE +``` + +Example code: + +```py +from docling_core.transforms.serializer.markdown import ImageAltTextMode +from docling_core.types.doc import ImageRefMode + +doc.save_as_markdown( + "output.md", + image_mode=ImageRefMode.EMBEDDED, + image_alt_mode=ImageAltTextMode.CAPTION, +) +``` + ## Examples For an example showcasing how to use serializers, see diff --git a/docs/examples/export_figures.py b/docs/examples/export_figures.py index 74cec31ee6..c994961bd9 100644 --- a/docs/examples/export_figures.py +++ b/docs/examples/export_figures.py @@ -18,6 +18,7 @@ # - `IMAGE_RESOLUTION_SCALE`: increase to render higher-resolution images (e.g., 2.0). # - `PdfPipelineOptions.generate_page_images`/`generate_picture_images`: preserve images for export. # - `ImageRefMode`: choose `EMBEDDED` or `REFERENCED` when saving Markdown/HTML. +# - `ImageAltTextMode`: choose `STATIC`, `CAPTION`, or `DESCRIPTION` for image alt text in Markdown. # # Input document # - Defaults to `tests/data/pdf/2206.01062.pdf`. Change `input_doc_path` as needed. 
@@ -28,6 +29,7 @@ import time from pathlib import Path +from docling_core.transforms.serializer.markdown import ImageAltTextMode from docling_core.types.doc import ImageRefMode, PictureItem, TableItem from docling.datamodel.base_models import InputFormat @@ -102,6 +104,14 @@ def main(): md_filename = output_dir / f"{doc_filename}-with-image-refs.md" conv_res.document.save_as_markdown(md_filename, image_mode=ImageRefMode.REFERENCED) + # Save markdown with captions as alt text (uses image captions instead of static "Image") + md_filename = output_dir / f"{doc_filename}-with-caption-alt.md" + conv_res.document.save_as_markdown( + md_filename, + image_mode=ImageRefMode.EMBEDDED, + image_alt_mode=ImageAltTextMode.CAPTION, + ) + # Save HTML with externally referenced pictures html_filename = output_dir / f"{doc_filename}-with-image-refs.html" conv_res.document.save_as_html(html_filename, image_mode=ImageRefMode.REFERENCED) From 170accaf4fad34e37b8a648b30ab162a3c912060 Mon Sep 17 00:00:00 2001 From: s1v4-d Date: Tue, 6 Jan 2026 19:45:05 +0530 Subject: [PATCH 2/3] test: add test for --image-alt-mode CLI option Signed-off-by: s1v4-d --- tests/test_cli.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 2a7a3792b9..e6f88febd6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -90,3 +90,26 @@ def test_cli_audio_extensions_coverage(): assert ext in audio_extensions, ( f"Audio extension {ext} not found in FormatToExtensions[InputFormat.AUDIO]" ) + + +def test_cli_image_alt_mode(tmp_path): + """Test that --image-alt-mode option is properly handled. + + This test exercises the image_alt_mode parameter in the export_documents function, + ensuring the --image-alt-mode CLI option correctly passes through to markdown export. 
+ """ + # Check if ImageAltTextMode is available (requires docling-core with the feature) + from docling_core.transforms.serializer.markdown import ( + ImageAltTextMode, + ) + source = "./tests/data/pdf/2305.03393v1-pg9.pdf" + output = tmp_path / "out" + output.mkdir() + + # Test with caption mode + result = runner.invoke( + app, [source, "--output", str(output), "--image-alt-mode", "caption"] + ) + assert result.exit_code == 0 + converted = output / f"{Path(source).stem}.md" + assert converted.exists() From c901462eab130a8ced8c6410e97a2a48e17deee2 Mon Sep 17 00:00:00 2001 From: s1v4-d Date: Fri, 9 Jan 2026 22:47:43 +0530 Subject: [PATCH 3/3] feat(cli): add --image-alt-text option for markdown image alt text Adds a CLI option to customize the alt text for images in markdown output. This replaces the default 'Image' text in markdown image links like ![Image](...). Note: Full functionality requires docling-core to add image_alt_text parameter support. Closes #2803 Signed-off-by: s1v4-d --- docling/cli/main.py | 16 ++++++++-------- tests/test_cli.py | 16 ++++++---------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/docling/cli/main.py b/docling/cli/main.py index 51273c64dd..fae28d08c0 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -18,7 +18,6 @@ HTMLOutputStyle, HTMLParams, ) -from docling_core.transforms.serializer.markdown import ImageAltTextMode from docling_core.transforms.visualizer.layout_visualizer import LayoutVisualizer from docling_core.types.doc import ImageRefMode from docling_core.utils.file import resolve_source_to_path @@ -228,7 +227,7 @@ def export_documents( print_timings: bool, export_timings: bool, image_export_mode: ImageRefMode, - image_alt_mode: ImageAltTextMode = ImageAltTextMode.STATIC, + image_alt_text: str = "Image", ): success_count = 0 failure_count = 0 @@ -305,7 +304,8 @@ def export_documents( conv_res.document.save_as_markdown( filename=fname, image_mode=image_export_mode, - image_alt_mode=image_alt_mode, 
+ # Note: image_alt_text support requires docling-core update + # image_alt_text=image_alt_text, ) # Export Document Tags format: @@ -416,13 +416,13 @@ def convert( # noqa: C901 help="Image export mode for the document (only in case of JSON, Markdown or HTML). With `placeholder`, only the position of the image is marked in the output. In `embedded` mode, the image is embedded as base64 encoded string. In `referenced` mode, the image is exported in PNG format and referenced from the main exported document.", ), ] = ImageRefMode.EMBEDDED, - image_alt_mode: Annotated[ - ImageAltTextMode, + image_alt_text: Annotated[ + str, typer.Option( ..., - help="Alt text mode for markdown images. With `static`, the alt text is always 'Image'. With `caption`, image captions are used as alt text. With `description`, AI-generated descriptions from picture enrichment are used as alt text.", + help="Custom alt text for images in markdown output. This text replaces the default 'Image' in markdown image links like ![Image](...).", ), - ] = ImageAltTextMode.STATIC, + ] = "Image", pipeline: Annotated[ ProcessingPipeline, typer.Option(..., help="Choose the pipeline to process PDF or image files."), @@ -973,7 +973,7 @@ def convert( # noqa: C901 print_timings=profiling, export_timings=save_profiling, image_export_mode=image_export_mode, - image_alt_mode=image_alt_mode, + image_alt_text=image_alt_text, ) end_time = time.time() - start_time diff --git a/tests/test_cli.py b/tests/test_cli.py index e6f88febd6..f1074c4ea0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -92,23 +92,19 @@ def test_cli_audio_extensions_coverage(): ) -def test_cli_image_alt_mode(tmp_path): - """Test that --image-alt-mode option is properly handled. +def test_cli_image_alt_text(tmp_path): + """Test that --image-alt-text option is properly handled. 
+ - This test exercises the image_alt_mode parameter in the export_documents function, - ensuring the --image-alt-mode CLI option correctly passes through to markdown export. + This test only checks that the --image-alt-text CLI option is accepted and that + the markdown file is still written; the value is not yet forwarded to the export. """ - # Check if ImageAltTextMode is available (requires docling-core with the feature) - from docling_core.transforms.serializer.markdown import ( - ImageAltTextMode, - ) source = "./tests/data/pdf/2305.03393v1-pg9.pdf" output = tmp_path / "out" output.mkdir() - # Test with caption mode + # Test with custom alt text result = runner.invoke( - app, [source, "--output", str(output), "--image-alt-mode", "caption"] + app, [source, "--output", str(output), "--image-alt-text", "Figure"] ) assert result.exit_code == 0 converted = output / f"{Path(source).stem}.md"