diff --git a/docling/cli/main.py b/docling/cli/main.py index 3bf1b322d4..fae28d08c0 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -227,6 +227,7 @@ def export_documents( print_timings: bool, export_timings: bool, image_export_mode: ImageRefMode, + image_alt_text: str = "Image", ): success_count = 0 failure_count = 0 @@ -301,7 +302,10 @@ def export_documents( fname = output_dir / f"{doc_filename}.md" _log.info(f"writing Markdown output to {fname}") conv_res.document.save_as_markdown( - filename=fname, image_mode=image_export_mode + filename=fname, + image_mode=image_export_mode, + # Note: image_alt_text support requires docling-core update + # image_alt_text=image_alt_text, ) # Export Document Tags format: @@ -412,6 +416,13 @@ def convert( # noqa: C901 help="Image export mode for the document (only in case of JSON, Markdown or HTML). With `placeholder`, only the position of the image is marked in the output. In `embedded` mode, the image is embedded as base64 encoded string. In `referenced` mode, the image is exported in PNG format and referenced from the main exported document.", ), ] = ImageRefMode.EMBEDDED, + image_alt_text: Annotated[ + str, + typer.Option( + ..., + help="Custom alt text for images in markdown output. This text replaces the default 'Image' in markdown image links like ![Image](...).", + ), + ] = "Image", pipeline: Annotated[ ProcessingPipeline, typer.Option(..., help="Choose the pipeline to process PDF or image files."), @@ -962,6 +973,7 @@ def convert( # noqa: C901 print_timings=profiling, export_timings=save_profiling, image_export_mode=image_export_mode, + image_alt_text=image_alt_text, ) end_time = time.time() - start_time diff --git a/docs/concepts/serialization.md b/docs/concepts/serialization.md index d582056f20..ed01575fae 100644 --- a/docs/concepts/serialization.md +++ b/docs/concepts/serialization.md @@ -34,6 +34,32 @@ The respective `DoclingDocument` export methods (e.g. 
`export_to_markdown()`) are provided as user shorthands — internally directly instantiating and delegating to respective serializers. +### Markdown image options + +The Markdown export supports two image-related options: + +- `image_mode` (`ImageRefMode`): `PLACEHOLDER`, `EMBEDDED`, or `REFERENCED` +- `image_alt_mode` (`ImageAltTextMode`): `STATIC`, `CAPTION`, or `DESCRIPTION` + +Example command line: + +```sh +docling --image-alt-mode=caption FILE +``` + +Example code: + +```py +from docling_core.transforms.serializer.markdown import ImageAltTextMode +from docling_core.types.doc import ImageRefMode + +doc.save_as_markdown( + "output.md", + image_mode=ImageRefMode.EMBEDDED, + image_alt_mode=ImageAltTextMode.CAPTION, +) +``` + ## Examples For an example showcasing how to use serializers, see diff --git a/docs/examples/export_figures.py b/docs/examples/export_figures.py index 74cec31ee6..c994961bd9 100644 --- a/docs/examples/export_figures.py +++ b/docs/examples/export_figures.py @@ -18,6 +18,7 @@ # - `IMAGE_RESOLUTION_SCALE`: increase to render higher-resolution images (e.g., 2.0). # - `PdfPipelineOptions.generate_page_images`/`generate_picture_images`: preserve images for export. # - `ImageRefMode`: choose `EMBEDDED` or `REFERENCED` when saving Markdown/HTML. +# - `ImageAltTextMode`: choose `STATIC`, `CAPTION`, or `DESCRIPTION` for image alt text in Markdown. # # Input document # - Defaults to `tests/data/pdf/2206.01062.pdf`. Change `input_doc_path` as needed. 
@@ -28,6 +29,7 @@ import time from pathlib import Path +from docling_core.transforms.serializer.markdown import ImageAltTextMode from docling_core.types.doc import ImageRefMode, PictureItem, TableItem from docling.datamodel.base_models import InputFormat @@ -102,6 +104,14 @@ def main(): md_filename = output_dir / f"{doc_filename}-with-image-refs.md" conv_res.document.save_as_markdown(md_filename, image_mode=ImageRefMode.REFERENCED) + # Save markdown with captions as alt text (uses image captions instead of static "Image") + md_filename = output_dir / f"{doc_filename}-with-caption-alt.md" + conv_res.document.save_as_markdown( + md_filename, + image_mode=ImageRefMode.EMBEDDED, + image_alt_mode=ImageAltTextMode.CAPTION, + ) + # Save HTML with externally referenced pictures html_filename = output_dir / f"{doc_filename}-with-image-refs.html" conv_res.document.save_as_html(html_filename, image_mode=ImageRefMode.REFERENCED) diff --git a/tests/test_cli.py b/tests/test_cli.py index 2a7a3792b9..f1074c4ea0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -90,3 +90,22 @@ def test_cli_audio_extensions_coverage(): assert ext in audio_extensions, ( f"Audio extension {ext} not found in FormatToExtensions[InputFormat.AUDIO]" ) + + +def test_cli_image_alt_text(tmp_path): + """Test that --image-alt-text option is properly handled. + + This test exercises the image_alt_text parameter in the export_documents function, + ensuring the --image-alt-text CLI option correctly passes through to markdown export. + """ + source = "./tests/data/pdf/2305.03393v1-pg9.pdf" + output = tmp_path / "out" + output.mkdir() + + # Test with custom alt text + result = runner.invoke( + app, [source, "--output", str(output), "--image-alt-text", "Figure"] + ) + assert result.exit_code == 0 + converted = output / f"{Path(source).stem}.md" + assert converted.exists()