docling-project · s1v4-d · Dec 31, 2025
diff --git a/docling_core/transforms/serializer/markdown.py b/docling_core/transforms/serializer/markdown.py
@@ -77,12 +77,27 @@ class OrigListItemMarkerMode(str, Enum):
     AUTO = "auto"
 
 
+class ImageAltTextMode(str, Enum):
+    """Selects the source of the alt text in markdown image links."""
+
+    STATIC = "static"  # always the literal "Image"
+    CAPTION = "caption"  # first caption's text; "Image" if there is none
+    DESCRIPTION = "description"  # meta description text; "Image" if missing
+
+
 class MarkdownParams(CommonParams):
     """Markdown-specific serialization parameters."""
 
     layers: set[ContentLayer] = {ContentLayer.BODY}
     image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER
     image_placeholder: str = "<!-- image -->"
+    image_alt_mode: ImageAltTextMode = Field(
+        default=ImageAltTextMode.STATIC,
+        description=(
+            "Mode for image alt text: 'static' uses 'Image', "
+            "'caption' uses caption text, 'description' uses AI-generated description."
+        ),
+    )
     enable_chart_tables: bool = True
     indent: int = 4
     wrap_width: Optional[PositiveInt] = None
@@ -473,6 +488,7 @@ def serialize(
                 doc=doc,
                 image_mode=params.image_mode,
                 image_placeholder=params.image_placeholder,
+                image_alt_mode=params.image_alt_mode,
             )
             if img_res.text:
                 res_parts.append(img_res)
@@ -502,19 +518,39 @@ def serialize(
 
         return create_ser_result(text=text_res, span_source=res_parts)
 
+    def _get_alt_text(
+        self, item: PictureItem, doc: DoclingDocument, alt_mode: ImageAltTextMode
+    ) -> str:
+        """Return markdown-safe alt text for `item` per `alt_mode`, else 'Image'.
+
+        Whitespace runs (incl. newlines) collapse to one space; backslashes and
+        square brackets are escaped so the text cannot end ``![...]`` early.
+        """
+        raw = None
+        if alt_mode == ImageAltTextMode.DESCRIPTION and item.meta:
+            raw = getattr(item.meta.description, "text", None)
+        elif alt_mode == ImageAltTextMode.CAPTION and item.captions:
+            raw = getattr(item.captions[0].resolve(doc), "text", None)
+        escapes = str.maketrans({"\\": r"\\", "[": r"\[", "]": r"\]"})
+        flat = " ".join(raw.split()).translate(escapes) if raw else ""
+        return flat or "Image"
+
     def _serialize_image_part(
         self,
         item: PictureItem,
         doc: DoclingDocument,
         image_mode: ImageRefMode,
         image_placeholder: str,
+        image_alt_mode: ImageAltTextMode = ImageAltTextMode.STATIC,
         **kwargs: Any,
     ) -> SerializationResult:
         error_response = (
             "<!-- 🖼️❌ Image not available. "
             "Please use `PdfPipelineOptions(generate_picture_images=True)`"
             " -->"
         )
+        alt_text = self._get_alt_text(item=item, doc=doc, alt_mode=image_alt_mode)
+
         if image_mode == ImageRefMode.PLACEHOLDER:
             text_res = image_placeholder
         elif image_mode == ImageRefMode.EMBEDDED:
@@ -524,15 +560,15 @@ def _serialize_image_part(
                 and isinstance(item.image.uri, AnyUrl)
                 and item.image.uri.scheme == "data"
             ):
-                text = f"![Image]({item.image.uri})"
+                text = f"![{alt_text}]({item.image.uri})"
                 text_res = text
             else:
                 # get the item.image._pil or crop it out of the page-image
                 img = item.get_image(doc=doc)
 
                 if img is not None:
                     imgb64 = item._image_to_base64(img)
-                    text = f"![Image](data:image/png;base64,{imgb64})"
+                    text = f"![{alt_text}](data:image/png;base64,{imgb64})"
 
                     text_res = text
                 else:
@@ -543,7 +579,7 @@ def _serialize_image_part(
             ):
                 text_res = image_placeholder
             else:
-                text_res = f"![Image]({str(item.image.uri)})"
+                text_res = f"![{alt_text}]({str(item.image.uri)})"
         else:
             text_res = image_placeholder
 

diff --git a/test/test_serialization.py b/test/test_serialization.py
@@ -11,6 +11,7 @@
     HTMLParams,
 )
 from docling_core.transforms.serializer.markdown import (
+    ImageAltTextMode,
     MarkdownDocSerializer,
     MarkdownParams,
     OrigListItemMarkerMode,
@@ -19,7 +20,9 @@
 from docling_core.types.doc.base import ImageRefMode
 from docling_core.types.doc.document import (
     DescriptionAnnotation,
+    DescriptionMetaField,
     DoclingDocument,
+    PictureMeta,
     TableCell,
     TableData,
 )
@@ -338,6 +341,208 @@ def test_md_single_row_table():
     verify(exp_file=exp_file, actual=actual)
 
 
+# ===============================
+# Image Alt Text Mode tests
+# ===============================
+
+
+def test_md_image_alt_mode_static():
+    """Check STATIC mode: the alt text is always the literal "Image".
+
+    A caption and a description are attached only to prove both are ignored.
+    """
+    from PIL import Image as PILImage
+
+    from docling_core.types.doc import ImageRef
+
+    document = DoclingDocument(name="test_alt_static")
+    caption = document.add_text(label=DocItemLabel.CAPTION, text="My figure caption")
+    gray_square = PILImage.new(mode="RGB", size=(10, 10), color=(128, 128, 128))
+    image_ref = ImageRef.from_pil(image=gray_square, dpi=72)
+    picture = document.add_picture(caption=caption, image=image_ref)
+    picture.meta = PictureMeta(
+        description=DescriptionMetaField(text="AI-generated description of the image")
+    )
+
+    params = MarkdownParams(
+        image_mode=ImageRefMode.EMBEDDED,
+        image_alt_mode=ImageAltTextMode.STATIC,
+    )
+    serializer = MarkdownDocSerializer(doc=document, params=params)
+    output = serializer.serialize().text
+
+    assert "![Image](data:image/png;base64," in output
+    assert "![AI-generated description" not in output
+    assert "![My figure caption]" not in output
+
+
+def test_md_image_alt_mode_description():
+    """Check DESCRIPTION mode: the picture's description becomes the alt text.
+
+    The generic "Image" alt text must not appear anywhere in the output.
+    """
+    from PIL import Image as PILImage
+
+    from docling_core.types.doc import ImageRef
+
+    document = DoclingDocument(name="test_alt_description")
+    caption = document.add_text(label=DocItemLabel.CAPTION, text="My figure caption")
+    gray_square = PILImage.new(mode="RGB", size=(10, 10), color=(128, 128, 128))
+    image_ref = ImageRef.from_pil(image=gray_square, dpi=72)
+    picture = document.add_picture(caption=caption, image=image_ref)
+    picture.meta = PictureMeta(
+        description=DescriptionMetaField(text="AI-generated description of the image")
+    )
+
+    params = MarkdownParams(
+        image_mode=ImageRefMode.EMBEDDED,
+        image_alt_mode=ImageAltTextMode.DESCRIPTION,
+    )
+    serializer = MarkdownDocSerializer(doc=document, params=params)
+    output = serializer.serialize().text
+
+    assert "![AI-generated description of the image](data:image/png;base64," in output
+    assert "![Image](" not in output
+
+
+def test_md_image_alt_mode_caption():
+    """Check CAPTION mode: the picture's caption text becomes the alt text.
+
+    The generic "Image" alt text must not appear anywhere in the output.
+    """
+    from PIL import Image as PILImage
+
+    from docling_core.types.doc import ImageRef
+
+    document = DoclingDocument(name="test_alt_caption")
+    caption = document.add_text(label=DocItemLabel.CAPTION, text="My figure caption")
+    gray_square = PILImage.new(mode="RGB", size=(10, 10), color=(128, 128, 128))
+    image_ref = ImageRef.from_pil(image=gray_square, dpi=72)
+    picture = document.add_picture(caption=caption, image=image_ref)
+    picture.meta = PictureMeta(
+        description=DescriptionMetaField(text="AI-generated description of the image")
+    )
+
+    params = MarkdownParams(
+        image_mode=ImageRefMode.EMBEDDED,
+        image_alt_mode=ImageAltTextMode.CAPTION,
+    )
+    serializer = MarkdownDocSerializer(doc=document, params=params)
+    output = serializer.serialize().text
+
+    assert "![My figure caption](data:image/png;base64," in output
+    assert "![Image](" not in output
+
+
+def test_md_image_alt_mode_description_fallback():
+    """Check DESCRIPTION mode without a description.
+
+    The alt text must fall back to the literal "Image".
+    """
+    from PIL import Image as PILImage
+
+    from docling_core.types.doc import ImageRef
+
+    document = DoclingDocument(name="test_alt_fallback")
+    caption = document.add_text(label=DocItemLabel.CAPTION, text="My figure caption")
+    gray_square = PILImage.new(mode="RGB", size=(10, 10), color=(128, 128, 128))
+    image_ref = ImageRef.from_pil(image=gray_square, dpi=72)
+    # No PictureMeta is attached, so the picture carries no description.
+    document.add_picture(caption=caption, image=image_ref)
+
+    params = MarkdownParams(
+        image_mode=ImageRefMode.EMBEDDED,
+        image_alt_mode=ImageAltTextMode.DESCRIPTION,
+    )
+    serializer = MarkdownDocSerializer(doc=document, params=params)
+    output = serializer.serialize().text
+
+    assert "![Image](data:image/png;base64," in output
+
+
+def test_md_image_alt_mode_caption_fallback():
+    """Check CAPTION mode without a caption: alt text falls back to "Image"."""
+    from PIL import Image as PILImage
+
+    from docling_core.types.doc import ImageRef
+
+    document = DoclingDocument(name="test_alt_caption_fallback")
+    gray_square = PILImage.new(mode="RGB", size=(10, 10), color=(128, 128, 128))
+    image_ref = ImageRef.from_pil(image=gray_square, dpi=72)
+    # The picture is added bare: no caption is passed along.
+    document.add_picture(image=image_ref)
+
+    params = MarkdownParams(
+        image_mode=ImageRefMode.EMBEDDED,
+        image_alt_mode=ImageAltTextMode.CAPTION,
+    )
+    serializer = MarkdownDocSerializer(doc=document, params=params)
+    output = serializer.serialize().text
+
+    assert "![Image](data:image/png;base64," in output
+
+
+def test_md_image_alt_mode_with_embedded():
+    """Check DESCRIPTION mode together with EMBEDDED images.
+
+    The description must be the alt text of the base64 data-URI image link.
+    """
+    from PIL import Image as PILImage
+
+    from docling_core.types.doc import ImageRef
+
+    document = DoclingDocument(name="test_alt_embedded")
+    caption = document.add_text(label=DocItemLabel.CAPTION, text="Embedded figure")
+    gray_square = PILImage.new(mode="RGB", size=(10, 10), color=(128, 128, 128))
+    image_ref = ImageRef.from_pil(image=gray_square, dpi=72)
+    picture = document.add_picture(caption=caption, image=image_ref)
+    picture.meta = PictureMeta(
+        description=DescriptionMetaField(text="Description for embedded image")
+    )
+
+    params = MarkdownParams(
+        image_mode=ImageRefMode.EMBEDDED,
+        image_alt_mode=ImageAltTextMode.DESCRIPTION,
+    )
+    serializer = MarkdownDocSerializer(doc=document, params=params)
+    output = serializer.serialize().text
+
+    assert "![Description for embedded image](data:image/png;base64," in output
+
+
+def test_md_image_alt_mode_with_referenced():
+    """Check DESCRIPTION mode together with REFERENCED images.
+
+    The description must be the alt text of the link to the image URI.
+    """
+    from docling_core.types.doc import ImageRef, Size
+
+    document = DoclingDocument(name="test_alt_referenced")
+    caption = document.add_text(label=DocItemLabel.CAPTION, text="Referenced figure")
+    # Point the picture at a relative file path instead of inline image data.
+    image_ref = ImageRef(
+        uri="images/figure1.png",
+        mimetype="image/png",
+        dpi=72,
+        size=Size(width=100, height=100),
+    )
+    picture = document.add_picture(caption=caption, image=image_ref)
+    picture.meta = PictureMeta(
+        description=DescriptionMetaField(text="Description for referenced image")
+    )
+
+    params = MarkdownParams(
+        image_mode=ImageRefMode.REFERENCED,
+        image_alt_mode=ImageAltTextMode.DESCRIPTION,
+    )
+    serializer = MarkdownDocSerializer(doc=document, params=params)
+    output = serializer.serialize().text
+
+    # Link start and file name are matched separately; the separator may differ.
+    assert "![Description for referenced image](images" in output
+    assert "figure1.png)" in output
+
+
 # ===============================
 # HTML tests
 # ===============================