diff --git a/docling_jobkit/datamodel/convert.py b/docling_jobkit/datamodel/convert.py
index 7e70aab..857f579 100644
--- a/docling_jobkit/datamodel/convert.py
+++ b/docling_jobkit/datamodel/convert.py
@@ -15,6 +15,16 @@
 
 from docling.datamodel import vlm_model_specs
 from docling.datamodel.base_models import InputFormat, OutputFormat
+from docling.datamodel.layout_model_specs import (
+    DOCLING_LAYOUT_EGRET_LARGE,
+    DOCLING_LAYOUT_EGRET_MEDIUM,
+    DOCLING_LAYOUT_EGRET_XLARGE,
+    DOCLING_LAYOUT_HERON,
+    DOCLING_LAYOUT_HERON_101,
+    DOCLING_LAYOUT_V2,
+    LayoutModelConfig,
+    LayoutModelType,
+)
 
 # Import new engine system (available in docling>=2.73.0)
 from docling.datamodel.pipeline_options import (
@@ -41,6 +51,15 @@
 )
 from docling_core.types.doc import ImageRefMode, PictureClassificationLabel
 
+LAYOUT_MODEL_SPECS: dict[LayoutModelType, LayoutModelConfig] = {
+    LayoutModelType.DOCLING_LAYOUT_HERON: DOCLING_LAYOUT_HERON,
+    LayoutModelType.DOCLING_LAYOUT_HERON_101: DOCLING_LAYOUT_HERON_101,
+    LayoutModelType.DOCLING_LAYOUT_EGRET_MEDIUM: DOCLING_LAYOUT_EGRET_MEDIUM,
+    LayoutModelType.DOCLING_LAYOUT_EGRET_LARGE: DOCLING_LAYOUT_EGRET_LARGE,
+    LayoutModelType.DOCLING_LAYOUT_EGRET_XLARGE: DOCLING_LAYOUT_EGRET_XLARGE,
+    LayoutModelType.DOCLING_LAYOUT_V2: DOCLING_LAYOUT_V2,
+}
+
 
 class PictureDescriptionLocal(BaseModel):
     repo_id: Annotated[
@@ -695,6 +714,23 @@ class ConvertDocumentsOptions(BaseModel):
     ] = None
 
     # Layout Configuration
+    layout_model: Annotated[
+        Optional[LayoutModelType],
+        Field(
+            default=None,
+            description=(
+                "The layout analysis model to use. "
+                f"Allowed values: {', '.join([v.value for v in LayoutModelType])}. "
+                "Optional. When set, automatically expands into layout_custom_config. "
+                "Ignored if layout_custom_config is explicitly provided."
+            ),
+            examples=[
+                LayoutModelType.DOCLING_LAYOUT_HERON.value,
+                LayoutModelType.DOCLING_LAYOUT_EGRET_LARGE.value,
+            ],
+        ),
+    ] = None
+
     layout_custom_config: Annotated[
         Optional[dict[str, Any]],
         Field(
@@ -703,7 +739,8 @@ class ConvertDocumentsOptions(BaseModel):
                 "Custom configuration for layout model. Use this to specify a "
                 "non-default kind with its options. The 'kind' field in the config dict "
                 "determines which layout implementation to use. "
-                "If not specified, uses the default kind with preset configuration."
+                "If not specified, uses the default kind with preset configuration. "
+                "Takes precedence over layout_model when both are set."
             ),
             examples=[
                 {
@@ -787,6 +824,42 @@ def validate_vlm_pipeline_model_api(cls, v):
             )
         return v
 
+    @model_validator(mode="before")
+    @classmethod
+    def expand_layout_model(cls, data: Any) -> Any:
+        """Expand layout_model into layout_custom_config when the latter is unset.
+
+        Runs on the raw input before field validation. Input that is not a dict,
+        has no layout_model, or already sets layout_custom_config is returned
+        unchanged. Otherwise a shallow copy with the expanded config is returned,
+        so the caller's mapping is never mutated.
+
+        Raises:
+            ValueError: If layout_model is not a known LayoutModelType or has no
+                entry in LAYOUT_MODEL_SPECS.
+        """
+        if not isinstance(data, dict):
+            return data
+        layout_model = data.get("layout_model")
+        if layout_model is None or data.get("layout_custom_config") is not None:
+            return data
+        model_type = LayoutModelType(layout_model)
+        spec = LAYOUT_MODEL_SPECS.get(model_type)
+        if spec is None:
+            # Surface as ValueError so pydantic reports a validation error
+            # instead of leaking a bare KeyError.
+            raise ValueError(
+                f"No layout model spec registered for {model_type.value!r}"
+            )
+        # Return a copy: the raw input may be the caller's own dict.
+        return {
+            **data,
+            "layout_custom_config": {
+                "kind": "docling_layout_default",
+                "model_spec": spec.model_dump(mode="json"),
+            },
+        }
+
     @model_validator(mode="after")
     def picture_description_exclusivity(self) -> Self:
         # Validate picture description options
diff --git a/tests/test_layout_model.py b/tests/test_layout_model.py
new file mode 100644
index 0000000..fbb8727
--- /dev/null
+++ b/tests/test_layout_model.py
@@ -0,0 +1,72 @@
+"""Tests for layout_model field expansion into layout_custom_config."""
+
+import pytest
+
+from docling.datamodel.layout_model_specs import LayoutModelType
+
+from docling_jobkit.datamodel.convert import (
+    LAYOUT_MODEL_SPECS,
+    ConvertDocumentsOptions,
+)
+
+
+class TestLayoutModelExpansion:
+    """Test that the layout_model field correctly expands into layout_custom_config."""
+
+    def test_layout_model_expands_to_custom_config(self):
+        opts = ConvertDocumentsOptions(
+            layout_model=LayoutModelType.DOCLING_LAYOUT_EGRET_LARGE,
+        )
+        assert opts.layout_custom_config is not None
+        assert opts.layout_custom_config["kind"] == "docling_layout_default"
+        spec = opts.layout_custom_config["model_spec"]
+        assert spec["name"] == "docling_layout_egret_large"
+        assert "docling-project" in spec["repo_id"]
+
+    def test_layout_model_all_types_expand(self):
+        for model_type in LayoutModelType:
+            opts = ConvertDocumentsOptions(layout_model=model_type)
+            assert opts.layout_custom_config is not None
+            expected_spec = LAYOUT_MODEL_SPECS[model_type].model_dump(mode="json")
+            assert opts.layout_custom_config["model_spec"] == expected_spec
+
+    def test_layout_custom_config_takes_precedence(self):
+        custom_config = {
+            "kind": "custom_layout_model",
+            "model_path": "/my/custom/model",
+        }
+        opts = ConvertDocumentsOptions(
+            layout_model=LayoutModelType.DOCLING_LAYOUT_EGRET_LARGE,
+            layout_custom_config=custom_config,
+        )
+        assert opts.layout_custom_config == custom_config
+
+    def test_layout_model_none_leaves_config_unset(self):
+        opts = ConvertDocumentsOptions(layout_model=None)
+        assert opts.layout_custom_config is None
+
+    def test_layout_model_string_value_accepted(self):
+        opts = ConvertDocumentsOptions(
+            **{"layout_model": "docling_layout_heron"}
+        )
+        assert opts.layout_custom_config is not None
+        assert opts.layout_custom_config["model_spec"]["name"] == "docling_layout_heron"
+
+    def test_invalid_layout_model_rejected(self):
+        with pytest.raises(ValueError):
+            ConvertDocumentsOptions(
+                **{"layout_model": "nonexistent_model"}
+            )
+
+    def test_default_layout_model_is_none(self):
+        """Verify that layout_model defaults to None (no override)."""
+        opts = ConvertDocumentsOptions()
+        assert opts.layout_model is None
+        assert opts.layout_custom_config is None
+
+    def test_input_mapping_is_not_mutated(self):
+        """The before-validator must not write into the caller's payload."""
+        payload = {"layout_model": "docling_layout_heron"}
+        opts = ConvertDocumentsOptions.model_validate(payload)
+        assert opts.layout_custom_config is not None
+        assert payload == {"layout_model": "docling_layout_heron"}