diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py
index 92b75c49..d7a8deaa 100644
--- a/docling_core/types/doc/document.py
+++ b/docling_core/types/doc/document.py
@@ -42,6 +42,7 @@
     model_serializer,
     model_validator,
     validate_call,
+    PrivateAttr,
 )
 from tabulate import tabulate
 from typing_extensions import Self, deprecated, override
@@ -310,7 +311,6 @@ class PictureScatterChartData(PictureChartData):
 class TableCell(BaseModel):
     """TableCell."""
 
-    bbox: Optional[BoundingBox] = None
     row_span: int = 1
     col_span: int = 1
     start_row_offset_idx: int
@@ -323,6 +323,52 @@ class TableCell(BaseModel):
     row_section: bool = False
     fillable: bool = False
 
+    prov: Optional[list["ProvenanceItem"]] = Field(default=None)
+    _bbox: Optional[BoundingBox] = PrivateAttr(default=None)
+
+    def __init__(self, **data: Any) -> None:
+        """Build the cell, accepting the former ``bbox`` field as a keyword.
+
+        ``bbox`` is not a model field any more, so it is taken out of the
+        input before validation and applied afterwards with the same rules
+        as the ``bbox`` setter. A ``bbox`` of ``None`` is ignored.
+        """
+        bbox_val = data.pop("bbox", None)
+        super().__init__(**data)
+        if bbox_val is None:
+            return
+        if isinstance(bbox_val, dict):
+            # Deserialized input arrives as a plain mapping. A private
+            # attribute is not validated, so coerce it here.
+            bbox_val = BoundingBox.model_validate(bbox_val)
+        self._assign_bbox(bbox_val)
+
+    # NOTE(review): as a plain property, bbox presumably drops out of the
+    # serialized output and the JSON schema, while the TableItem.yaml fixture
+    # still shows "bbox: null" -- confirm (e.g. computed_field) before merging.
+    @property
+    def bbox(self) -> Optional[BoundingBox]:
+        """Return the box from ``prov`` if set, else the stored one."""
+        if not self.prov:
+            return self._bbox
+        if len(self.prov) == 1:
+            return self.prov[0].bbox
+        return BoundingBox.enclosing_bbox([p.bbox for p in self.prov])
+
+    @bbox.setter
+    def bbox(self, value: Optional[BoundingBox]) -> None:
+        """Set the box on the single ``prov`` entry, else store it."""
+        self._assign_bbox(value)
+
+    def _assign_bbox(self, value: Optional[BoundingBox]) -> None:
+        """Route ``value`` to ``prov`` or ``_bbox``; shared by init/setter."""
+        if not self.prov:
+            self._bbox = value
+        elif len(self.prov) == 1:
+            self.prov[0].bbox = value
+        # With several prov entries there is no unambiguous target, so the
+        # value is deliberately left unapplied.
+
     @model_validator(mode="before")
     @classmethod
     def from_dict_format(cls, data: Any) -> Any:
diff --git a/docs/DoclingDocument.json b/docs/DoclingDocument.json
index 03b7d8cd..62de779c 100644
--- a/docs/DoclingDocument.json
+++ b/docs/DoclingDocument.json
@@ -2250,6 +2250,18 @@
           "title": "Fillable",
           "type": "boolean"
         },
+        "page_no": {
+          "anyOf": [
+            {
+              "type": "integer"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Page No"
+        },
         "ref": {
           "$ref": "#/$defs/RefItem"
         }
@@ -2523,6 +2535,18 @@
           "default": false,
           "title": "Fillable",
           "type": "boolean"
+        },
+        "page_no": {
+          "anyOf": [
+            {
+              "type": "integer"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Page No"
         }
       },
       "required": [
diff --git a/test/data/docling_document/unit/TableItem.yaml b/test/data/docling_document/unit/TableItem.yaml
index b93a89bd..f1a260b0 100644
--- a/test/data/docling_document/unit/TableItem.yaml
+++ b/test/data/docling_document/unit/TableItem.yaml
@@ -1,5 +1,7 @@
+annotations: []
 captions: []
 children: []
+content_layer: body
 data:
   grid:
   - - bbox: null
@@ -8,6 +10,7 @@ data:
       end_col_offset_idx: 1
       end_row_offset_idx: 1
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -20,6 +23,7 @@ data:
       end_col_offset_idx: 2
       end_row_offset_idx: 1
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -32,6 +36,7 @@ data:
       end_col_offset_idx: 3
       end_row_offset_idx: 1
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -44,6 +49,7 @@ data:
       end_col_offset_idx: 4
       end_row_offset_idx: 1
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -56,6 +62,7 @@ data:
       end_col_offset_idx: 5
       end_row_offset_idx: 1
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -68,6 +75,7 @@ data:
       end_col_offset_idx: 1
       end_row_offset_idx: 2
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -80,6 +88,7 @@ data:
       end_col_offset_idx: 2
       end_row_offset_idx: 2
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -92,6 +101,7 @@ data:
       end_col_offset_idx: 3
       end_row_offset_idx: 2
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -104,6 +114,7 @@ data:
       end_col_offset_idx: 4
       end_row_offset_idx: 2
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -116,6 +127,7 @@ data:
       end_col_offset_idx: 5
       end_row_offset_idx: 2
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -128,6 +140,7 @@ data:
       end_col_offset_idx: 1
       end_row_offset_idx: 3
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -140,6 +153,7 @@ data:
       end_col_offset_idx: 2
       end_row_offset_idx: 3
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -152,6 +166,7 @@ data:
       end_col_offset_idx: 3
       end_row_offset_idx: 3
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -164,6 +179,7 @@ data:
       end_col_offset_idx: 4
       end_row_offset_idx: 3
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -176,6 +192,7 @@ data:
       end_col_offset_idx: 5
       end_row_offset_idx: 3
       fillable: false
+      page_no: null
       row_header: false
       row_section: false
       row_span: 1
@@ -188,10 +205,8 @@ data:
 footnotes: []
 image: null
 label: table
+meta: null
 parent: null
 prov: []
 references: []
-self_ref: '#'
-content_layer: body
-annotations: []
-meta: null
+self_ref: '#'
\ No newline at end of file