Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion docling_core/types/doc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
model_serializer,
model_validator,
validate_call,
PrivateAttr,
)
from tabulate import tabulate
from typing_extensions import Self, deprecated, override
Expand Down Expand Up @@ -310,7 +311,6 @@ class PictureScatterChartData(PictureChartData):
class TableCell(BaseModel):
"""TableCell."""

bbox: Optional[BoundingBox] = None
row_span: int = 1
col_span: int = 1
start_row_offset_idx: int
Expand All @@ -323,6 +323,47 @@ class TableCell(BaseModel):
row_section: bool = False
fillable: bool = False

prov: Optional[list["ProvenanceItem"]] = Field(default=None)
_bbox: Optional[BoundingBox] = PrivateAttr(default=None)

@property
def bbox(self) -> Optional[BoundingBox]:
    """Return the bounding box of this cell.

    Provenance takes precedence over the privately stored box:

    * no provenance (``None`` or empty list): the stored ``_bbox`` is returned;
    * exactly one provenance item: that item's ``bbox`` is returned as-is;
    * several provenance items: the result of
      ``BoundingBox.enclosing_bbox`` over all of their boxes is returned.
    """
    provenance = self.prov
    if not provenance:
        return self._bbox
    if len(provenance) == 1:
        return provenance[0].bbox
    return BoundingBox.enclosing_bbox([item.bbox for item in provenance])

@bbox.setter
def bbox(self, value: Optional[BoundingBox]) -> None:
    """Assign the bounding box of this cell.

    Without provenance the value is kept in the private ``_bbox``. With
    exactly one provenance item the value is written to that item's
    ``bbox`` instead. With several provenance items the assignment is
    ignored, because it is ambiguous which item should receive the box.
    """
    provenance = self.prov
    if not provenance:
        self._bbox = value
        return
    if len(provenance) == 1:
        provenance[0].bbox = value
    # More than one provenance item: deliberately left untouched.

@model_validator(mode="before")
@classmethod
def _validate_bbox(cls, data: Any) -> Any:
    """Return the raw input unchanged.

    This hook performs no validation of its own; a ``bbox`` keyword is
    consumed by ``__init__`` rather than here.
    """
    return data

def __init__(self, **data: Any) -> None:
    """Build the cell and route a ``bbox`` keyword to the right place.

    ``bbox`` is no longer a model field, so it is removed from ``data``
    before the regular pydantic initialisation and applied afterwards:

    * no provenance: stored in the private ``_bbox``;
    * exactly one provenance item: written to that item's ``bbox``;
    * several provenance items: ignored, since the target is ambiguous.

    Args:
        **data: Field values for the model, optionally including ``bbox``
            as a ``BoundingBox`` or as raw data accepted by
            ``BoundingBox.model_validate`` (e.g. a dict loaded from JSON).

    Raises:
        pydantic.ValidationError: If the fields or the ``bbox`` value do
            not validate.
    """
    raw_bbox = data.pop("bbox", None)
    super().__init__(**data)
    if raw_bbox is None:
        return

    # Private attributes are not validated by pydantic, and the assignment
    # onto the provenance item may not be either, so coerce explicitly.
    # Otherwise a dict coming from deserialised input would be stored as-is
    # instead of as a BoundingBox.
    # NOTE(review): relies on BoundingBox being a pydantic model - confirm.
    bbox_val = BoundingBox.model_validate(raw_bbox)

    if not self.prov:
        self._bbox = bbox_val
    elif len(self.prov) == 1:
        # An explicit bbox passed alongside a single provenance item
        # overrides that item's box so the two stay in sync.
        self.prov[0].bbox = bbox_val
    # Several provenance items: the explicit bbox is intentionally dropped.

@model_validator(mode="before")
@classmethod
def from_dict_format(cls, data: Any) -> Any:
Expand Down
24 changes: 24 additions & 0 deletions docs/DoclingDocument.json
Original file line number Diff line number Diff line change
Expand Up @@ -2250,6 +2250,18 @@
"title": "Fillable",
"type": "boolean"
},
"page_no": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"title": "Page No"
},
"ref": {
"$ref": "#/$defs/RefItem"
}
Expand Down Expand Up @@ -2523,6 +2535,18 @@
"default": false,
"title": "Fillable",
"type": "boolean"
},
"page_no": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"title": "Page No"
}
},
"required": [
Expand Down
23 changes: 19 additions & 4 deletions test/data/docling_document/unit/TableItem.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
annotations: []
captions: []
children: []
content_layer: body
data:
grid:
- - bbox: null
Expand All @@ -8,6 +10,7 @@ data:
end_col_offset_idx: 1
end_row_offset_idx: 1
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -20,6 +23,7 @@ data:
end_col_offset_idx: 2
end_row_offset_idx: 1
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -32,6 +36,7 @@ data:
end_col_offset_idx: 3
end_row_offset_idx: 1
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -44,6 +49,7 @@ data:
end_col_offset_idx: 4
end_row_offset_idx: 1
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -56,6 +62,7 @@ data:
end_col_offset_idx: 5
end_row_offset_idx: 1
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -68,6 +75,7 @@ data:
end_col_offset_idx: 1
end_row_offset_idx: 2
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -80,6 +88,7 @@ data:
end_col_offset_idx: 2
end_row_offset_idx: 2
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -92,6 +101,7 @@ data:
end_col_offset_idx: 3
end_row_offset_idx: 2
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -104,6 +114,7 @@ data:
end_col_offset_idx: 4
end_row_offset_idx: 2
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -116,6 +127,7 @@ data:
end_col_offset_idx: 5
end_row_offset_idx: 2
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -128,6 +140,7 @@ data:
end_col_offset_idx: 1
end_row_offset_idx: 3
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -140,6 +153,7 @@ data:
end_col_offset_idx: 2
end_row_offset_idx: 3
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -152,6 +166,7 @@ data:
end_col_offset_idx: 3
end_row_offset_idx: 3
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -164,6 +179,7 @@ data:
end_col_offset_idx: 4
end_row_offset_idx: 3
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -176,6 +192,7 @@ data:
end_col_offset_idx: 5
end_row_offset_idx: 3
fillable: false
page_no: null
row_header: false
row_section: false
row_span: 1
Expand All @@ -188,10 +205,8 @@ data:
footnotes: []
image: null
label: table
meta: null
parent: null
prov: []
references: []
self_ref: '#'
content_layer: body
annotations: []
meta: null
self_ref: '#'