From e233312725f9e56a1bf3575cfaf61b6b1e35574e Mon Sep 17 00:00:00 2001 From: Kristian Rickert Date: Sun, 15 Mar 2026 21:22:27 -0400 Subject: [PATCH 1/2] feat: add gRPC protobuf definitions and conversion utility --- docling_core/proto/__init__.py | 8 + docling_core/proto/gen/__init__.py | 0 docling_core/proto/gen/ai/__init__.py | 0 docling_core/proto/gen/ai/docling/__init__.py | 0 .../proto/gen/ai/docling/core/__init__.py | 0 .../proto/gen/ai/docling/core/v1/__init__.py | 0 .../docling/core/v1/docling_document_pb2.py | 222 ++++ docling_core/utils/conversion.py | 1166 +++++++++++++++++ .../ai/docling/core/v1/docling_document.proto | 809 ++++++++++++ pyproject.toml | 4 +- scripts/gen_proto.py | 42 + test/test_proto_conversion.py | 32 + uv.lock | 152 +++ 13 files changed, 2434 insertions(+), 1 deletion(-) create mode 100644 docling_core/proto/__init__.py create mode 100644 docling_core/proto/gen/__init__.py create mode 100644 docling_core/proto/gen/ai/__init__.py create mode 100644 docling_core/proto/gen/ai/docling/__init__.py create mode 100644 docling_core/proto/gen/ai/docling/core/__init__.py create mode 100644 docling_core/proto/gen/ai/docling/core/v1/__init__.py create mode 100644 docling_core/proto/gen/ai/docling/core/v1/docling_document_pb2.py create mode 100644 docling_core/utils/conversion.py create mode 100644 proto/ai/docling/core/v1/docling_document.proto create mode 100644 scripts/gen_proto.py create mode 100644 test/test_proto_conversion.py diff --git a/docling_core/proto/__init__.py b/docling_core/proto/__init__.py new file mode 100644 index 00000000..4b2aaf5d --- /dev/null +++ b/docling_core/proto/__init__.py @@ -0,0 +1,8 @@ +"""Protocol buffer definitions for DoclingDocument (gRPC/document API).""" + +from docling_core.proto.gen.ai.docling.core.v1 import ( + docling_document_pb2 as docling_document_pb2, +) +from docling_core.utils.conversion import ( + docling_document_to_proto as docling_document_to_proto, +) diff --git 
a/docling_core/proto/gen/__init__.py b/docling_core/proto/gen/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/docling_core/proto/gen/ai/__init__.py b/docling_core/proto/gen/ai/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/docling_core/proto/gen/ai/docling/__init__.py b/docling_core/proto/gen/ai/docling/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/docling_core/proto/gen/ai/docling/core/__init__.py b/docling_core/proto/gen/ai/docling/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/docling_core/proto/gen/ai/docling/core/v1/__init__.py b/docling_core/proto/gen/ai/docling/core/v1/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/docling_core/proto/gen/ai/docling/core/v1/docling_document_pb2.py b/docling_core/proto/gen/ai/docling/core/v1/docling_document_pb2.py new file mode 100644 index 00000000..f44d8bf1 --- /dev/null +++ b/docling_core/proto/gen/ai/docling/core/v1/docling_document_pb2.py @@ -0,0 +1,222 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: ai/docling/core/v1/docling_document.proto +# Protobuf Python Version: 6.31.1 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 6, + 31, + 1, + '', + 'ai/docling/core/v1/docling_document.proto' +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n)ai/docling/core/v1/docling_document.proto\x12\x12\x61i.docling.core.v1\x1a\x1cgoogle/protobuf/struct.proto\"\xb8\x06\n\x0f\x44oclingDocument\x12\x18\n\x0bschema_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x14\n\x07version\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x37\n\x06origin\x18\x04 \x01(\x0b\x32\".ai.docling.core.v1.DocumentOriginH\x02\x88\x01\x01\x12\x30\n\tfurniture\x18\r \x01(\x0b\x32\x1d.ai.docling.core.v1.GroupItem\x12+\n\x04\x62ody\x18\x05 \x01(\x0b\x32\x1d.ai.docling.core.v1.GroupItem\x12-\n\x06groups\x18\x06 \x03(\x0b\x32\x1d.ai.docling.core.v1.GroupItem\x12/\n\x05texts\x18\x07 \x03(\x0b\x32 .ai.docling.core.v1.BaseTextItem\x12\x31\n\x08pictures\x18\x08 \x03(\x0b\x32\x1f.ai.docling.core.v1.PictureItem\x12-\n\x06tables\x18\t \x03(\x0b\x32\x1d.ai.docling.core.v1.TableItem\x12\x39\n\x0fkey_value_items\x18\n \x03(\x0b\x32 .ai.docling.core.v1.KeyValueItem\x12\x30\n\nform_items\x18\x0b \x03(\x0b\x32\x1c.ai.docling.core.v1.FormItem\x12:\n\rfield_regions\x18\x0e \x03(\x0b\x32#.ai.docling.core.v1.FieldRegionItem\x12\x32\n\x0b\x66ield_items\x18\x0f 
\x03(\x0b\x32\x1d.ai.docling.core.v1.FieldItem\x12=\n\x05pages\x18\x0c \x03(\x0b\x32..ai.docling.core.v1.DoclingDocument.PagesEntry\x1aJ\n\nPagesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1c.ai.docling.core.v1.PageItem:\x02\x38\x01\x42\x0e\n\x0c_schema_nameB\n\n\x08_versionB\t\n\x07_origin\"c\n\x0e\x44ocumentOrigin\x12\x10\n\x08mimetype\x18\x01 \x01(\t\x12\x13\n\x0b\x62inary_hash\x18\x02 \x01(\t\x12\x10\n\x08\x66ilename\x18\x03 \x01(\t\x12\x10\n\x03uri\x18\x04 \x01(\tH\x00\x88\x01\x01\x42\x06\n\x04_uri\"\xc7\x02\n\tGroupItem\x12\x10\n\x08self_ref\x18\x01 \x01(\t\x12\x30\n\x06parent\x18\x02 \x01(\x0b\x32\x1b.ai.docling.core.v1.RefItemH\x00\x88\x01\x01\x12-\n\x08\x63hildren\x18\x03 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x37\n\rcontent_layer\x18\x04 \x01(\x0e\x32 .ai.docling.core.v1.ContentLayer\x12/\n\x04meta\x18\x05 \x01(\x0b\x32\x1c.ai.docling.core.v1.BaseMetaH\x01\x88\x01\x01\x12\x11\n\x04name\x18\x06 \x01(\tH\x02\x88\x01\x01\x12-\n\x05label\x18\x07 \x01(\x0e\x32\x1e.ai.docling.core.v1.GroupLabelB\t\n\x07_parentB\x07\n\x05_metaB\x07\n\x05_name\"\x16\n\x07RefItem\x12\x0b\n\x03ref\x18\x01 \x01(\t\"%\n\x07IntSpan\x12\r\n\x05start\x18\x01 \x01(\x05\x12\x0b\n\x03\x65nd\x18\x02 \x01(\x05\"*\n\tFloatPair\x12\r\n\x05\x66irst\x18\x01 \x01(\x01\x12\x0e\n\x06second\x18\x02 \x01(\x01\"+\n\rStringIntPair\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x05\"Q\n\x07\x46ineRef\x12\x0b\n\x03ref\x18\x01 \x01(\t\x12/\n\x05range\x18\x02 \x01(\x0b\x32\x1b.ai.docling.core.v1.IntSpanH\x00\x88\x01\x01\x42\x08\n\x06_range\"\x95\x01\n\x0bTrackSource\x12\x12\n\nstart_time\x18\x01 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x02 \x01(\x01\x12\x17\n\nidentifier\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x12\n\x05voice\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04kind\x18\x05 \x01(\tH\x02\x88\x01\x01\x42\r\n\x0b_identifierB\x08\n\x06_voiceB\x07\n\x05_kind\"H\n\nSourceType\x12\x30\n\x05track\x18\x01 
\x01(\x0b\x32\x1f.ai.docling.core.v1.TrackSourceH\x00\x42\x08\n\x06source\"\xe6\x01\n\x08\x42\x61seMeta\x12:\n\x07summary\x18\x01 \x01(\x0b\x32$.ai.docling.core.v1.SummaryMetaFieldH\x00\x88\x01\x01\x12\x45\n\rcustom_fields\x18\x64 \x03(\x0b\x32..ai.docling.core.v1.BaseMeta.CustomFieldsEntry\x1aK\n\x11\x43ustomFieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\x42\n\n\x08_summary\"\x8c\x02\n\x10SummaryMetaField\x12\x17\n\nconfidence\x18\x01 \x01(\x01H\x00\x88\x01\x01\x12\x17\n\ncreated_by\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x0c\n\x04text\x18\x03 \x01(\t\x12M\n\rcustom_fields\x18\x64 \x03(\x0b\x32\x36.ai.docling.core.v1.SummaryMetaField.CustomFieldsEntry\x1aK\n\x11\x43ustomFieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\x42\r\n\x0b_confidenceB\r\n\x0b_created_by\"\x80\x01\n\nFormatting\x12\x0c\n\x04\x62old\x18\x01 \x01(\x08\x12\x0e\n\x06italic\x18\x02 \x01(\x08\x12\x11\n\tunderline\x18\x03 \x01(\x08\x12\x15\n\rstrikethrough\x18\x04 \x01(\x08\x12*\n\x06script\x18\x05 \x01(\x0e\x32\x1a.ai.docling.core.v1.Script\"\xc4\x03\n\x0c\x42\x61seTextItem\x12.\n\x05title\x18\x01 \x01(\x0b\x32\x1d.ai.docling.core.v1.TitleItemH\x00\x12?\n\x0esection_header\x18\x02 \x01(\x0b\x32%.ai.docling.core.v1.SectionHeaderItemH\x00\x12\x31\n\tlist_item\x18\x03 \x01(\x0b\x32\x1c.ai.docling.core.v1.ListItemH\x00\x12,\n\x04\x63ode\x18\x04 \x01(\x0b\x32\x1c.ai.docling.core.v1.CodeItemH\x00\x12\x32\n\x07\x66ormula\x18\x05 \x01(\x0b\x32\x1f.ai.docling.core.v1.FormulaItemH\x00\x12,\n\x04text\x18\x06 \x01(\x0b\x32\x1c.ai.docling.core.v1.TextItemH\x00\x12=\n\rfield_heading\x18\x07 \x01(\x0b\x32$.ai.docling.core.v1.FieldHeadingItemH\x00\x12\x39\n\x0b\x66ield_value\x18\x08 \x01(\x0b\x32\".ai.docling.core.v1.FieldValueItemH\x00\x42\x06\n\x04item\"\xcb\x04\n\x0cTextItemBase\x12\x10\n\x08self_ref\x18\x01 \x01(\t\x12\x30\n\x06parent\x18\x02 
\x01(\x0b\x32\x1b.ai.docling.core.v1.RefItemH\x00\x88\x01\x01\x12-\n\x08\x63hildren\x18\x03 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x37\n\rcontent_layer\x18\x04 \x01(\x0e\x32 .ai.docling.core.v1.ContentLayer\x12/\n\x04meta\x18\x05 \x01(\x0b\x32\x1c.ai.docling.core.v1.BaseMetaH\x01\x88\x01\x01\x12/\n\x05label\x18\x06 \x01(\x0e\x32 .ai.docling.core.v1.DocItemLabel\x12\x30\n\x04prov\x18\x07 \x03(\x0b\x32\".ai.docling.core.v1.ProvenanceItem\x12\x0c\n\x04orig\x18\x08 \x01(\t\x12\x0c\n\x04text\x18\t \x01(\t\x12\x37\n\nformatting\x18\n \x01(\x0b\x32\x1e.ai.docling.core.v1.FormattingH\x02\x88\x01\x01\x12\x16\n\thyperlink\x18\x0b \x01(\tH\x03\x88\x01\x01\x12.\n\x06source\x18\x0c \x03(\x0b\x32\x1e.ai.docling.core.v1.SourceType\x12-\n\x08\x63omments\x18\r \x03(\x0b\x32\x1b.ai.docling.core.v1.FineRefB\t\n\x07_parentB\x07\n\x05_metaB\r\n\x0b_formattingB\x0c\n\n_hyperlink\";\n\tTitleItem\x12.\n\x04\x62\x61se\x18\x01 \x01(\x0b\x32 .ai.docling.core.v1.TextItemBase\"R\n\x11SectionHeaderItem\x12.\n\x04\x62\x61se\x18\x01 \x01(\x0b\x32 .ai.docling.core.v1.TextItemBase\x12\r\n\x05level\x18\x02 \x01(\x05\"Q\n\x10\x46ieldHeadingItem\x12.\n\x04\x62\x61se\x18\x01 \x01(\x0b\x32 .ai.docling.core.v1.TextItemBase\x12\r\n\x05level\x18\x02 \x01(\x05\"N\n\x0e\x46ieldValueItem\x12.\n\x04\x62\x61se\x18\x01 \x01(\x0b\x32 .ai.docling.core.v1.TextItemBase\x12\x0c\n\x04kind\x18\x02 \x01(\t\"n\n\x08ListItem\x12.\n\x04\x62\x61se\x18\x01 \x01(\x0b\x32 .ai.docling.core.v1.TextItemBase\x12\x12\n\nenumerated\x18\x02 \x01(\x08\x12\x13\n\x06marker\x18\x03 \x01(\tH\x00\x88\x01\x01\x42\t\n\x07_marker\"\xcf\x03\n\x08\x43odeItem\x12.\n\x04\x62\x61se\x18\x01 \x01(\x0b\x32 .ai.docling.core.v1.TextItemBase\x12\x33\n\x04meta\x18\x02 \x01(\x0b\x32 .ai.docling.core.v1.FloatingMetaH\x00\x88\x01\x01\x12-\n\x08\x63\x61ptions\x18\x03 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12/\n\nreferences\x18\x04 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12.\n\tfootnotes\x18\x05 
\x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x30\n\x05image\x18\x06 \x01(\x0b\x32\x1c.ai.docling.core.v1.ImageRefH\x01\x88\x01\x01\x12\x41\n\rcode_language\x18\x07 \x01(\x0e\x32%.ai.docling.core.v1.CodeLanguageLabelH\x02\x88\x01\x01\x12\x1e\n\x11\x63ode_language_raw\x18\x08 \x01(\tH\x03\x88\x01\x01\x42\x07\n\x05_metaB\x08\n\x06_imageB\x10\n\x0e_code_languageB\x14\n\x12_code_language_raw\"=\n\x0b\x46ormulaItem\x12.\n\x04\x62\x61se\x18\x01 \x01(\x0b\x32 .ai.docling.core.v1.TextItemBase\":\n\x08TextItem\x12.\n\x04\x62\x61se\x18\x01 \x01(\x0b\x32 .ai.docling.core.v1.TextItemBase\"\x7f\n\x0eProvenanceItem\x12\x0f\n\x07page_no\x18\x01 \x01(\x05\x12-\n\x04\x62\x62ox\x18\x02 \x01(\x0b\x32\x1f.ai.docling.core.v1.BoundingBox\x12-\n\x08\x63harspan\x18\x03 \x01(\x0b\x32\x1b.ai.docling.core.v1.IntSpan\"\xba\x01\n\x0b\x42oundingBox\x12\t\n\x01l\x18\x01 \x01(\x01\x12\t\n\x01t\x18\x02 \x01(\x01\x12\t\n\x01r\x18\x03 \x01(\x01\x12\t\n\x01\x62\x18\x04 \x01(\x01\x12:\n\x0c\x63oord_origin\x18\x05 \x01(\x0e\x32\x1f.ai.docling.core.v1.CoordOriginH\x00\x88\x01\x01\x12\x1d\n\x10\x63oord_origin_raw\x18\x06 \x01(\tH\x01\x88\x01\x01\x42\x0f\n\r_coord_originB\x13\n\x11_coord_origin_raw\"^\n\x08ImageRef\x12\x10\n\x08mimetype\x18\x01 \x01(\t\x12\x0b\n\x03\x64pi\x18\x02 \x01(\x05\x12&\n\x04size\x18\x03 \x01(\x0b\x32\x18.ai.docling.core.v1.Size\x12\x0b\n\x03uri\x18\x04 \x01(\t\"%\n\x04Size\x12\r\n\x05width\x18\x01 \x01(\x01\x12\x0e\n\x06height\x18\x02 \x01(\x01\"G\n\x15\x44\x65scriptionAnnotation\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12\x0c\n\x04text\x18\x02 \x01(\t\x12\x12\n\nprovenance\x18\x03 \x01(\t\"H\n\x0eMiscAnnotation\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12(\n\x07\x63ontent\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"D\n\x1aPictureClassificationClass\x12\x12\n\nclass_name\x18\x01 \x01(\t\x12\x12\n\nconfidence\x18\x02 \x01(\x01\"\x88\x01\n\x19PictureClassificationData\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12\x12\n\nprovenance\x18\x02 \x01(\t\x12I\n\x11predicted_classes\x18\x03 
\x03(\x0b\x32..ai.docling.core.v1.PictureClassificationClass\"\xa1\x01\n\x13PictureMoleculeData\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12\x0b\n\x03smi\x18\x02 \x01(\t\x12\x12\n\nconfidence\x18\x03 \x01(\x01\x12\x12\n\nclass_name\x18\x04 \x01(\t\x12\x33\n\x0csegmentation\x18\x05 \x03(\x0b\x32\x1d.ai.docling.core.v1.FloatPair\x12\x12\n\nprovenance\x18\x06 \x01(\t\"i\n\x17PictureTabularChartData\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\x31\n\nchart_data\x18\x03 \x01(\x0b\x32\x1d.ai.docling.core.v1.TableData\"I\n\tChartLine\x12\r\n\x05label\x18\x01 \x01(\t\x12-\n\x06values\x18\x02 \x03(\x0b\x32\x1d.ai.docling.core.v1.FloatPair\"\x8d\x01\n\x14PictureLineChartData\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\x14\n\x0cx_axis_label\x18\x03 \x01(\t\x12\x14\n\x0cy_axis_label\x18\x04 \x01(\t\x12,\n\x05lines\x18\x05 \x03(\x0b\x32\x1d.ai.docling.core.v1.ChartLine\")\n\x08\x43hartBar\x12\r\n\x05label\x18\x01 \x01(\t\x12\x0e\n\x06values\x18\x02 \x01(\x01\"\x8a\x01\n\x13PictureBarChartData\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\x14\n\x0cx_axis_label\x18\x03 \x01(\t\x12\x14\n\x0cy_axis_label\x18\x04 \x01(\t\x12*\n\x04\x62\x61rs\x18\x05 \x03(\x0b\x32\x1c.ai.docling.core.v1.ChartBar\"S\n\x0f\x43hartStackedBar\x12\r\n\x05label\x18\x01 \x03(\t\x12\x31\n\x06values\x18\x02 \x03(\x0b\x32!.ai.docling.core.v1.StringIntPair\"\xa0\x01\n\x1aPictureStackedBarChartData\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\x14\n\x0cx_axis_label\x18\x03 \x01(\t\x12\x14\n\x0cy_axis_label\x18\x04 \x01(\t\x12\x39\n\x0cstacked_bars\x18\x05 \x03(\x0b\x32#.ai.docling.core.v1.ChartStackedBar\"*\n\nChartSlice\x12\r\n\x05label\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01\"b\n\x13PicturePieChartData\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12.\n\x06slices\x18\x03 \x03(\x0b\x32\x1e.ai.docling.core.v1.ChartSlice\":\n\nChartPoint\x12,\n\x05value\x18\x01 
\x01(\x0b\x32\x1d.ai.docling.core.v1.FloatPair\"\x92\x01\n\x17PictureScatterChartData\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\x14\n\x0cx_axis_label\x18\x03 \x01(\t\x12\x14\n\x0cy_axis_label\x18\x04 \x01(\t\x12.\n\x06points\x18\x05 \x03(\x0b\x32\x1e.ai.docling.core.v1.ChartPoint\"\xb2\x05\n\x11PictureAnnotation\x12@\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32).ai.docling.core.v1.DescriptionAnnotationH\x00\x12\x32\n\x04misc\x18\x02 \x01(\x0b\x32\".ai.docling.core.v1.MiscAnnotationH\x00\x12G\n\x0e\x63lassification\x18\x03 \x01(\x0b\x32-.ai.docling.core.v1.PictureClassificationDataH\x00\x12;\n\x08molecule\x18\x04 \x01(\x0b\x32\'.ai.docling.core.v1.PictureMoleculeDataH\x00\x12\x44\n\rtabular_chart\x18\x05 \x01(\x0b\x32+.ai.docling.core.v1.PictureTabularChartDataH\x00\x12>\n\nline_chart\x18\x06 \x01(\x0b\x32(.ai.docling.core.v1.PictureLineChartDataH\x00\x12<\n\tbar_chart\x18\x07 \x01(\x0b\x32\'.ai.docling.core.v1.PictureBarChartDataH\x00\x12K\n\x11stacked_bar_chart\x18\x08 \x01(\x0b\x32..ai.docling.core.v1.PictureStackedBarChartDataH\x00\x12<\n\tpie_chart\x18\t \x01(\x0b\x32\'.ai.docling.core.v1.PicturePieChartDataH\x00\x12\x44\n\rscatter_chart\x18\n \x01(\x0b\x32+.ai.docling.core.v1.PictureScatterChartDataH\x00\x42\x0c\n\nannotation\"\x95\x01\n\x0fTableAnnotation\x12@\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32).ai.docling.core.v1.DescriptionAnnotationH\x00\x12\x32\n\x04misc\x18\x02 \x01(\x0b\x32\".ai.docling.core.v1.MiscAnnotationH\x00\x42\x0c\n\nannotation\"\xab\x05\n\x0bPictureItem\x12\x15\n\x08self_ref\x18\x01 \x01(\tH\x00\x88\x01\x01\x12+\n\x06parent\x18\x02 \x01(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12-\n\x08\x63hildren\x18\x03 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x37\n\rcontent_layer\x18\x04 \x01(\x0e\x32 .ai.docling.core.v1.ContentLayer\x12\x32\n\x04meta\x18\x05 \x01(\x0b\x32\x1f.ai.docling.core.v1.PictureMetaH\x01\x88\x01\x01\x12\r\n\x05label\x18\x06 \x01(\t\x12\x30\n\x04prov\x18\x07 
\x03(\x0b\x32\".ai.docling.core.v1.ProvenanceItem\x12-\n\x08\x63\x61ptions\x18\x08 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12/\n\nreferences\x18\t \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12.\n\tfootnotes\x18\n \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x30\n\x05image\x18\x0b \x01(\x0b\x32\x1c.ai.docling.core.v1.ImageRefH\x02\x88\x01\x01\x12.\n\x06source\x18\x0c \x03(\x0b\x32\x1e.ai.docling.core.v1.SourceType\x12-\n\x08\x63omments\x18\r \x03(\x0b\x32\x1b.ai.docling.core.v1.FineRef\x12:\n\x0b\x61nnotations\x18\x0e \x03(\x0b\x32%.ai.docling.core.v1.PictureAnnotationB\x0b\n\t_self_refB\x07\n\x05_metaB\x08\n\x06_image\"\xc8\x04\n\x0bPictureMeta\x12:\n\x07summary\x18\x01 \x01(\x0b\x32$.ai.docling.core.v1.SummaryMetaFieldH\x00\x88\x01\x01\x12\x42\n\x0b\x64\x65scription\x18\x02 \x01(\x0b\x32(.ai.docling.core.v1.DescriptionMetaFieldH\x01\x88\x01\x01\x12O\n\x0e\x63lassification\x18\x03 \x01(\x0b\x32\x32.ai.docling.core.v1.PictureClassificationMetaFieldH\x02\x88\x01\x01\x12<\n\x08molecule\x18\x04 \x01(\x0b\x32%.ai.docling.core.v1.MoleculeMetaFieldH\x03\x88\x01\x01\x12\x45\n\rtabular_chart\x18\x05 \x01(\x0b\x32).ai.docling.core.v1.TabularChartMetaFieldH\x04\x88\x01\x01\x12H\n\rcustom_fields\x18\x64 \x03(\x0b\x32\x31.ai.docling.core.v1.PictureMeta.CustomFieldsEntry\x1aK\n\x11\x43ustomFieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\x42\n\n\x08_summaryB\x0e\n\x0c_descriptionB\x11\n\x0f_classificationB\x0b\n\t_moleculeB\x10\n\x0e_tabular_chart\"\x94\x02\n\x14\x44\x65scriptionMetaField\x12\x17\n\nconfidence\x18\x01 \x01(\x01H\x00\x88\x01\x01\x12\x17\n\ncreated_by\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x0c\n\x04text\x18\x03 \x01(\t\x12Q\n\rcustom_fields\x18\x64 \x03(\x0b\x32:.ai.docling.core.v1.DescriptionMetaField.CustomFieldsEntry\x1aK\n\x11\x43ustomFieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 
\x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\x42\r\n\x0b_confidenceB\r\n\x0b_created_by\"\x94\x02\n\x1ePictureClassificationMetaField\x12H\n\x0bpredictions\x18\x01 \x03(\x0b\x32\x33.ai.docling.core.v1.PictureClassificationPrediction\x12[\n\rcustom_fields\x18\x64 \x03(\x0b\x32\x44.ai.docling.core.v1.PictureClassificationMetaField.CustomFieldsEntry\x1aK\n\x11\x43ustomFieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\"\xb0\x02\n\x1fPictureClassificationPrediction\x12\x17\n\nconfidence\x18\x01 \x01(\x01H\x00\x88\x01\x01\x12\x17\n\ncreated_by\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x12\n\nclass_name\x18\x03 \x01(\t\x12\\\n\rcustom_fields\x18\x64 \x03(\x0b\x32\x45.ai.docling.core.v1.PictureClassificationPrediction.CustomFieldsEntry\x1aK\n\x11\x43ustomFieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\x42\r\n\x0b_confidenceB\r\n\x0b_created_by\"\x8d\x02\n\x11MoleculeMetaField\x12\x17\n\nconfidence\x18\x01 \x01(\x01H\x00\x88\x01\x01\x12\x17\n\ncreated_by\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x0b\n\x03smi\x18\x03 \x01(\t\x12N\n\rcustom_fields\x18\x64 \x03(\x0b\x32\x37.ai.docling.core.v1.MoleculeMetaField.CustomFieldsEntry\x1aK\n\x11\x43ustomFieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\x42\r\n\x0b_confidenceB\r\n\x0b_created_by\"\xd9\x02\n\x15TabularChartMetaField\x12\x17\n\nconfidence\x18\x01 \x01(\x01H\x00\x88\x01\x01\x12\x17\n\ncreated_by\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x12\n\x05title\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x31\n\nchart_data\x18\x04 \x01(\x0b\x32\x1d.ai.docling.core.v1.TableData\x12R\n\rcustom_fields\x18\x64 \x03(\x0b\x32;.ai.docling.core.v1.TabularChartMetaField.CustomFieldsEntry\x1aK\n\x11\x43ustomFieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 
\x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\x42\r\n\x0b_confidenceB\r\n\x0b_created_byB\x08\n\x06_title\"\xc2\x02\n\x0c\x46loatingMeta\x12:\n\x07summary\x18\x01 \x01(\x0b\x32$.ai.docling.core.v1.SummaryMetaFieldH\x00\x88\x01\x01\x12\x42\n\x0b\x64\x65scription\x18\x02 \x01(\x0b\x32(.ai.docling.core.v1.DescriptionMetaFieldH\x01\x88\x01\x01\x12I\n\rcustom_fields\x18\x64 \x03(\x0b\x32\x32.ai.docling.core.v1.FloatingMeta.CustomFieldsEntry\x1aK\n\x11\x43ustomFieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\x42\n\n\x08_summaryB\x0e\n\x0c_description\"\xd3\x05\n\tTableItem\x12\x10\n\x08self_ref\x18\x01 \x01(\t\x12\x30\n\x06parent\x18\x02 \x01(\x0b\x32\x1b.ai.docling.core.v1.RefItemH\x00\x88\x01\x01\x12-\n\x08\x63hildren\x18\x03 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x37\n\rcontent_layer\x18\x04 \x01(\x0e\x32 .ai.docling.core.v1.ContentLayer\x12\x33\n\x04meta\x18\x05 \x01(\x0b\x32 .ai.docling.core.v1.FloatingMetaH\x01\x88\x01\x01\x12\r\n\x05label\x18\x06 \x01(\t\x12\x30\n\x04prov\x18\x07 \x03(\x0b\x32\".ai.docling.core.v1.ProvenanceItem\x12-\n\x08\x63\x61ptions\x18\x08 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12/\n\nreferences\x18\t \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12.\n\tfootnotes\x18\n \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x30\n\x05image\x18\x0b \x01(\x0b\x32\x1c.ai.docling.core.v1.ImageRefH\x02\x88\x01\x01\x12+\n\x04\x64\x61ta\x18\x0c \x01(\x0b\x32\x1d.ai.docling.core.v1.TableData\x12.\n\x06source\x18\r \x03(\x0b\x32\x1e.ai.docling.core.v1.SourceType\x12-\n\x08\x63omments\x18\x0e \x03(\x0b\x32\x1b.ai.docling.core.v1.FineRef\x12\x38\n\x0b\x61nnotations\x18\x0f \x03(\x0b\x32#.ai.docling.core.v1.TableAnnotationB\t\n\x07_parentB\x07\n\x05_metaB\x08\n\x06_image\"\x8f\x01\n\tTableData\x12\x32\n\x0btable_cells\x18\x01 \x03(\x0b\x32\x1d.ai.docling.core.v1.TableCell\x12\x10\n\x08num_rows\x18\x02 \x01(\x05\x12\x10\n\x08num_cols\x18\x03 
\x01(\x05\x12*\n\x04grid\x18\x04 \x03(\x0b\x32\x1c.ai.docling.core.v1.TableRow\"8\n\x08TableRow\x12,\n\x05\x63\x65lls\x18\x01 \x03(\x0b\x32\x1d.ai.docling.core.v1.TableCell\"\xe9\x02\n\tTableCell\x12-\n\x04\x62\x62ox\x18\x01 \x01(\x0b\x32\x1f.ai.docling.core.v1.BoundingBox\x12\x10\n\x08row_span\x18\x02 \x01(\x05\x12\x10\n\x08\x63ol_span\x18\x03 \x01(\x05\x12\x1c\n\x14start_row_offset_idx\x18\x04 \x01(\x05\x12\x1a\n\x12\x65nd_row_offset_idx\x18\x05 \x01(\x05\x12\x1c\n\x14start_col_offset_idx\x18\x06 \x01(\x05\x12\x1a\n\x12\x65nd_col_offset_idx\x18\x07 \x01(\x05\x12\x0c\n\x04text\x18\x08 \x01(\t\x12\x15\n\rcolumn_header\x18\t \x01(\x08\x12\x12\n\nrow_header\x18\n \x01(\x08\x12\x13\n\x0brow_section\x18\x0b \x01(\x08\x12\x10\n\x08\x66illable\x18\x0c \x01(\x08\x12-\n\x03ref\x18\r \x01(\x0b\x32\x1b.ai.docling.core.v1.RefItemH\x00\x88\x01\x01\x42\x06\n\x04_ref\"\x9d\x05\n\x0cKeyValueItem\x12\x10\n\x08self_ref\x18\x01 \x01(\t\x12\x30\n\x06parent\x18\x02 \x01(\x0b\x32\x1b.ai.docling.core.v1.RefItemH\x00\x88\x01\x01\x12-\n\x08\x63hildren\x18\x03 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x37\n\rcontent_layer\x18\x04 \x01(\x0e\x32 .ai.docling.core.v1.ContentLayer\x12\x33\n\x04meta\x18\x05 \x01(\x0b\x32 .ai.docling.core.v1.FloatingMetaH\x01\x88\x01\x01\x12\r\n\x05label\x18\x06 \x01(\t\x12\x30\n\x04prov\x18\x07 \x03(\x0b\x32\".ai.docling.core.v1.ProvenanceItem\x12-\n\x08\x63\x61ptions\x18\x08 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12/\n\nreferences\x18\t \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12.\n\tfootnotes\x18\n \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x30\n\x05image\x18\x0b \x01(\x0b\x32\x1c.ai.docling.core.v1.ImageRefH\x02\x88\x01\x01\x12,\n\x05graph\x18\x0c \x01(\x0b\x32\x1d.ai.docling.core.v1.GraphData\x12.\n\x06source\x18\r \x03(\x0b\x32\x1e.ai.docling.core.v1.SourceType\x12-\n\x08\x63omments\x18\x0e \x03(\x0b\x32\x1b.ai.docling.core.v1.FineRefB\t\n\x07_parentB\x07\n\x05_metaB\x08\n\x06_image\"g\n\tGraphData\x12,\n\x05\x63\x65lls\x18\x01 
\x03(\x0b\x32\x1d.ai.docling.core.v1.GraphCell\x12,\n\x05links\x18\x02 \x03(\x0b\x32\x1d.ai.docling.core.v1.GraphLink\"\xec\x01\n\tGraphCell\x12\x31\n\x05label\x18\x01 \x01(\x0e\x32\".ai.docling.core.v1.GraphCellLabel\x12\x0f\n\x07\x63\x65ll_id\x18\x02 \x01(\x05\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x0c\n\x04orig\x18\x04 \x01(\t\x12\x35\n\x04prov\x18\x05 \x01(\x0b\x32\".ai.docling.core.v1.ProvenanceItemH\x00\x88\x01\x01\x12\x32\n\x08item_ref\x18\x06 \x01(\x0b\x32\x1b.ai.docling.core.v1.RefItemH\x01\x88\x01\x01\x42\x07\n\x05_provB\x0b\n\t_item_ref\"n\n\tGraphLink\x12\x31\n\x05label\x18\x01 \x01(\x0e\x32\".ai.docling.core.v1.GraphLinkLabel\x12\x16\n\x0esource_cell_id\x18\x02 \x01(\x05\x12\x16\n\x0etarget_cell_id\x18\x03 \x01(\x05\"\x99\x05\n\x08\x46ormItem\x12\x10\n\x08self_ref\x18\x01 \x01(\t\x12\x30\n\x06parent\x18\x02 \x01(\x0b\x32\x1b.ai.docling.core.v1.RefItemH\x00\x88\x01\x01\x12-\n\x08\x63hildren\x18\x03 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x37\n\rcontent_layer\x18\x04 \x01(\x0e\x32 .ai.docling.core.v1.ContentLayer\x12\x33\n\x04meta\x18\x05 \x01(\x0b\x32 .ai.docling.core.v1.FloatingMetaH\x01\x88\x01\x01\x12\r\n\x05label\x18\x06 \x01(\t\x12\x30\n\x04prov\x18\x07 \x03(\x0b\x32\".ai.docling.core.v1.ProvenanceItem\x12-\n\x08\x63\x61ptions\x18\x08 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12/\n\nreferences\x18\t \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12.\n\tfootnotes\x18\n \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x30\n\x05image\x18\x0b \x01(\x0b\x32\x1c.ai.docling.core.v1.ImageRefH\x02\x88\x01\x01\x12,\n\x05graph\x18\x0c \x01(\x0b\x32\x1d.ai.docling.core.v1.GraphData\x12.\n\x06source\x18\r \x03(\x0b\x32\x1e.ai.docling.core.v1.SourceType\x12-\n\x08\x63omments\x18\x0e \x03(\x0b\x32\x1b.ai.docling.core.v1.FineRefB\t\n\x07_parentB\x07\n\x05_metaB\x08\n\x06_image\"\xc4\x03\n\x0f\x46ieldRegionItem\x12\x10\n\x08self_ref\x18\x01 \x01(\t\x12\x30\n\x06parent\x18\x02 
\x01(\x0b\x32\x1b.ai.docling.core.v1.RefItemH\x00\x88\x01\x01\x12-\n\x08\x63hildren\x18\x03 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x37\n\rcontent_layer\x18\x04 \x01(\x0e\x32 .ai.docling.core.v1.ContentLayer\x12/\n\x04meta\x18\x05 \x01(\x0b\x32\x1c.ai.docling.core.v1.BaseMetaH\x01\x88\x01\x01\x12/\n\x05label\x18\x06 \x01(\x0e\x32 .ai.docling.core.v1.DocItemLabel\x12\x30\n\x04prov\x18\x07 \x03(\x0b\x32\".ai.docling.core.v1.ProvenanceItem\x12.\n\x06source\x18\x08 \x03(\x0b\x32\x1e.ai.docling.core.v1.SourceType\x12-\n\x08\x63omments\x18\t \x03(\x0b\x32\x1b.ai.docling.core.v1.FineRefB\t\n\x07_parentB\x07\n\x05_meta\"\xbe\x03\n\tFieldItem\x12\x10\n\x08self_ref\x18\x01 \x01(\t\x12\x30\n\x06parent\x18\x02 \x01(\x0b\x32\x1b.ai.docling.core.v1.RefItemH\x00\x88\x01\x01\x12-\n\x08\x63hildren\x18\x03 \x03(\x0b\x32\x1b.ai.docling.core.v1.RefItem\x12\x37\n\rcontent_layer\x18\x04 \x01(\x0e\x32 .ai.docling.core.v1.ContentLayer\x12/\n\x04meta\x18\x05 \x01(\x0b\x32\x1c.ai.docling.core.v1.BaseMetaH\x01\x88\x01\x01\x12/\n\x05label\x18\x06 \x01(\x0e\x32 .ai.docling.core.v1.DocItemLabel\x12\x30\n\x04prov\x18\x07 \x03(\x0b\x32\".ai.docling.core.v1.ProvenanceItem\x12.\n\x06source\x18\x08 \x03(\x0b\x32\x1e.ai.docling.core.v1.SourceType\x12-\n\x08\x63omments\x18\t \x03(\x0b\x32\x1b.ai.docling.core.v1.FineRefB\t\n\x07_parentB\x07\n\x05_meta\"\x7f\n\x08PageItem\x12&\n\x04size\x18\x01 \x01(\x0b\x32\x18.ai.docling.core.v1.Size\x12\x30\n\x05image\x18\x02 \x01(\x0b\x32\x1c.ai.docling.core.v1.ImageRefH\x00\x88\x01\x01\x12\x0f\n\x07page_no\x18\x03 
\x01(\x05\x42\x08\n\x06_image*\xb6\x01\n\x0c\x43ontentLayer\x12\x1d\n\x19\x43ONTENT_LAYER_UNSPECIFIED\x10\x00\x12\x16\n\x12\x43ONTENT_LAYER_BODY\x10\x01\x12\x1b\n\x17\x43ONTENT_LAYER_FURNITURE\x10\x02\x12\x1c\n\x18\x43ONTENT_LAYER_BACKGROUND\x10\x03\x12\x1b\n\x17\x43ONTENT_LAYER_INVISIBLE\x10\x04\x12\x17\n\x13\x43ONTENT_LAYER_NOTES\x10\x05*\xcf\x02\n\nGroupLabel\x12\x1b\n\x17GROUP_LABEL_UNSPECIFIED\x10\x00\x12\x14\n\x10GROUP_LABEL_LIST\x10\x01\x12\x1c\n\x18GROUP_LABEL_ORDERED_LIST\x10\x02\x12\x17\n\x13GROUP_LABEL_CHAPTER\x10\x03\x12\x17\n\x13GROUP_LABEL_SECTION\x10\x04\x12\x15\n\x11GROUP_LABEL_SHEET\x10\x05\x12\x15\n\x11GROUP_LABEL_SLIDE\x10\x06\x12\x19\n\x15GROUP_LABEL_FORM_AREA\x10\x07\x12\x1e\n\x1aGROUP_LABEL_KEY_VALUE_AREA\x10\x08\x12\x1f\n\x1bGROUP_LABEL_COMMENT_SECTION\x10\t\x12\x16\n\x12GROUP_LABEL_INLINE\x10\n\x12\x1c\n\x18GROUP_LABEL_PICTURE_AREA\x10\x0b*\xcc\x07\n\x0c\x44ocItemLabel\x12\x1e\n\x1a\x44OC_ITEM_LABEL_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x44OC_ITEM_LABEL_CAPTION\x10\x01\x12\x18\n\x14\x44OC_ITEM_LABEL_CHART\x10\x02\x12$\n DOC_ITEM_LABEL_CHECKBOX_SELECTED\x10\x03\x12&\n\"DOC_ITEM_LABEL_CHECKBOX_UNSELECTED\x10\x04\x12\x17\n\x13\x44OC_ITEM_LABEL_CODE\x10\x05\x12!\n\x1d\x44OC_ITEM_LABEL_DOCUMENT_INDEX\x10\x06\x12\x1e\n\x1a\x44OC_ITEM_LABEL_EMPTY_VALUE\x10\x07\x12\x1b\n\x17\x44OC_ITEM_LABEL_FOOTNOTE\x10\x08\x12\x17\n\x13\x44OC_ITEM_LABEL_FORM\x10\t\x12\x1a\n\x16\x44OC_ITEM_LABEL_FORMULA\x10\n\x12 
\n\x1c\x44OC_ITEM_LABEL_GRADING_SCALE\x10\x0b\x12#\n\x1f\x44OC_ITEM_LABEL_HANDWRITTEN_TEXT\x10\x0c\x12#\n\x1f\x44OC_ITEM_LABEL_KEY_VALUE_REGION\x10\r\x12\x1c\n\x18\x44OC_ITEM_LABEL_LIST_ITEM\x10\x0e\x12\x1e\n\x1a\x44OC_ITEM_LABEL_PAGE_FOOTER\x10\x0f\x12\x1e\n\x1a\x44OC_ITEM_LABEL_PAGE_HEADER\x10\x10\x12\x1c\n\x18\x44OC_ITEM_LABEL_PARAGRAPH\x10\x11\x12\x1a\n\x16\x44OC_ITEM_LABEL_PICTURE\x10\x12\x12\x1c\n\x18\x44OC_ITEM_LABEL_REFERENCE\x10\x13\x12!\n\x1d\x44OC_ITEM_LABEL_SECTION_HEADER\x10\x14\x12\x18\n\x14\x44OC_ITEM_LABEL_TABLE\x10\x15\x12\x17\n\x13\x44OC_ITEM_LABEL_TEXT\x10\x16\x12\x18\n\x14\x44OC_ITEM_LABEL_TITLE\x10\x17\x12\x1f\n\x1b\x44OC_ITEM_LABEL_FIELD_REGION\x10\x18\x12 \n\x1c\x44OC_ITEM_LABEL_FIELD_HEADING\x10\x19\x12\x1d\n\x19\x44OC_ITEM_LABEL_FIELD_ITEM\x10\x1a\x12\x1c\n\x18\x44OC_ITEM_LABEL_FIELD_KEY\x10\x1b\x12\x1e\n\x1a\x44OC_ITEM_LABEL_FIELD_VALUE\x10\x1c\x12\x1d\n\x19\x44OC_ITEM_LABEL_FIELD_HINT\x10\x1d\x12\x19\n\x15\x44OC_ITEM_LABEL_MARKER\x10\x1e*W\n\x06Script\x12\x16\n\x12SCRIPT_UNSPECIFIED\x10\x00\x12\x13\n\x0fSCRIPT_BASELINE\x10\x01\x12\x0e\n\nSCRIPT_SUB\x10\x02\x12\x10\n\x0cSCRIPT_SUPER\x10\x03*b\n\x0b\x43oordOrigin\x12\x1c\n\x18\x43OORD_ORIGIN_UNSPECIFIED\x10\x00\x12\x18\n\x14\x43OORD_ORIGIN_TOPLEFT\x10\x01\x12\x1b\n\x17\x43OORD_ORIGIN_BOTTOMLEFT\x10\x02*\xcc\x0e\n\x11\x43odeLanguageLabel\x12#\n\x1f\x43ODE_LANGUAGE_LABEL_UNSPECIFIED\x10\x00\x12\x1b\n\x17\x43ODE_LANGUAGE_LABEL_ADA\x10\x01\x12\x1b\n\x17\x43ODE_LANGUAGE_LABEL_AWK\x10\x02\x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_BASH\x10\x03\x12\x1a\n\x16\x43ODE_LANGUAGE_LABEL_BC\x10\x04\x12\x19\n\x15\x43ODE_LANGUAGE_LABEL_C\x10\x05\x12\x1f\n\x1b\x43ODE_LANGUAGE_LABEL_C_SHARP\x10\x06\x12#\n\x1f\x43ODE_LANGUAGE_LABEL_C_PLUS_PLUS\x10\x07\x12\x1d\n\x19\x43ODE_LANGUAGE_LABEL_CMAKE\x10\x08\x12\x1d\n\x19\x43ODE_LANGUAGE_LABEL_COBOL\x10\t\x12\x1b\n\x17\x43ODE_LANGUAGE_LABEL_CSS\x10\n\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_CEYLON\x10\x0b\x12\x1f\n\x1b\x43ODE_LANGUAGE_LABEL_CLOJURE\x10\x0c\x12\x1f\n\x1b\x43ODE_LAN
GUAGE_LABEL_CRYSTAL\x10\r\x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_CUDA\x10\x0e\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_CYTHON\x10\x0f\x12\x19\n\x15\x43ODE_LANGUAGE_LABEL_D\x10\x10\x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_DART\x10\x11\x12\x1a\n\x16\x43ODE_LANGUAGE_LABEL_DC\x10\x12\x12\"\n\x1e\x43ODE_LANGUAGE_LABEL_DOCKERFILE\x10\x13\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_ELIXIR\x10\x14\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_ERLANG\x10\x15\x12\x1f\n\x1b\x43ODE_LANGUAGE_LABEL_FORTRAN\x10\x16\x12\x1d\n\x19\x43ODE_LANGUAGE_LABEL_FORTH\x10\x17\x12\x1a\n\x16\x43ODE_LANGUAGE_LABEL_GO\x10\x18\x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_HTML\x10\x19\x12\x1f\n\x1b\x43ODE_LANGUAGE_LABEL_HASKELL\x10\x1a\x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_HAXE\x10\x1b\x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_JAVA\x10\x1c\x12\"\n\x1e\x43ODE_LANGUAGE_LABEL_JAVASCRIPT\x10\x1d\x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_JSON\x10\x1e\x12\x1d\n\x19\x43ODE_LANGUAGE_LABEL_JULIA\x10\x1f\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_KOTLIN\x10 \x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_LISP\x10!\x12\x1b\n\x17\x43ODE_LANGUAGE_LABEL_LUA\x10\"\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_MATLAB\x10#\x12\"\n\x1e\x43ODE_LANGUAGE_LABEL_MOONSCRIPT\x10$\x12\x1b\n\x17\x43ODE_LANGUAGE_LABEL_NIM\x10%\x12\x1d\n\x19\x43ODE_LANGUAGE_LABEL_OCAML\x10&\x12\"\n\x1e\x43ODE_LANGUAGE_LABEL_OBJECTIVEC\x10\'\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_OCTAVE\x10(\x12\x1b\n\x17\x43ODE_LANGUAGE_LABEL_PHP\x10)\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_PASCAL\x10*\x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_PERL\x10+\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_PROLOG\x10,\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_PYTHON\x10-\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_RACKET\x10.\x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_RUBY\x10/\x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_RUST\x10\x30\x12\x1b\n\x17\x43ODE_LANGUAGE_LABEL_SML\x10\x31\x12\x1b\n\x17\x43ODE_LANGUAGE_LABEL_SQL\x10\x32\x12\x1d\n\x19\x43ODE_LANGUAGE_LABEL_SCALA\x10\x33\x12\x1e\n\x1a\x43ODE_LANGUAGE_LABEL_SCHEME\x10\x34\x12\x1d\n\x19\x43ODE_LANGUAGE_LABEL_SWIFT\x10\x35\x12\"\n\x1e\x43ODE_LANG
UAGE_LABEL_TYPESCRIPT\x10\x36\x12\x1f\n\x1b\x43ODE_LANGUAGE_LABEL_UNKNOWN\x10\x37\x12#\n\x1f\x43ODE_LANGUAGE_LABEL_VISUALBASIC\x10\x38\x12\x1b\n\x17\x43ODE_LANGUAGE_LABEL_XML\x10\x39\x12\x1c\n\x18\x43ODE_LANGUAGE_LABEL_YAML\x10:*\x87\x01\n\x0eGraphCellLabel\x12 \n\x1cGRAPH_CELL_LABEL_UNSPECIFIED\x10\x00\x12\x18\n\x14GRAPH_CELL_LABEL_KEY\x10\x01\x12\x1a\n\x16GRAPH_CELL_LABEL_VALUE\x10\x02\x12\x1d\n\x19GRAPH_CELL_LABEL_CHECKBOX\x10\x03*\xad\x01\n\x0eGraphLinkLabel\x12 \n\x1cGRAPH_LINK_LABEL_UNSPECIFIED\x10\x00\x12\x1d\n\x19GRAPH_LINK_LABEL_TO_VALUE\x10\x01\x12\x1b\n\x17GRAPH_LINK_LABEL_TO_KEY\x10\x02\x12\x1e\n\x1aGRAPH_LINK_LABEL_TO_PARENT\x10\x03\x12\x1d\n\x19GRAPH_LINK_LABEL_TO_CHILD\x10\x04\x42,\n\x12\x61i.docling.core.v1B\x14\x44oclingDocumentProtoP\x01\x62\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'ai.docling.core.v1.docling_document_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals['DESCRIPTOR']._loaded_options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\022ai.docling.core.v1B\024DoclingDocumentProtoP\001' + _globals['_DOCLINGDOCUMENT_PAGESENTRY']._loaded_options = None + _globals['_DOCLINGDOCUMENT_PAGESENTRY']._serialized_options = b'8\001' + _globals['_BASEMETA_CUSTOMFIELDSENTRY']._loaded_options = None + _globals['_BASEMETA_CUSTOMFIELDSENTRY']._serialized_options = b'8\001' + _globals['_SUMMARYMETAFIELD_CUSTOMFIELDSENTRY']._loaded_options = None + _globals['_SUMMARYMETAFIELD_CUSTOMFIELDSENTRY']._serialized_options = b'8\001' + _globals['_PICTUREMETA_CUSTOMFIELDSENTRY']._loaded_options = None + _globals['_PICTUREMETA_CUSTOMFIELDSENTRY']._serialized_options = b'8\001' + _globals['_DESCRIPTIONMETAFIELD_CUSTOMFIELDSENTRY']._loaded_options = None + _globals['_DESCRIPTIONMETAFIELD_CUSTOMFIELDSENTRY']._serialized_options = b'8\001' + _globals['_PICTURECLASSIFICATIONMETAFIELD_CUSTOMFIELDSENTRY']._loaded_options = None + 
_globals['_PICTURECLASSIFICATIONMETAFIELD_CUSTOMFIELDSENTRY']._serialized_options = b'8\001' + _globals['_PICTURECLASSIFICATIONPREDICTION_CUSTOMFIELDSENTRY']._loaded_options = None + _globals['_PICTURECLASSIFICATIONPREDICTION_CUSTOMFIELDSENTRY']._serialized_options = b'8\001' + _globals['_MOLECULEMETAFIELD_CUSTOMFIELDSENTRY']._loaded_options = None + _globals['_MOLECULEMETAFIELD_CUSTOMFIELDSENTRY']._serialized_options = b'8\001' + _globals['_TABULARCHARTMETAFIELD_CUSTOMFIELDSENTRY']._loaded_options = None + _globals['_TABULARCHARTMETAFIELD_CUSTOMFIELDSENTRY']._serialized_options = b'8\001' + _globals['_FLOATINGMETA_CUSTOMFIELDSENTRY']._loaded_options = None + _globals['_FLOATINGMETA_CUSTOMFIELDSENTRY']._serialized_options = b'8\001' + _globals['_CONTENTLAYER']._serialized_start=14639 + _globals['_CONTENTLAYER']._serialized_end=14821 + _globals['_GROUPLABEL']._serialized_start=14824 + _globals['_GROUPLABEL']._serialized_end=15159 + _globals['_DOCITEMLABEL']._serialized_start=15162 + _globals['_DOCITEMLABEL']._serialized_end=16134 + _globals['_SCRIPT']._serialized_start=16136 + _globals['_SCRIPT']._serialized_end=16223 + _globals['_COORDORIGIN']._serialized_start=16225 + _globals['_COORDORIGIN']._serialized_end=16323 + _globals['_CODELANGUAGELABEL']._serialized_start=16326 + _globals['_CODELANGUAGELABEL']._serialized_end=18194 + _globals['_GRAPHCELLLABEL']._serialized_start=18197 + _globals['_GRAPHCELLLABEL']._serialized_end=18332 + _globals['_GRAPHLINKLABEL']._serialized_start=18335 + _globals['_GRAPHLINKLABEL']._serialized_end=18508 + _globals['_DOCLINGDOCUMENT']._serialized_start=96 + _globals['_DOCLINGDOCUMENT']._serialized_end=920 + _globals['_DOCLINGDOCUMENT_PAGESENTRY']._serialized_start=807 + _globals['_DOCLINGDOCUMENT_PAGESENTRY']._serialized_end=881 + _globals['_DOCUMENTORIGIN']._serialized_start=922 + _globals['_DOCUMENTORIGIN']._serialized_end=1021 + _globals['_GROUPITEM']._serialized_start=1024 + _globals['_GROUPITEM']._serialized_end=1351 + 
_globals['_REFITEM']._serialized_start=1353 + _globals['_REFITEM']._serialized_end=1375 + _globals['_INTSPAN']._serialized_start=1377 + _globals['_INTSPAN']._serialized_end=1414 + _globals['_FLOATPAIR']._serialized_start=1416 + _globals['_FLOATPAIR']._serialized_end=1458 + _globals['_STRINGINTPAIR']._serialized_start=1460 + _globals['_STRINGINTPAIR']._serialized_end=1503 + _globals['_FINEREF']._serialized_start=1505 + _globals['_FINEREF']._serialized_end=1586 + _globals['_TRACKSOURCE']._serialized_start=1589 + _globals['_TRACKSOURCE']._serialized_end=1738 + _globals['_SOURCETYPE']._serialized_start=1740 + _globals['_SOURCETYPE']._serialized_end=1812 + _globals['_BASEMETA']._serialized_start=1815 + _globals['_BASEMETA']._serialized_end=2045 + _globals['_BASEMETA_CUSTOMFIELDSENTRY']._serialized_start=1958 + _globals['_BASEMETA_CUSTOMFIELDSENTRY']._serialized_end=2033 + _globals['_SUMMARYMETAFIELD']._serialized_start=2048 + _globals['_SUMMARYMETAFIELD']._serialized_end=2316 + _globals['_SUMMARYMETAFIELD_CUSTOMFIELDSENTRY']._serialized_start=1958 + _globals['_SUMMARYMETAFIELD_CUSTOMFIELDSENTRY']._serialized_end=2033 + _globals['_FORMATTING']._serialized_start=2319 + _globals['_FORMATTING']._serialized_end=2447 + _globals['_BASETEXTITEM']._serialized_start=2450 + _globals['_BASETEXTITEM']._serialized_end=2902 + _globals['_TEXTITEMBASE']._serialized_start=2905 + _globals['_TEXTITEMBASE']._serialized_end=3492 + _globals['_TITLEITEM']._serialized_start=3494 + _globals['_TITLEITEM']._serialized_end=3553 + _globals['_SECTIONHEADERITEM']._serialized_start=3555 + _globals['_SECTIONHEADERITEM']._serialized_end=3637 + _globals['_FIELDHEADINGITEM']._serialized_start=3639 + _globals['_FIELDHEADINGITEM']._serialized_end=3720 + _globals['_FIELDVALUEITEM']._serialized_start=3722 + _globals['_FIELDVALUEITEM']._serialized_end=3800 + _globals['_LISTITEM']._serialized_start=3802 + _globals['_LISTITEM']._serialized_end=3912 + _globals['_CODEITEM']._serialized_start=3915 + 
_globals['_CODEITEM']._serialized_end=4378 + _globals['_FORMULAITEM']._serialized_start=4380 + _globals['_FORMULAITEM']._serialized_end=4441 + _globals['_TEXTITEM']._serialized_start=4443 + _globals['_TEXTITEM']._serialized_end=4501 + _globals['_PROVENANCEITEM']._serialized_start=4503 + _globals['_PROVENANCEITEM']._serialized_end=4630 + _globals['_BOUNDINGBOX']._serialized_start=4633 + _globals['_BOUNDINGBOX']._serialized_end=4819 + _globals['_IMAGEREF']._serialized_start=4821 + _globals['_IMAGEREF']._serialized_end=4915 + _globals['_SIZE']._serialized_start=4917 + _globals['_SIZE']._serialized_end=4954 + _globals['_DESCRIPTIONANNOTATION']._serialized_start=4956 + _globals['_DESCRIPTIONANNOTATION']._serialized_end=5027 + _globals['_MISCANNOTATION']._serialized_start=5029 + _globals['_MISCANNOTATION']._serialized_end=5101 + _globals['_PICTURECLASSIFICATIONCLASS']._serialized_start=5103 + _globals['_PICTURECLASSIFICATIONCLASS']._serialized_end=5171 + _globals['_PICTURECLASSIFICATIONDATA']._serialized_start=5174 + _globals['_PICTURECLASSIFICATIONDATA']._serialized_end=5310 + _globals['_PICTUREMOLECULEDATA']._serialized_start=5313 + _globals['_PICTUREMOLECULEDATA']._serialized_end=5474 + _globals['_PICTURETABULARCHARTDATA']._serialized_start=5476 + _globals['_PICTURETABULARCHARTDATA']._serialized_end=5581 + _globals['_CHARTLINE']._serialized_start=5583 + _globals['_CHARTLINE']._serialized_end=5656 + _globals['_PICTURELINECHARTDATA']._serialized_start=5659 + _globals['_PICTURELINECHARTDATA']._serialized_end=5800 + _globals['_CHARTBAR']._serialized_start=5802 + _globals['_CHARTBAR']._serialized_end=5843 + _globals['_PICTUREBARCHARTDATA']._serialized_start=5846 + _globals['_PICTUREBARCHARTDATA']._serialized_end=5984 + _globals['_CHARTSTACKEDBAR']._serialized_start=5986 + _globals['_CHARTSTACKEDBAR']._serialized_end=6069 + _globals['_PICTURESTACKEDBARCHARTDATA']._serialized_start=6072 + _globals['_PICTURESTACKEDBARCHARTDATA']._serialized_end=6232 + 
_globals['_CHARTSLICE']._serialized_start=6234 + _globals['_CHARTSLICE']._serialized_end=6276 + _globals['_PICTUREPIECHARTDATA']._serialized_start=6278 + _globals['_PICTUREPIECHARTDATA']._serialized_end=6376 + _globals['_CHARTPOINT']._serialized_start=6378 + _globals['_CHARTPOINT']._serialized_end=6436 + _globals['_PICTURESCATTERCHARTDATA']._serialized_start=6439 + _globals['_PICTURESCATTERCHARTDATA']._serialized_end=6585 + _globals['_PICTUREANNOTATION']._serialized_start=6588 + _globals['_PICTUREANNOTATION']._serialized_end=7278 + _globals['_TABLEANNOTATION']._serialized_start=7281 + _globals['_TABLEANNOTATION']._serialized_end=7430 + _globals['_PICTUREITEM']._serialized_start=7433 + _globals['_PICTUREITEM']._serialized_end=8116 + _globals['_PICTUREMETA']._serialized_start=8119 + _globals['_PICTUREMETA']._serialized_end=8703 + _globals['_PICTUREMETA_CUSTOMFIELDSENTRY']._serialized_start=1958 + _globals['_PICTUREMETA_CUSTOMFIELDSENTRY']._serialized_end=2033 + _globals['_DESCRIPTIONMETAFIELD']._serialized_start=8706 + _globals['_DESCRIPTIONMETAFIELD']._serialized_end=8982 + _globals['_DESCRIPTIONMETAFIELD_CUSTOMFIELDSENTRY']._serialized_start=1958 + _globals['_DESCRIPTIONMETAFIELD_CUSTOMFIELDSENTRY']._serialized_end=2033 + _globals['_PICTURECLASSIFICATIONMETAFIELD']._serialized_start=8985 + _globals['_PICTURECLASSIFICATIONMETAFIELD']._serialized_end=9261 + _globals['_PICTURECLASSIFICATIONMETAFIELD_CUSTOMFIELDSENTRY']._serialized_start=1958 + _globals['_PICTURECLASSIFICATIONMETAFIELD_CUSTOMFIELDSENTRY']._serialized_end=2033 + _globals['_PICTURECLASSIFICATIONPREDICTION']._serialized_start=9264 + _globals['_PICTURECLASSIFICATIONPREDICTION']._serialized_end=9568 + _globals['_PICTURECLASSIFICATIONPREDICTION_CUSTOMFIELDSENTRY']._serialized_start=1958 + _globals['_PICTURECLASSIFICATIONPREDICTION_CUSTOMFIELDSENTRY']._serialized_end=2033 + _globals['_MOLECULEMETAFIELD']._serialized_start=9571 + _globals['_MOLECULEMETAFIELD']._serialized_end=9840 + 
_globals['_MOLECULEMETAFIELD_CUSTOMFIELDSENTRY']._serialized_start=1958 + _globals['_MOLECULEMETAFIELD_CUSTOMFIELDSENTRY']._serialized_end=2033 + _globals['_TABULARCHARTMETAFIELD']._serialized_start=9843 + _globals['_TABULARCHARTMETAFIELD']._serialized_end=10188 + _globals['_TABULARCHARTMETAFIELD_CUSTOMFIELDSENTRY']._serialized_start=1958 + _globals['_TABULARCHARTMETAFIELD_CUSTOMFIELDSENTRY']._serialized_end=2033 + _globals['_FLOATINGMETA']._serialized_start=10191 + _globals['_FLOATINGMETA']._serialized_end=10513 + _globals['_FLOATINGMETA_CUSTOMFIELDSENTRY']._serialized_start=1958 + _globals['_FLOATINGMETA_CUSTOMFIELDSENTRY']._serialized_end=2033 + _globals['_TABLEITEM']._serialized_start=10516 + _globals['_TABLEITEM']._serialized_end=11239 + _globals['_TABLEDATA']._serialized_start=11242 + _globals['_TABLEDATA']._serialized_end=11385 + _globals['_TABLEROW']._serialized_start=11387 + _globals['_TABLEROW']._serialized_end=11443 + _globals['_TABLECELL']._serialized_start=11446 + _globals['_TABLECELL']._serialized_end=11807 + _globals['_KEYVALUEITEM']._serialized_start=11810 + _globals['_KEYVALUEITEM']._serialized_end=12479 + _globals['_GRAPHDATA']._serialized_start=12481 + _globals['_GRAPHDATA']._serialized_end=12584 + _globals['_GRAPHCELL']._serialized_start=12587 + _globals['_GRAPHCELL']._serialized_end=12823 + _globals['_GRAPHLINK']._serialized_start=12825 + _globals['_GRAPHLINK']._serialized_end=12935 + _globals['_FORMITEM']._serialized_start=12938 + _globals['_FORMITEM']._serialized_end=13603 + _globals['_FIELDREGIONITEM']._serialized_start=13606 + _globals['_FIELDREGIONITEM']._serialized_end=14058 + _globals['_FIELDITEM']._serialized_start=14061 + _globals['_FIELDITEM']._serialized_end=14507 + _globals['_PAGEITEM']._serialized_start=14509 + _globals['_PAGEITEM']._serialized_end=14636 +# @@protoc_insertion_point(module_scope) diff --git a/docling_core/utils/conversion.py b/docling_core/utils/conversion.py new file mode 100644 index 00000000..ea056e9d --- 
/dev/null +++ b/docling_core/utils/conversion.py @@ -0,0 +1,1166 @@ +from __future__ import annotations + +import warnings +from enum import Enum +from typing import Any, Optional + +from google.protobuf import struct_pb2 + +from docling_core.types.doc.base import CoordOrigin +from docling_core.types.doc.document import ( + BaseMeta, + BaseSource, + BoundingBox, + CodeItem, + ContentLayer, + DescriptionAnnotation, + DescriptionMetaField, + DoclingDocument, + DocumentOrigin, + FieldHeadingItem, + FieldItem, + FieldRegionItem, + FieldValueItem, + FineRef, + FloatingMeta, + Formatting, + FormItem, + FormulaItem, + GraphCell, + GraphData, + GraphLink, + GroupItem, + ImageRef, + KeyValueItem, + ListItem, + MiscAnnotation, + MoleculeMetaField, + PageItem, + PictureBarChartData, + PictureClassificationClass, + PictureClassificationData, + PictureClassificationMetaField, + PictureClassificationPrediction, + PictureItem, + PictureLineChartData, + PictureMeta, + PictureMoleculeData, + PicturePieChartData, + PictureScatterChartData, + PictureStackedBarChartData, + PictureTabularChartData, + ProvenanceItem, + RefItem, + RichTableCell, + Script, + SectionHeaderItem, + Size, + SummaryMetaField, + TableCell, + TableData, + TableItem, + TabularChartMetaField, + TextItem, + TitleItem, + TrackSource, +) +from docling_core.types.doc.labels import ( + CodeLanguageLabel, + DocItemLabel, + GraphCellLabel, + GraphLinkLabel, + GroupLabel, +) +from docling_core.proto.gen.ai.docling.core.v1 import docling_document_pb2 as pb2 + + +def _enum_value(value: Enum | str | None, mapping: dict[str, int], default: int) -> int: + if value is None: + return default + if isinstance(value, Enum): + key = value.value + else: + key = value + return mapping.get(str(key), default) + + +_CONTENT_LAYER_MAP = { + ContentLayer.BODY.value: pb2.CONTENT_LAYER_BODY, + ContentLayer.FURNITURE.value: pb2.CONTENT_LAYER_FURNITURE, + ContentLayer.BACKGROUND.value: pb2.CONTENT_LAYER_BACKGROUND, + 
# Lookup table from each GroupLabel member's ``.value`` to the matching
# ``pb2.GROUP_LABEL_*`` constant of the generated proto module.
_GROUP_LABEL_MAP = {
    GroupLabel.UNSPECIFIED.value: pb2.GROUP_LABEL_UNSPECIFIED,
    GroupLabel.LIST.value: pb2.GROUP_LABEL_LIST,
    GroupLabel.ORDERED_LIST.value: pb2.GROUP_LABEL_ORDERED_LIST,
    GroupLabel.CHAPTER.value: pb2.GROUP_LABEL_CHAPTER,
    GroupLabel.SECTION.value: pb2.GROUP_LABEL_SECTION,
    GroupLabel.SHEET.value: pb2.GROUP_LABEL_SHEET,
    GroupLabel.SLIDE.value: pb2.GROUP_LABEL_SLIDE,
    GroupLabel.FORM_AREA.value: pb2.GROUP_LABEL_FORM_AREA,
    GroupLabel.KEY_VALUE_AREA.value: pb2.GROUP_LABEL_KEY_VALUE_AREA,
    GroupLabel.COMMENT_SECTION.value: pb2.GROUP_LABEL_COMMENT_SECTION,
    GroupLabel.INLINE.value: pb2.GROUP_LABEL_INLINE,
    GroupLabel.PICTURE_AREA.value: pb2.GROUP_LABEL_PICTURE_AREA,
}
# Script ``.value`` -> proto Script constant; _to_formatting resolves it via
# _enum_value with pb2.SCRIPT_UNSPECIFIED as the fallback.
_SCRIPT_MAP = {
    Script.BASELINE.value: pb2.SCRIPT_BASELINE,
    Script.SUB.value: pb2.SCRIPT_SUB,
    Script.SUPER.value: pb2.SCRIPT_SUPER,
}

# GraphCellLabel ``.value`` -> proto GRAPH_CELL_LABEL_* constant.
_GRAPH_CELL_LABEL_MAP = {
    GraphCellLabel.UNSPECIFIED.value: pb2.GRAPH_CELL_LABEL_UNSPECIFIED,
    GraphCellLabel.KEY.value: pb2.GRAPH_CELL_LABEL_KEY,
    GraphCellLabel.VALUE.value: pb2.GRAPH_CELL_LABEL_VALUE,
    GraphCellLabel.CHECKBOX.value: pb2.GRAPH_CELL_LABEL_CHECKBOX,
}

# GraphLinkLabel ``.value`` -> proto GRAPH_LINK_LABEL_* constant.
_GRAPH_LINK_LABEL_MAP = {
    GraphLinkLabel.UNSPECIFIED.value: pb2.GRAPH_LINK_LABEL_UNSPECIFIED,
    GraphLinkLabel.TO_VALUE.value: pb2.GRAPH_LINK_LABEL_TO_VALUE,
    GraphLinkLabel.TO_KEY.value: pb2.GRAPH_LINK_LABEL_TO_KEY,
    GraphLinkLabel.TO_PARENT.value: pb2.GRAPH_LINK_LABEL_TO_PARENT,
    GraphLinkLabel.TO_CHILD.value: pb2.GRAPH_LINK_LABEL_TO_CHILD,
}

# CoordOrigin ``.value`` -> proto COORD_ORIGIN_* constant. _to_bbox looks the
# origin up here and, on a miss, stores COORD_ORIGIN_UNSPECIFIED plus the raw
# string in ``coord_origin_raw``.
_COORD_ORIGIN_MAP = {
    CoordOrigin.TOPLEFT.value: pb2.COORD_ORIGIN_TOPLEFT,
    CoordOrigin.BOTTOMLEFT.value: pb2.COORD_ORIGIN_BOTTOMLEFT,
}
pb2.CODE_LANGUAGE_LABEL_C, + CodeLanguageLabel.C_SHARP.value: pb2.CODE_LANGUAGE_LABEL_C_SHARP, + CodeLanguageLabel.C_PLUS_PLUS.value: pb2.CODE_LANGUAGE_LABEL_C_PLUS_PLUS, + CodeLanguageLabel.CMAKE.value: pb2.CODE_LANGUAGE_LABEL_CMAKE, + CodeLanguageLabel.COBOL.value: pb2.CODE_LANGUAGE_LABEL_COBOL, + CodeLanguageLabel.CSS.value: pb2.CODE_LANGUAGE_LABEL_CSS, + CodeLanguageLabel.CEYLON.value: pb2.CODE_LANGUAGE_LABEL_CEYLON, + CodeLanguageLabel.CLOJURE.value: pb2.CODE_LANGUAGE_LABEL_CLOJURE, + CodeLanguageLabel.CRYSTAL.value: pb2.CODE_LANGUAGE_LABEL_CRYSTAL, + CodeLanguageLabel.CUDA.value: pb2.CODE_LANGUAGE_LABEL_CUDA, + CodeLanguageLabel.CYTHON.value: pb2.CODE_LANGUAGE_LABEL_CYTHON, + CodeLanguageLabel.D.value: pb2.CODE_LANGUAGE_LABEL_D, + CodeLanguageLabel.DART.value: pb2.CODE_LANGUAGE_LABEL_DART, + CodeLanguageLabel.DC.value: pb2.CODE_LANGUAGE_LABEL_DC, + CodeLanguageLabel.DOCKERFILE.value: pb2.CODE_LANGUAGE_LABEL_DOCKERFILE, + CodeLanguageLabel.ELIXIR.value: pb2.CODE_LANGUAGE_LABEL_ELIXIR, + CodeLanguageLabel.ERLANG.value: pb2.CODE_LANGUAGE_LABEL_ERLANG, + CodeLanguageLabel.FORTRAN.value: pb2.CODE_LANGUAGE_LABEL_FORTRAN, + CodeLanguageLabel.FORTH.value: pb2.CODE_LANGUAGE_LABEL_FORTH, + CodeLanguageLabel.GO.value: pb2.CODE_LANGUAGE_LABEL_GO, + CodeLanguageLabel.HTML.value: pb2.CODE_LANGUAGE_LABEL_HTML, + CodeLanguageLabel.HASKELL.value: pb2.CODE_LANGUAGE_LABEL_HASKELL, + CodeLanguageLabel.HAXE.value: pb2.CODE_LANGUAGE_LABEL_HAXE, + CodeLanguageLabel.JAVA.value: pb2.CODE_LANGUAGE_LABEL_JAVA, + CodeLanguageLabel.JAVASCRIPT.value: pb2.CODE_LANGUAGE_LABEL_JAVASCRIPT, + CodeLanguageLabel.JSON.value: pb2.CODE_LANGUAGE_LABEL_JSON, + CodeLanguageLabel.JULIA.value: pb2.CODE_LANGUAGE_LABEL_JULIA, + CodeLanguageLabel.KOTLIN.value: pb2.CODE_LANGUAGE_LABEL_KOTLIN, + CodeLanguageLabel.LISP.value: pb2.CODE_LANGUAGE_LABEL_LISP, + CodeLanguageLabel.LUA.value: pb2.CODE_LANGUAGE_LABEL_LUA, + CodeLanguageLabel.MATLAB.value: pb2.CODE_LANGUAGE_LABEL_MATLAB, + 
def _to_struct_value(value: Any) -> struct_pb2.Value:
    """Wrap a plain Python value in a ``google.protobuf.Value`` message.

    Supported inputs are ``None``, ``bool``, ``int``/``float`` (stored as a
    float), ``str``, ``dict`` with string keys, and ``list``/``tuple``.
    Containers are converted recursively.

    Raises:
        TypeError: If a dict key is not a string or the value has any other
            type.
    """
    wrapped = struct_pb2.Value()
    if value is None:
        wrapped.null_value = struct_pb2.NullValue.NULL_VALUE
    elif isinstance(value, bool):
        # Checked before the numeric branch: bool is a subclass of int.
        wrapped.bool_value = value
    elif isinstance(value, (int, float)):
        wrapped.number_value = float(value)
    elif isinstance(value, str):
        wrapped.string_value = value
    elif isinstance(value, dict):
        nested = struct_pb2.Struct()
        for name, entry in value.items():
            if not isinstance(name, str):
                raise TypeError("Custom field keys must be strings.")
            nested.fields[name].CopyFrom(_to_struct_value(entry))
        wrapped.struct_value.CopyFrom(nested)
    elif isinstance(value, (list, tuple)):
        sequence = struct_pb2.ListValue()
        for entry in value:
            sequence.values.add().CopyFrom(_to_struct_value(entry))
        wrapped.list_value.CopyFrom(sequence)
    else:
        raise TypeError(f"Unsupported custom field type: {type(value)!r}")
    return wrapped
def _to_base_meta(meta: Optional[BaseMeta]) -> Optional[pb2.BaseMeta]:
    """Convert a ``BaseMeta`` into its proto message.

    Returns ``None`` when ``meta`` is ``None`` or carries neither a summary
    nor any custom fields, so callers can skip setting the proto field.
    """
    if meta is None:
        return None
    summary = meta.summary
    # Nothing to serialise: no summary and an empty custom part.
    if summary is None and not meta.get_custom_part():
        return None
    result = pb2.BaseMeta()
    if summary is not None:
        result.summary.CopyFrom(_to_summary_meta(summary))
    _apply_custom_fields(result, meta)
    return result
def _build_description_annotation_msg(
    annotation: DescriptionAnnotation,
) -> pb2.DescriptionAnnotation:
    """Build the proto DescriptionAnnotation shared by picture and table wrappers."""
    return pb2.DescriptionAnnotation(
        kind=annotation.kind,
        text=annotation.text,
        provenance=annotation.provenance,
    )


def _build_misc_annotation_msg(annotation: MiscAnnotation) -> pb2.MiscAnnotation:
    """Build the proto MiscAnnotation; ``content`` is only set when non-empty."""
    misc = pb2.MiscAnnotation(kind=annotation.kind)
    if annotation.content:
        struct = struct_pb2.Struct()
        for k, v in annotation.content.items():
            # Keys are coerced with str() rather than rejected.
            struct.fields[str(k)].CopyFrom(_to_struct_value(v))
        misc.content.CopyFrom(struct)
    return misc


def _to_picture_annotation(annotation) -> pb2.PictureAnnotation:
    """Convert a Pydantic picture annotation to its proto oneof wrapper.

    The concrete annotation class selects which field of the wrapper is
    populated.

    Raises:
        TypeError: If ``annotation`` is not one of the handled annotation
            classes.
    """
    msg = pb2.PictureAnnotation()
    if isinstance(annotation, DescriptionAnnotation):
        msg.description.CopyFrom(_build_description_annotation_msg(annotation))
    elif isinstance(annotation, MiscAnnotation):
        msg.misc.CopyFrom(_build_misc_annotation_msg(annotation))
    elif isinstance(annotation, PictureClassificationData):
        msg.classification.CopyFrom(
            pb2.PictureClassificationData(
                kind=annotation.kind,
                provenance=annotation.provenance,
                predicted_classes=[
                    pb2.PictureClassificationClass(
                        class_name=c.class_name,
                        confidence=c.confidence,
                    )
                    for c in annotation.predicted_classes
                ],
            )
        )
    elif isinstance(annotation, PictureMoleculeData):
        mol = pb2.PictureMoleculeData(
            kind=annotation.kind,
            smi=annotation.smi,
            confidence=annotation.confidence,
            class_name=annotation.class_name,
            provenance=annotation.provenance,
        )
        if annotation.segmentation:
            mol.segmentation.extend(
                [_to_float_pair(p) for p in annotation.segmentation]
            )
        msg.molecule.CopyFrom(mol)
    elif isinstance(annotation, PictureTabularChartData):
        msg.tabular_chart.CopyFrom(
            pb2.PictureTabularChartData(
                kind=annotation.kind,
                title=annotation.title,
                chart_data=_to_table_data(annotation.chart_data),
            )
        )
    elif isinstance(annotation, PictureLineChartData):
        msg.line_chart.CopyFrom(
            pb2.PictureLineChartData(
                kind=annotation.kind,
                title=annotation.title,
                x_axis_label=annotation.x_axis_label,
                y_axis_label=annotation.y_axis_label,
                lines=[
                    pb2.ChartLine(
                        label=line.label,
                        values=[_to_float_pair(v) for v in line.values],
                    )
                    for line in annotation.lines
                ],
            )
        )
    elif isinstance(annotation, PictureBarChartData):
        msg.bar_chart.CopyFrom(
            pb2.PictureBarChartData(
                kind=annotation.kind,
                title=annotation.title,
                x_axis_label=annotation.x_axis_label,
                y_axis_label=annotation.y_axis_label,
                bars=[
                    pb2.ChartBar(label=bar.label, values=bar.values)
                    for bar in annotation.bars
                ],
            )
        )
    elif isinstance(annotation, PictureStackedBarChartData):
        msg.stacked_bar_chart.CopyFrom(
            pb2.PictureStackedBarChartData(
                kind=annotation.kind,
                title=annotation.title,
                x_axis_label=annotation.x_axis_label,
                y_axis_label=annotation.y_axis_label,
                stacked_bars=[
                    pb2.ChartStackedBar(
                        label=list(sb.label),
                        values=[_to_string_int_pair(v) for v in sb.values],
                    )
                    for sb in annotation.stacked_bars
                ],
            )
        )
    elif isinstance(annotation, PicturePieChartData):
        msg.pie_chart.CopyFrom(
            pb2.PicturePieChartData(
                kind=annotation.kind,
                title=annotation.title,
                slices=[
                    pb2.ChartSlice(label=s.label, value=s.value)
                    for s in annotation.slices
                ],
            )
        )
    elif isinstance(annotation, PictureScatterChartData):
        msg.scatter_chart.CopyFrom(
            pb2.PictureScatterChartData(
                kind=annotation.kind,
                title=annotation.title,
                x_axis_label=annotation.x_axis_label,
                y_axis_label=annotation.y_axis_label,
                points=[
                    pb2.ChartPoint(value=_to_float_pair(p.value))
                    for p in annotation.points
                ],
            )
        )
    else:
        raise TypeError(f"Unsupported picture annotation type: {type(annotation)!r}")
    return msg


def _to_table_annotation(annotation) -> pb2.TableAnnotation:
    """Convert a Pydantic table annotation to its proto oneof wrapper.

    Only description and misc annotations are handled.

    Raises:
        TypeError: If ``annotation`` is any other type.
    """
    msg = pb2.TableAnnotation()
    if isinstance(annotation, DescriptionAnnotation):
        msg.description.CopyFrom(_build_description_annotation_msg(annotation))
    elif isinstance(annotation, MiscAnnotation):
        msg.misc.CopyFrom(_build_misc_annotation_msg(annotation))
    else:
        raise TypeError(f"Unsupported table annotation type: {type(annotation)!r}")
    return msg
def _annotations_from_picture_meta(
    meta: Optional[PictureMeta],
) -> list[
    DescriptionAnnotation
    | PictureClassificationData
    | PictureMoleculeData
    | PictureTabularChartData
]:
    """Derive picture annotations from ``meta`` instead of ``item.annotations``.

    One annotation is emitted for each populated section of ``meta``, in the
    order description, classification, molecule, tabular chart. Missing
    optional values fall back to ``""`` or ``0.0``.
    """
    annotations: list[
        DescriptionAnnotation
        | PictureClassificationData
        | PictureMoleculeData
        | PictureTabularChartData
    ] = []
    if meta is None:
        return annotations

    description = meta.description
    if description is not None:
        annotations.append(
            DescriptionAnnotation(
                kind="description",
                text=description.text,
                provenance=description.created_by or "",
            )
        )

    classification = meta.classification
    if classification is not None:
        classes = [
            PictureClassificationClass(
                class_name=pred.class_name,
                confidence=getattr(pred, "confidence", 0.0) or 0.0,
            )
            for pred in classification.predictions
        ]
        annotations.append(
            PictureClassificationData(
                kind="classification",
                provenance="",
                predicted_classes=classes,
            )
        )

    molecule = meta.molecule
    if molecule is not None:
        annotations.append(
            PictureMoleculeData(
                kind="molecule_data",
                smi=molecule.smi,
                confidence=molecule.confidence or 0.0,
                class_name="",
                segmentation=[],
                provenance=molecule.created_by or "",
            )
        )

    chart = meta.tabular_chart
    if chart is not None:
        annotations.append(
            PictureTabularChartData(
                kind="tabular_chart_data",
                title=chart.title or "",
                chart_data=chart.chart_data,
            )
        )
    return annotations
Optional[BoundingBox]) -> Optional[pb2.BoundingBox]: + if bbox is None: + return None + msg = pb2.BoundingBox(l=bbox.l, t=bbox.t, r=bbox.r, b=bbox.b) + if bbox.coord_origin is not None: + key = ( + bbox.coord_origin.value + if isinstance(bbox.coord_origin, Enum) + else str(bbox.coord_origin) + ) + enum_val = _COORD_ORIGIN_MAP.get(str(key)) + if enum_val is None: + msg.coord_origin = pb2.COORD_ORIGIN_UNSPECIFIED + msg.coord_origin_raw = str(key) + else: + msg.coord_origin = enum_val + return msg + + +def _to_size(size: Size) -> pb2.Size: + return pb2.Size(width=size.width, height=size.height) + + +def _to_image_ref(image: Optional[ImageRef]) -> Optional[pb2.ImageRef]: + if image is None: + return None + msg = pb2.ImageRef( + mimetype=image.mimetype, + dpi=image.dpi, + size=_to_size(image.size), + uri=str(image.uri), + ) + return msg + + +def _to_provenance_item(prov: ProvenanceItem) -> pb2.ProvenanceItem: + msg = pb2.ProvenanceItem(page_no=prov.page_no) + msg.bbox.CopyFrom(_to_bbox(prov.bbox)) + msg.charspan.CopyFrom( + pb2.IntSpan(start=int(prov.charspan[0]), end=int(prov.charspan[1])) + ) + return msg + + +def _to_text_item_base(item: TextItem) -> pb2.TextItemBase: + msg = pb2.TextItemBase( + self_ref=item.self_ref, + content_layer=_enum_value( + item.content_layer, _CONTENT_LAYER_MAP, pb2.CONTENT_LAYER_UNSPECIFIED + ), + label=_enum_value( + item.label, _DOC_ITEM_LABEL_MAP, pb2.DOC_ITEM_LABEL_UNSPECIFIED + ), + orig=item.orig, + text=item.text, + ) + if item.parent is not None: + msg.parent.CopyFrom(_to_ref(item.parent)) + if item.children: + msg.children.extend([_to_ref(child) for child in item.children]) + meta = _to_base_meta(item.meta) + if meta is not None: + msg.meta.CopyFrom(meta) + if item.prov: + msg.prov.extend([_to_provenance_item(p) for p in item.prov]) + if item.source: + msg.source.extend([_to_source_type(src) for src in item.source]) + if item.comments: + msg.comments.extend([_to_fine_ref(ref) for ref in item.comments]) + fmt = 
_to_formatting(item.formatting) + if fmt is not None: + msg.formatting.CopyFrom(fmt) + if item.hyperlink is not None: + msg.hyperlink = str(item.hyperlink) + return msg + + +def _to_title_item(item: TitleItem) -> pb2.TitleItem: + return pb2.TitleItem(base=_to_text_item_base(item)) + + +def _to_section_header_item(item: SectionHeaderItem) -> pb2.SectionHeaderItem: + return pb2.SectionHeaderItem(base=_to_text_item_base(item), level=item.level) + + +def _to_field_heading_item(item: FieldHeadingItem) -> pb2.FieldHeadingItem: + return pb2.FieldHeadingItem(base=_to_text_item_base(item), level=item.level) + + +def _to_field_value_item(item: FieldValueItem) -> pb2.FieldValueItem: + return pb2.FieldValueItem(base=_to_text_item_base(item), kind=item.kind) + + +def _to_list_item(item: ListItem) -> pb2.ListItem: + msg = pb2.ListItem(base=_to_text_item_base(item), enumerated=item.enumerated) + if item.marker is not None: + msg.marker = item.marker + return msg + + +def _to_code_item(item: CodeItem) -> pb2.CodeItem: + msg = pb2.CodeItem(base=_to_text_item_base(item)) + meta = _to_floating_meta(item.meta) + if meta is not None: + msg.meta.CopyFrom(meta) + if item.captions: + msg.captions.extend([_to_ref(ref) for ref in item.captions]) + if item.references: + msg.references.extend([_to_ref(ref) for ref in item.references]) + if item.footnotes: + msg.footnotes.extend([_to_ref(ref) for ref in item.footnotes]) + image = _to_image_ref(item.image) + if image is not None: + msg.image.CopyFrom(image) + if item.code_language is not None: + key = ( + item.code_language.value + if isinstance(item.code_language, Enum) + else str(item.code_language) + ) + enum_val = _CODE_LANGUAGE_MAP.get(str(key)) + if enum_val is None: + msg.code_language = pb2.CODE_LANGUAGE_LABEL_UNSPECIFIED + msg.code_language_raw = str(key) + else: + msg.code_language = enum_val + return msg + + +def _to_formula_item(item: FormulaItem) -> pb2.FormulaItem: + return pb2.FormulaItem(base=_to_text_item_base(item)) + + +def 
_to_text_item(item: TextItem) -> pb2.TextItem: + return pb2.TextItem(base=_to_text_item_base(item)) + + +def _to_base_text_item(item: TextItem) -> pb2.BaseTextItem: + msg = pb2.BaseTextItem() + if isinstance(item, TitleItem): + msg.title.CopyFrom(_to_title_item(item)) + elif isinstance(item, SectionHeaderItem): + msg.section_header.CopyFrom(_to_section_header_item(item)) + elif isinstance(item, FieldHeadingItem): + msg.field_heading.CopyFrom(_to_field_heading_item(item)) + elif isinstance(item, FieldValueItem): + msg.field_value.CopyFrom(_to_field_value_item(item)) + elif isinstance(item, ListItem): + msg.list_item.CopyFrom(_to_list_item(item)) + elif isinstance(item, CodeItem): + msg.code.CopyFrom(_to_code_item(item)) + elif isinstance(item, FormulaItem): + msg.formula.CopyFrom(_to_formula_item(item)) + else: + msg.text.CopyFrom(_to_text_item(item)) + return msg + + +def _to_table_cell(cell: TableCell | RichTableCell) -> pb2.TableCell: + msg = pb2.TableCell( + row_span=cell.row_span, + col_span=cell.col_span, + start_row_offset_idx=cell.start_row_offset_idx, + end_row_offset_idx=cell.end_row_offset_idx, + start_col_offset_idx=cell.start_col_offset_idx, + end_col_offset_idx=cell.end_col_offset_idx, + text=cell.text, + column_header=cell.column_header, + row_header=cell.row_header, + row_section=cell.row_section, + fillable=cell.fillable, + ) + bbox = _to_bbox(cell.bbox) + if bbox is not None: + msg.bbox.CopyFrom(bbox) + if getattr(cell, "ref", None) is not None: + msg.ref.CopyFrom(_to_ref(cell.ref)) + return msg + + +def _to_table_data(data: TableData) -> pb2.TableData: + msg = pb2.TableData(num_rows=data.num_rows, num_cols=data.num_cols) + if data.table_cells: + msg.table_cells.extend([_to_table_cell(cell) for cell in data.table_cells]) + for row in data.grid: + row_msg = pb2.TableRow() + row_msg.cells.extend([_to_table_cell(cell) for cell in row]) + msg.grid.append(row_msg) + return msg + + +def _to_table_item_base(item: TableItem) -> pb2.TableItem: + msg = 
pb2.TableItem( + self_ref=item.self_ref, + content_layer=_enum_value( + item.content_layer, _CONTENT_LAYER_MAP, pb2.CONTENT_LAYER_UNSPECIFIED + ), + label=str(item.label.value), + ) + if item.parent is not None: + msg.parent.CopyFrom(_to_ref(item.parent)) + if item.children: + msg.children.extend([_to_ref(ref) for ref in item.children]) + meta = _to_floating_meta(item.meta) + if meta is not None: + msg.meta.CopyFrom(meta) + if item.prov: + msg.prov.extend([_to_provenance_item(p) for p in item.prov]) + if item.source: + msg.source.extend([_to_source_type(src) for src in item.source]) + if item.comments: + msg.comments.extend([_to_fine_ref(ref) for ref in item.comments]) + if item.captions: + msg.captions.extend([_to_ref(ref) for ref in item.captions]) + if item.references: + msg.references.extend([_to_ref(ref) for ref in item.references]) + if item.footnotes: + msg.footnotes.extend([_to_ref(ref) for ref in item.footnotes]) + image = _to_image_ref(item.image) + if image is not None: + msg.image.CopyFrom(image) + return msg + + +def _to_table_item(item: TableItem) -> pb2.TableItem: + msg = _to_table_item_base(item) + msg.data.CopyFrom(_to_table_data(item.data)) + for ann in _annotations_from_floating_meta(item.meta): + msg.annotations.append(_to_table_annotation(ann)) + return msg + + +def _to_picture_item(item: PictureItem) -> pb2.PictureItem: + msg = pb2.PictureItem( + self_ref=item.self_ref, + content_layer=_enum_value( + item.content_layer, _CONTENT_LAYER_MAP, pb2.CONTENT_LAYER_UNSPECIFIED + ), + label=str(item.label.value), + ) + if item.parent is not None: + msg.parent.CopyFrom(_to_ref(item.parent)) + if item.children: + msg.children.extend([_to_ref(ref) for ref in item.children]) + meta = _to_picture_meta(item.meta) + if meta is not None: + msg.meta.CopyFrom(meta) + if item.prov: + msg.prov.extend([_to_provenance_item(p) for p in item.prov]) + if item.source: + msg.source.extend([_to_source_type(src) for src in item.source]) + if item.comments: + 
msg.comments.extend([_to_fine_ref(ref) for ref in item.comments]) + if item.captions: + msg.captions.extend([_to_ref(ref) for ref in item.captions]) + if item.references: + msg.references.extend([_to_ref(ref) for ref in item.references]) + if item.footnotes: + msg.footnotes.extend([_to_ref(ref) for ref in item.footnotes]) + image = _to_image_ref(item.image) + if image is not None: + msg.image.CopyFrom(image) + for ann in _annotations_from_picture_meta(item.meta): + msg.annotations.append(_to_picture_annotation(ann)) + return msg + + +def _to_graph_cell(cell: GraphCell) -> pb2.GraphCell: + msg = pb2.GraphCell( + label=_enum_value( + cell.label, _GRAPH_CELL_LABEL_MAP, pb2.GRAPH_CELL_LABEL_UNSPECIFIED + ), + cell_id=cell.cell_id, + text=cell.text, + orig=cell.orig, + ) + if cell.prov is not None: + msg.prov.CopyFrom(_to_provenance_item(cell.prov)) + if cell.item_ref is not None: + msg.item_ref.CopyFrom(_to_ref(cell.item_ref)) + return msg + + +def _to_graph_link(link: GraphLink) -> pb2.GraphLink: + msg = pb2.GraphLink( + label=_enum_value( + link.label, _GRAPH_LINK_LABEL_MAP, pb2.GRAPH_LINK_LABEL_UNSPECIFIED + ), + source_cell_id=link.source_cell_id, + target_cell_id=link.target_cell_id, + ) + return msg + + +def _to_graph_data(data: GraphData) -> pb2.GraphData: + msg = pb2.GraphData() + if data.cells: + msg.cells.extend([_to_graph_cell(cell) for cell in data.cells]) + if data.links: + msg.links.extend([_to_graph_link(link) for link in data.links]) + return msg + + +def _to_key_value_item(item: KeyValueItem) -> pb2.KeyValueItem: + msg = pb2.KeyValueItem( + self_ref=item.self_ref, + content_layer=_enum_value( + item.content_layer, _CONTENT_LAYER_MAP, pb2.CONTENT_LAYER_UNSPECIFIED + ), + label=str(item.label.value), + ) + if item.parent is not None: + msg.parent.CopyFrom(_to_ref(item.parent)) + if item.children: + msg.children.extend([_to_ref(ref) for ref in item.children]) + meta = _to_floating_meta(item.meta) + if meta is not None: + msg.meta.CopyFrom(meta) + if 
item.prov: + msg.prov.extend([_to_provenance_item(p) for p in item.prov]) + if item.source: + msg.source.extend([_to_source_type(src) for src in item.source]) + if item.comments: + msg.comments.extend([_to_fine_ref(ref) for ref in item.comments]) + if item.captions: + msg.captions.extend([_to_ref(ref) for ref in item.captions]) + if item.references: + msg.references.extend([_to_ref(ref) for ref in item.references]) + if item.footnotes: + msg.footnotes.extend([_to_ref(ref) for ref in item.footnotes]) + image = _to_image_ref(item.image) + if image is not None: + msg.image.CopyFrom(image) + msg.graph.CopyFrom(_to_graph_data(item.graph)) + return msg + + +def _to_form_item(item: FormItem) -> pb2.FormItem: + msg = pb2.FormItem( + self_ref=item.self_ref, + content_layer=_enum_value( + item.content_layer, _CONTENT_LAYER_MAP, pb2.CONTENT_LAYER_UNSPECIFIED + ), + label=str(item.label.value), + ) + if item.parent is not None: + msg.parent.CopyFrom(_to_ref(item.parent)) + if item.children: + msg.children.extend([_to_ref(ref) for ref in item.children]) + meta = _to_floating_meta(item.meta) + if meta is not None: + msg.meta.CopyFrom(meta) + if item.prov: + msg.prov.extend([_to_provenance_item(p) for p in item.prov]) + if item.source: + msg.source.extend([_to_source_type(src) for src in item.source]) + if item.comments: + msg.comments.extend([_to_fine_ref(ref) for ref in item.comments]) + if item.captions: + msg.captions.extend([_to_ref(ref) for ref in item.captions]) + if item.references: + msg.references.extend([_to_ref(ref) for ref in item.references]) + if item.footnotes: + msg.footnotes.extend([_to_ref(ref) for ref in item.footnotes]) + image = _to_image_ref(item.image) + if image is not None: + msg.image.CopyFrom(image) + msg.graph.CopyFrom(_to_graph_data(item.graph)) + return msg + + +def _to_field_region_item(item: FieldRegionItem) -> pb2.FieldRegionItem: + msg = pb2.FieldRegionItem( + self_ref=item.self_ref, + content_layer=_enum_value( + item.content_layer, 
_CONTENT_LAYER_MAP, pb2.CONTENT_LAYER_UNSPECIFIED + ), + label=_enum_value( + item.label, _DOC_ITEM_LABEL_MAP, pb2.DOC_ITEM_LABEL_UNSPECIFIED + ), + ) + if item.parent is not None: + msg.parent.CopyFrom(_to_ref(item.parent)) + if item.children: + msg.children.extend([_to_ref(ref) for ref in item.children]) + meta = _to_base_meta(item.meta) + if meta is not None: + msg.meta.CopyFrom(meta) + if item.prov: + msg.prov.extend([_to_provenance_item(p) for p in item.prov]) + if item.source: + msg.source.extend([_to_source_type(src) for src in item.source]) + if item.comments: + msg.comments.extend([_to_fine_ref(ref) for ref in item.comments]) + return msg + + +def _to_field_item(item: FieldItem) -> pb2.FieldItem: + msg = pb2.FieldItem( + self_ref=item.self_ref, + content_layer=_enum_value( + item.content_layer, _CONTENT_LAYER_MAP, pb2.CONTENT_LAYER_UNSPECIFIED + ), + label=_enum_value( + item.label, _DOC_ITEM_LABEL_MAP, pb2.DOC_ITEM_LABEL_UNSPECIFIED + ), + ) + if item.parent is not None: + msg.parent.CopyFrom(_to_ref(item.parent)) + if item.children: + msg.children.extend([_to_ref(ref) for ref in item.children]) + meta = _to_base_meta(item.meta) + if meta is not None: + msg.meta.CopyFrom(meta) + if item.prov: + msg.prov.extend([_to_provenance_item(p) for p in item.prov]) + if item.source: + msg.source.extend([_to_source_type(src) for src in item.source]) + if item.comments: + msg.comments.extend([_to_fine_ref(ref) for ref in item.comments]) + return msg + + +def _to_group_item(group: GroupItem) -> pb2.GroupItem: + msg = pb2.GroupItem( + self_ref=group.self_ref, + content_layer=_enum_value( + group.content_layer, _CONTENT_LAYER_MAP, pb2.CONTENT_LAYER_UNSPECIFIED + ), + label=_enum_value(group.label, _GROUP_LABEL_MAP, pb2.GROUP_LABEL_UNSPECIFIED), + name=group.name, + ) + if group.parent is not None: + msg.parent.CopyFrom(_to_ref(group.parent)) + if group.children: + msg.children.extend([_to_ref(ref) for ref in group.children]) + meta = _to_base_meta(group.meta) + if meta 
is not None: + msg.meta.CopyFrom(meta) + return msg + + +def _to_page_item(page: PageItem) -> pb2.PageItem: + msg = pb2.PageItem(size=_to_size(page.size), page_no=page.page_no) + image = _to_image_ref(page.image) + if image is not None: + msg.image.CopyFrom(image) + return msg + + +def _to_document_origin(origin: DocumentOrigin) -> pb2.DocumentOrigin: + msg = pb2.DocumentOrigin( + mimetype=origin.mimetype, + binary_hash=str(origin.binary_hash), + filename=origin.filename, + ) + if origin.uri is not None: + msg.uri = str(origin.uri) + return msg + + +def docling_document_to_proto(doc: DoclingDocument) -> pb2.DoclingDocument: + """Convert a DoclingDocument to its protobuf representation.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=DeprecationWarning) + furniture = doc.furniture + msg = pb2.DoclingDocument( + name=doc.name, + body=_to_group_item(doc.body), + furniture=_to_group_item(furniture), + ) + if doc.schema_name is not None: + msg.schema_name = doc.schema_name + if doc.version is not None: + msg.version = doc.version + if doc.origin is not None: + msg.origin.CopyFrom(_to_document_origin(doc.origin)) + if doc.groups: + msg.groups.extend([_to_group_item(group) for group in doc.groups]) + if doc.texts: + msg.texts.extend([_to_base_text_item(text) for text in doc.texts]) + if doc.pictures: + msg.pictures.extend([_to_picture_item(pic) for pic in doc.pictures]) + if doc.tables: + msg.tables.extend([_to_table_item(tbl) for tbl in doc.tables]) + if doc.key_value_items: + msg.key_value_items.extend( + [_to_key_value_item(item) for item in doc.key_value_items] + ) + if doc.form_items: + msg.form_items.extend([_to_form_item(item) for item in doc.form_items]) + if doc.field_regions: + msg.field_regions.extend( + [_to_field_region_item(item) for item in doc.field_regions] + ) + if doc.field_items: + msg.field_items.extend([_to_field_item(item) for item in doc.field_items]) + for key, page in doc.pages.items(): + 
msg.pages[str(key)].CopyFrom(_to_page_item(page)) + return msg diff --git a/proto/ai/docling/core/v1/docling_document.proto b/proto/ai/docling/core/v1/docling_document.proto new file mode 100644 index 00000000..4ac73305 --- /dev/null +++ b/proto/ai/docling/core/v1/docling_document.proto @@ -0,0 +1,809 @@ +syntax = "proto3"; + +package ai.docling.core.v1; + +import "google/protobuf/struct.proto"; + +option java_multiple_files = true; +option java_outer_classname = "DoclingDocumentProto"; +option java_package = "ai.docling.core.v1"; + +// Docling Document Structure Protocol Buffers Definition +// +// This proto file defines the complete structure for documents processed by Docling, +// providing a 1:1 mapping of the Docling JSON schema to protobuf. Docling is an +// advanced document parsing system that extracts rich semantic structure from PDFs +// and other document formats. +// +// Document Structure Overview: +// =========================== +// +// A DoclingDocument contains: +// - Hierarchical structure (body, furniture, groups) +// - Text content (titles, headers, paragraphs, lists, code, formulas) +// - Visual elements (pictures with AI-generated descriptions and annotations) +// - Tabular data (tables with cell-level structure and annotations) +// - Form data (key-value pairs, form fields) +// - Page metadata (size, images) +// - Provenance tracking (bounding boxes, page numbers) + +// DoclingDocument is the root message representing a complete parsed document. +message DoclingDocument { + // Schema identifier for versioning (e.g., "docling_document_v2") + optional string schema_name = 1; + + // Version number of the Docling schema used + optional string version = 2; + + // Human-readable name or title of the document + string name = 3; + + // Metadata about the source document (file info, hash, etc.) 
+ optional DocumentOrigin origin = 4; + + // Document furniture: headers, footers, page numbers, watermarks, and other + // non-content elements that appear on the page but are not part of the + // document's semantic body. + GroupItem furniture = 13; + + // The root body group containing the main document structure. + GroupItem body = 5; + + // Additional groups representing logical sections (chapters, sections, etc.). + repeated GroupItem groups = 6; + + // All text items in the document (titles, paragraphs, lists, etc.). + repeated BaseTextItem texts = 7; + + // All picture/image items in the document. + repeated PictureItem pictures = 8; + + // All table items in the document. + repeated TableItem tables = 9; + + // Key-value pairs extracted from forms or structured data. + repeated KeyValueItem key_value_items = 10; + + // Form elements detected in the document. + repeated FormItem form_items = 11; + + // Regions of the document specifically for form fields. + repeated FieldRegionItem field_regions = 14; + + // Individual form field items. + repeated FieldItem field_items = 15; + + // Map of page numbers to page metadata. + map<string, PageItem> pages = 12; +} + +// DocumentOrigin contains metadata about the source document file. +message DocumentOrigin { + // MIME type of the source file + string mimetype = 1; + + // Binary hash of the source file for integrity verification. + string binary_hash = 2; + + // Original filename of the source document + string filename = 3; + + // Optional URI/URL where the document was retrieved from + optional string uri = 4; +} + +// ContentLayer defines the semantic layer where content appears in the document. +enum ContentLayer { + CONTENT_LAYER_UNSPECIFIED = 0; + CONTENT_LAYER_BODY = 1; + CONTENT_LAYER_FURNITURE = 2; + CONTENT_LAYER_BACKGROUND = 3; + CONTENT_LAYER_INVISIBLE = 4; + CONTENT_LAYER_NOTES = 5; +} + +// GroupLabel defines the semantic type of a group in the document hierarchy.
enum GroupLabel {
  // Zero value. The Python converter passes it as the fallback argument of
  // its group-label lookup (presumably used when a label has no mapping;
  // confirm against `_enum_value`).
  GROUP_LABEL_UNSPECIFIED = 0;
  GROUP_LABEL_LIST = 1;
  GROUP_LABEL_ORDERED_LIST = 2;
  GROUP_LABEL_CHAPTER = 3;
  GROUP_LABEL_SECTION = 4;
  GROUP_LABEL_SHEET = 5;
  GROUP_LABEL_SLIDE = 6;
  GROUP_LABEL_FORM_AREA = 7;
  GROUP_LABEL_KEY_VALUE_AREA = 8;
  GROUP_LABEL_COMMENT_SECTION = 9;
  GROUP_LABEL_INLINE = 10;
  GROUP_LABEL_PICTURE_AREA = 11;
}

// GroupItem represents a logical grouping of document elements.
// DoclingDocument uses this message for `body`, `furniture`, and each entry
// of `groups`.
message GroupItem {
  // Reference string identifying this group within the document.
  string self_ref = 1;
  // Reference to the parent item; left unset when the group has no parent.
  optional RefItem parent = 2;
  // References to the items contained in this group.
  repeated RefItem children = 3;
  // Layer the group belongs to (see ContentLayer).
  ContentLayer content_layer = 4;
  // Common metadata; left unset when the converter produces none.
  optional BaseMeta meta = 5;
  // Name of the group.
  optional string name = 6;
  // Semantic type of the group.
  GroupLabel label = 7;
}

// RefItem is a JSON Pointer reference to another item in the document.
message RefItem {
  // The pointer string itself.
  string ref = 1;
}

// IntSpan represents a half-open integer range [start, end).
// Used for character spans, fine-reference ranges, and similar positional pairs.
message IntSpan {
  // First position of the range.
  int32 start = 1;
  // Position that closes the range.
  int32 end = 2;
}

// FloatPair represents a pair of floating-point values.
// Used for 2D coordinates in chart data (e.g., scatter points, line values).
message FloatPair {
  double first = 1;
  double second = 2;
}

// StringIntPair represents a (string, int) pair.
// Used for labeled integer values (e.g., stacked bar chart segments).
message StringIntPair {
  string key = 1;
  int32 value = 2;
}

// FineRef is a reference with optional span range info.
// It is the element type of the `comments` field on document items.
message FineRef {
  // Reference string of the target item.
  string ref = 1;
  // Optional sub-range within the referenced item.
  optional IntSpan range = 2;
}

// TrackSource identifies a cue in a media track (audio, video, subtitles).
message TrackSource {
  // Start and end of the cue.
  // NOTE(review): the time unit is not stated in this schema; confirm.
  double start_time = 1;
  double end_time = 2;
  optional string identifier = 3;
  optional string voice = 4;
  optional string kind = 5;
}

// SourceType is a union of possible source descriptors.
// TrackSource is currently the only member of the oneof.
message SourceType {
  oneof source {
    TrackSource track = 1;
  }
}

// BaseMeta contains metadata fields common to most document items.
message BaseMeta {
  // Optional text summary of the item.
  optional SummaryMetaField summary = 1;
  // Open-ended extension fields keyed by field name. A proto3 map field must
  // declare both its key and value types.
  // NOTE(review): the value type is assumed to be google.protobuf.Value
  // (struct.proto is this file's only import); confirm against the generated
  // docling_document_pb2 descriptor.
  map<string, google.protobuf.Value> custom_fields = 100;
}

// SummaryMetaField contains an AI-generated text summary with confidence.
message SummaryMetaField {
  // Confidence attached to the summary, when available.
  optional double confidence = 1;
  // Identifier of whatever produced the summary, when available.
  optional string created_by = 2;
  // The summary text.
  string text = 3;
  // Open-ended extension fields; same assumed value type as
  // BaseMeta.custom_fields.
  map<string, google.protobuf.Value> custom_fields = 100;
}

// DocItemLabel defines the semantic type of document content items.
// It types the `label` field of TextItemBase; TableItem and PictureItem carry
// their label as a plain string instead.
enum DocItemLabel {
  // Zero value. The Python converter passes it as the fallback argument of
  // its label lookup.
  DOC_ITEM_LABEL_UNSPECIFIED = 0;
  DOC_ITEM_LABEL_CAPTION = 1;
  DOC_ITEM_LABEL_CHART = 2;
  DOC_ITEM_LABEL_CHECKBOX_SELECTED = 3;
  DOC_ITEM_LABEL_CHECKBOX_UNSELECTED = 4;
  DOC_ITEM_LABEL_CODE = 5;
  DOC_ITEM_LABEL_DOCUMENT_INDEX = 6;
  DOC_ITEM_LABEL_EMPTY_VALUE = 7;
  DOC_ITEM_LABEL_FOOTNOTE = 8;
  DOC_ITEM_LABEL_FORM = 9;
  DOC_ITEM_LABEL_FORMULA = 10;
  DOC_ITEM_LABEL_GRADING_SCALE = 11;
  DOC_ITEM_LABEL_HANDWRITTEN_TEXT = 12;
  DOC_ITEM_LABEL_KEY_VALUE_REGION = 13;
  DOC_ITEM_LABEL_LIST_ITEM = 14;
  DOC_ITEM_LABEL_PAGE_FOOTER = 15;
  DOC_ITEM_LABEL_PAGE_HEADER = 16;
  DOC_ITEM_LABEL_PARAGRAPH = 17;
  DOC_ITEM_LABEL_PICTURE = 18;
  DOC_ITEM_LABEL_REFERENCE = 19;
  DOC_ITEM_LABEL_SECTION_HEADER = 20;
  DOC_ITEM_LABEL_TABLE = 21;
  DOC_ITEM_LABEL_TEXT = 22;
  DOC_ITEM_LABEL_TITLE = 23;
  DOC_ITEM_LABEL_FIELD_REGION = 24;
  DOC_ITEM_LABEL_FIELD_HEADING = 25;
  DOC_ITEM_LABEL_FIELD_ITEM = 26;
  DOC_ITEM_LABEL_FIELD_KEY = 27;
  DOC_ITEM_LABEL_FIELD_VALUE = 28;
  DOC_ITEM_LABEL_FIELD_HINT = 29;
  DOC_ITEM_LABEL_MARKER = 30;
}

// Script defines the vertical positioning of text.
enum Script {
  SCRIPT_UNSPECIFIED = 0;
  SCRIPT_BASELINE = 1;
  SCRIPT_SUB = 2;
  SCRIPT_SUPER = 3;
}

// CoordOrigin defines the coordinate system origin for bounding boxes.
enum CoordOrigin {
  // Zero value. The Python converter also writes it when an origin has no
  // mapping, and then stores the original string in
  // BoundingBox.coord_origin_raw.
  COORD_ORIGIN_UNSPECIFIED = 0;
  COORD_ORIGIN_TOPLEFT = 1;
  COORD_ORIGIN_BOTTOMLEFT = 2;
}

// CodeLanguageLabel defines the programming language for code blocks.
enum CodeLanguageLabel {
  // Zero value. The Python converter also writes it when a language has no
  // mapping, and then stores the original string in
  // CodeItem.code_language_raw.
  CODE_LANGUAGE_LABEL_UNSPECIFIED = 0;
  CODE_LANGUAGE_LABEL_ADA = 1;
  CODE_LANGUAGE_LABEL_AWK = 2;
  CODE_LANGUAGE_LABEL_BASH = 3;
  CODE_LANGUAGE_LABEL_BC = 4;
  CODE_LANGUAGE_LABEL_C = 5;
  CODE_LANGUAGE_LABEL_C_SHARP = 6;
  CODE_LANGUAGE_LABEL_C_PLUS_PLUS = 7;
  CODE_LANGUAGE_LABEL_CMAKE = 8;
  CODE_LANGUAGE_LABEL_COBOL = 9;
  CODE_LANGUAGE_LABEL_CSS = 10;
  CODE_LANGUAGE_LABEL_CEYLON = 11;
  CODE_LANGUAGE_LABEL_CLOJURE = 12;
  CODE_LANGUAGE_LABEL_CRYSTAL = 13;
  CODE_LANGUAGE_LABEL_CUDA = 14;
  CODE_LANGUAGE_LABEL_CYTHON = 15;
  CODE_LANGUAGE_LABEL_D = 16;
  CODE_LANGUAGE_LABEL_DART = 17;
  CODE_LANGUAGE_LABEL_DC = 18;
  CODE_LANGUAGE_LABEL_DOCKERFILE = 19;
  CODE_LANGUAGE_LABEL_ELIXIR = 20;
  CODE_LANGUAGE_LABEL_ERLANG = 21;
  CODE_LANGUAGE_LABEL_FORTRAN = 22;
  CODE_LANGUAGE_LABEL_FORTH = 23;
  CODE_LANGUAGE_LABEL_GO = 24;
  CODE_LANGUAGE_LABEL_HTML = 25;
  CODE_LANGUAGE_LABEL_HASKELL = 26;
  CODE_LANGUAGE_LABEL_HAXE = 27;
  CODE_LANGUAGE_LABEL_JAVA = 28;
  CODE_LANGUAGE_LABEL_JAVASCRIPT = 29;
  CODE_LANGUAGE_LABEL_JSON = 30;
  CODE_LANGUAGE_LABEL_JULIA = 31;
  CODE_LANGUAGE_LABEL_KOTLIN = 32;
  CODE_LANGUAGE_LABEL_LISP = 33;
  CODE_LANGUAGE_LABEL_LUA = 34;
  CODE_LANGUAGE_LABEL_MATLAB = 35;
  CODE_LANGUAGE_LABEL_MOONSCRIPT = 36;
  CODE_LANGUAGE_LABEL_NIM = 37;
  CODE_LANGUAGE_LABEL_OCAML = 38;
  CODE_LANGUAGE_LABEL_OBJECTIVEC = 39;
  CODE_LANGUAGE_LABEL_OCTAVE = 40;
  CODE_LANGUAGE_LABEL_PHP = 41;
  CODE_LANGUAGE_LABEL_PASCAL = 42;
  CODE_LANGUAGE_LABEL_PERL = 43;
  CODE_LANGUAGE_LABEL_PROLOG = 44;
  CODE_LANGUAGE_LABEL_PYTHON = 45;
  CODE_LANGUAGE_LABEL_RACKET = 46;
  CODE_LANGUAGE_LABEL_RUBY = 47;
  CODE_LANGUAGE_LABEL_RUST = 48;
  CODE_LANGUAGE_LABEL_SML = 49;
  CODE_LANGUAGE_LABEL_SQL = 50;
  CODE_LANGUAGE_LABEL_SCALA = 51;
  CODE_LANGUAGE_LABEL_SCHEME = 52;
  CODE_LANGUAGE_LABEL_SWIFT = 53;
  CODE_LANGUAGE_LABEL_TYPESCRIPT = 54;
  // A named "unknown" language; distinct from the UNSPECIFIED zero value.
  CODE_LANGUAGE_LABEL_UNKNOWN = 55;
  CODE_LANGUAGE_LABEL_VISUALBASIC = 56;
  CODE_LANGUAGE_LABEL_XML = 57;
  CODE_LANGUAGE_LABEL_YAML = 58;
}

// Formatting contains text formatting/styling information.
message Formatting {
  bool bold = 1;
  bool italic = 2;
  bool underline = 3;
  bool strikethrough = 4;
  // Vertical positioning of the text (see Script).
  Script script = 5;
}

// BaseTextItem is a union type representing any text-based item in the document.
// The Python converter selects the member by the item's concrete type and
// falls back to `text` for any text item that is none of the specialised
// kinds.
message BaseTextItem {
  oneof item {
    TitleItem title = 1;
    SectionHeaderItem section_header = 2;
    ListItem list_item = 3;
    CodeItem code = 4;
    FormulaItem formula = 5;
    TextItem text = 6;
    FieldHeadingItem field_heading = 7;
    FieldValueItem field_value = 8;
  }
}

// TextItemBase contains fields common to all text-based items.
// Each concrete text message embeds it as its `base` field.
message TextItemBase {
  // Reference string identifying this item within the document.
  string self_ref = 1;
  // Reference to the parent item; left unset when there is none.
  optional RefItem parent = 2;
  // References to child items.
  repeated RefItem children = 3;
  // Layer the item belongs to (see ContentLayer).
  ContentLayer content_layer = 4;
  // Common metadata; left unset when the converter produces none.
  optional BaseMeta meta = 5;
  // Semantic type of the item.
  DocItemLabel label = 6;
  // Locations of the item in the source document.
  repeated ProvenanceItem prov = 7;
  // `orig` and `text` are copied from the model fields of the same name.
  // NOTE(review): presumably `orig` is the untreated source text and `text`
  // the cleaned form; confirm against the Docling model.
  string orig = 8;
  string text = 9;
  // Styling of the text; left unset when the item has no formatting.
  optional Formatting formatting = 10;
  // Link target stored as a string; left unset when the item has no hyperlink.
  optional string hyperlink = 11;
  // Source descriptors attached to the item.
  repeated SourceType source = 12;
  // References attached to the item as comments.
  repeated FineRef comments = 13;
}

// TitleItem represents a document title or major heading.
message TitleItem {
  TextItemBase base = 1;
}

// SectionHeaderItem represents a section header with hierarchical level.
message SectionHeaderItem {
  TextItemBase base = 1;
  // Heading level, copied from the model's `level`.
  int32 level = 2;
}

// FieldHeadingItem represents a heading for a form field region.
message FieldHeadingItem {
  TextItemBase base = 1;
  // Heading level, copied from the model's `level`.
  int32 level = 2;
}

// FieldValueItem represents the value part of a form field.
message FieldValueItem {
  TextItemBase base = 1;
  // Kind of the value, copied from the model's `kind`.
  string kind = 2;
}

// ListItem represents a single item in a bulleted or numbered list.
message ListItem {
  TextItemBase base = 1;
  // Copied from the model's `enumerated` flag.
  bool enumerated = 2;
  // List marker text; left unset when the model has none.
  optional string marker = 3;
}

// CodeItem represents a code block with syntax highlighting metadata.
message CodeItem {
  // Fields shared with every other text item.
  TextItemBase base = 1;
  // Floating-element metadata; left unset when the converter produces none.
  optional FloatingMeta meta = 2;
  // References to caption, reference and footnote items for this block.
  repeated RefItem captions = 3;
  repeated RefItem references = 4;
  repeated RefItem footnotes = 5;
  // Image of the block; left unset when the item has none.
  optional ImageRef image = 6;
  // Language of the block. Set to CODE_LANGUAGE_LABEL_UNSPECIFIED when the
  // language has no enum mapping.
  optional CodeLanguageLabel code_language = 7;
  // Raw label string (fallback for unknown enum values).
  optional string code_language_raw = 8;
}

// FormulaItem represents a mathematical formula or equation.
message FormulaItem {
  TextItemBase base = 1;
}

// TextItem represents generic text content (paragraphs, captions, etc.).
message TextItem {
  TextItemBase base = 1;
}

// ProvenanceItem tracks the precise location of content in the source document.
message ProvenanceItem {
  // Page the content was found on.
  int32 page_no = 1;
  // Region of the page occupied by the content.
  BoundingBox bbox = 2;
  // Character span of the content; the Python converter fills start/end from
  // the two elements of the model's `charspan`.
  IntSpan charspan = 3;
}

// BoundingBox defines a rectangular region in page coordinates.
message BoundingBox {
  // Edge coordinates, copied from the model's l, t, r and b.
  double l = 1;
  double t = 2;
  double r = 3;
  double b = 4;
  // Origin of the coordinate system. Set to COORD_ORIGIN_UNSPECIFIED when the
  // origin has no enum mapping.
  optional CoordOrigin coord_origin = 5;
  // Raw label string (fallback for unknown enum values).
  optional string coord_origin_raw = 6;
}

// ImageRef references an embedded image with its properties.
message ImageRef {
  string mimetype = 1;
  int32 dpi = 2;
  Size size = 3;
  // Image location, stored as the string form of the model's URI.
  string uri = 4;
}

// Size represents 2D dimensions (width and height).
message Size {
  double width = 1;
  double height = 2;
}

// ============================================================================
// Annotation types
// ============================================================================

// DescriptionAnnotation is a text description annotation on a picture or table.
message DescriptionAnnotation {
  // Discriminator string; "description" when the converter builds the
  // annotation from item metadata.
  string kind = 1;
  // The description text.
  string text = 2;
  // Producer of the description; empty when the metadata names no creator.
  string provenance = 3;
}

// MiscAnnotation holds arbitrary annotation data as a struct.
message MiscAnnotation {
  string kind = 1;
  // Free-form payload with stringified keys; the converter leaves it unset
  // when the annotation has no content.
  google.protobuf.Struct content = 2;
}

// PictureClassificationClass is a single predicted class with confidence.
message PictureClassificationClass {
  // Name of the predicted class.
  string class_name = 1;
  // Confidence of the prediction; the Python converter writes 0.0 when the
  // source prediction carries none.
  double confidence = 2;
}

// PictureClassificationData contains classification results for a picture.
message PictureClassificationData {
  // Discriminator string; "classification" when built from picture metadata.
  string kind = 1;
  // Producer of the classification; the converter writes an empty string
  // when it builds this message from picture metadata.
  string provenance = 2;
  // One entry per predicted class.
  repeated PictureClassificationClass predicted_classes = 3;
}

// PictureMoleculeData contains chemical structure information for a picture.
message PictureMoleculeData {
  // Discriminator string; "molecule_data" when built from picture metadata.
  string kind = 1;
  // Copied from the metadata's `smi` field.
  // NOTE(review): presumably a SMILES string; confirm against the model.
  string smi = 2;
  // Confidence of the extraction; 0.0 when the metadata carries none.
  double confidence = 3;
  // Left empty when the converter builds this message from picture metadata.
  string class_name = 4;
  // Left empty when the converter builds this message from picture metadata.
  repeated FloatPair segmentation = 5;
  // Producer of the data; empty when the metadata names no creator.
  string provenance = 6;
}

// PictureTabularChartData contains tabular chart data extracted from a picture.
message PictureTabularChartData {
  // Discriminator string; "tabular_chart_data" when built from picture
  // metadata.
  string kind = 1;
  // Chart title; empty when the metadata has none.
  string title = 2;
  // The chart's values in table form.
  TableData chart_data = 3;
}

// ChartLine represents a single line in a line chart.
message ChartLine {
  string label = 1;
  // Points of the line as value pairs.
  repeated FloatPair values = 2;
}

// PictureLineChartData contains line chart data extracted from a picture.
message PictureLineChartData {
  string kind = 1;
  string title = 2;
  string x_axis_label = 3;
  string y_axis_label = 4;
  repeated ChartLine lines = 5;
}

// ChartBar represents a single bar in a bar chart.
message ChartBar {
  string label = 1;
  // A single scalar despite the plural field name.
  double values = 2;
}

// PictureBarChartData contains bar chart data extracted from a picture.
message PictureBarChartData {
  string kind = 1;
  string title = 2;
  string x_axis_label = 3;
  string y_axis_label = 4;
  repeated ChartBar bars = 5;
}

// ChartStackedBar represents a stacked bar with multiple labeled segments.
message ChartStackedBar {
  // A list of strings despite the singular field name.
  repeated string label = 1;
  // One (key, value) pair per segment of the bar.
  repeated StringIntPair values = 2;
}

// PictureStackedBarChartData contains stacked bar chart data.
message PictureStackedBarChartData {
  string kind = 1;
  string title = 2;
  string x_axis_label = 3;
  string y_axis_label = 4;
  repeated ChartStackedBar stacked_bars = 5;
}

// ChartSlice represents a single slice in a pie chart.
message ChartSlice {
  // Label of the slice.
  string label = 1;
  // Value of the slice.
  double value = 2;
}

// PicturePieChartData contains pie chart data extracted from a picture.
message PicturePieChartData {
  string kind = 1;
  string title = 2;
  repeated ChartSlice slices = 3;
}

// ChartPoint represents a single point in a scatter chart.
message ChartPoint {
  // The point as a pair of values.
  FloatPair value = 1;
}

// PictureScatterChartData contains scatter chart data extracted from a picture.
message PictureScatterChartData {
  string kind = 1;
  string title = 2;
  string x_axis_label = 3;
  string y_axis_label = 4;
  repeated ChartPoint points = 5;
}

// PictureAnnotation is a union of all annotation types that can appear on a picture.
// The Python converter raises TypeError for an annotation object that matches
// none of the types it handles.
message PictureAnnotation {
  oneof annotation {
    DescriptionAnnotation description = 1;
    MiscAnnotation misc = 2;
    PictureClassificationData classification = 3;
    PictureMoleculeData molecule = 4;
    PictureTabularChartData tabular_chart = 5;
    PictureLineChartData line_chart = 6;
    PictureBarChartData bar_chart = 7;
    PictureStackedBarChartData stacked_bar_chart = 8;
    PicturePieChartData pie_chart = 9;
    PictureScatterChartData scatter_chart = 10;
  }
}

// TableAnnotation is a union of annotation types that can appear on a table.
// The Python converter raises TypeError for any annotation object that is
// neither a description nor a misc annotation.
message TableAnnotation {
  oneof annotation {
    DescriptionAnnotation description = 1;
    MiscAnnotation misc = 2;
  }
}

// ============================================================================
// Picture and Table items
// ============================================================================

// PictureItem represents an image or figure in the document.
+message PictureItem {
+  optional string self_ref = 1;  // NOTE(review): TableItem/KeyValueItem/FormItem declare this without `optional` - confirm intent
+  RefItem parent = 2;  // NOTE(review): sibling items declare `optional RefItem parent` - confirm intent
+  repeated RefItem children = 3;
+  ContentLayer content_layer = 4;
+  optional PictureMeta meta = 5;
+  string label = 6;
+  repeated ProvenanceItem prov = 7;
+  repeated RefItem captions = 8;
+  repeated RefItem references = 9;
+  repeated RefItem footnotes = 10;
+  optional ImageRef image = 11;
+  repeated SourceType source = 12;
+  repeated FineRef comments = 13;
+  repeated PictureAnnotation annotations = 14;
+}
+
+// PictureMeta contains rich metadata for pictures, including AI analysis.
+message PictureMeta {
+  optional SummaryMetaField summary = 1;
+  optional DescriptionMetaField description = 2;
+  optional PictureClassificationMetaField classification = 3;
+  optional MoleculeMetaField molecule = 4;
+  optional TabularChartMetaField tabular_chart = 5;
+  map custom_fields = 100;  // NOTE(review): the map<K, V> type parameters are missing here and on every custom_fields below - restore from the original .proto
+}
+
+// DescriptionMetaField contains an AI-generated detailed description.
+message DescriptionMetaField {
+  optional double confidence = 1;
+  optional string created_by = 2;
+  string text = 3;
+  map custom_fields = 100;
+}
+
+// PictureClassificationMetaField contains AI classification results.
+message PictureClassificationMetaField {
+  repeated PictureClassificationPrediction predictions = 1;
+  map custom_fields = 100;
+}
+
+// PictureClassificationPrediction is a single classification result.
+message PictureClassificationPrediction {
+  optional double confidence = 1;
+  optional string created_by = 2;
+  string class_name = 3;
+  map custom_fields = 100;
+}
+
+// MoleculeMetaField contains chemical structure information.
+message MoleculeMetaField {
+  optional double confidence = 1;
+  optional string created_by = 2;
+  string smi = 3;  // presumably a SMILES string - confirm against the source model
+  map custom_fields = 100;
+}
+
+// TabularChartMetaField contains chart data extracted from images.
+message TabularChartMetaField {
+  optional double confidence = 1;
+  optional string created_by = 2;
+  optional string title = 3;
+  TableData chart_data = 4;
+  map custom_fields = 100;  // NOTE(review): the map<K, V> type parameters are missing here and in FloatingMeta - restore from the original .proto
+}
+
+// FloatingMeta contains metadata for floating elements (tables, code blocks, figures).
+message FloatingMeta {
+  optional SummaryMetaField summary = 1;
+  optional DescriptionMetaField description = 2;
+  map custom_fields = 100;
+}
+
+// TableItem represents a table in the document with full structure.
+message TableItem {
+  string self_ref = 1;
+  optional RefItem parent = 2;
+  repeated RefItem children = 3;
+  ContentLayer content_layer = 4;
+  optional FloatingMeta meta = 5;
+  string label = 6;
+  repeated ProvenanceItem prov = 7;
+  repeated RefItem captions = 8;
+  repeated RefItem references = 9;
+  repeated RefItem footnotes = 10;
+  optional ImageRef image = 11;
+  TableData data = 12;
+  repeated SourceType source = 13;
+  repeated FineRef comments = 14;
+  repeated TableAnnotation annotations = 15;
+}
+
+// TableData contains the complete table structure and content.
+message TableData {
+  repeated TableCell table_cells = 1;  // flat list of cells
+  int32 num_rows = 2;
+  int32 num_cols = 3;
+  repeated TableRow grid = 4;  // cells grouped per row; presumably the same cells as table_cells - confirm
+}
+
+// TableRow represents a single row in the table.
+message TableRow {
+  repeated TableCell cells = 1;
+}
+
+// TableCell represents a single cell in a table.
+message TableCell {
+  BoundingBox bbox = 1;
+  int32 row_span = 2;
+  int32 col_span = 3;
+  int32 start_row_offset_idx = 4;
+  int32 end_row_offset_idx = 5;  // NOTE(review): state whether end offsets are inclusive or exclusive
+  int32 start_col_offset_idx = 6;
+  int32 end_col_offset_idx = 7;
+  string text = 8;
+  bool column_header = 9;
+  bool row_header = 10;
+  bool row_section = 11;
+  bool fillable = 12;
+  optional RefItem ref = 13;
+}
+
+// KeyValueItem represents a key-value pair extracted from forms.
+message KeyValueItem {
+  string self_ref = 1;
+  optional RefItem parent = 2;
+  repeated RefItem children = 3;
+  ContentLayer content_layer = 4;
+  optional FloatingMeta meta = 5;
+  string label = 6;
+  repeated ProvenanceItem prov = 7;
+  repeated RefItem captions = 8;
+  repeated RefItem references = 9;
+  repeated RefItem footnotes = 10;
+  optional ImageRef image = 11;
+  GraphData graph = 12;
+  repeated SourceType source = 13;
+  repeated FineRef comments = 14;
+}
+
+// GraphData represents the structure of key-value relationships as a graph.
+message GraphData {
+  repeated GraphCell cells = 1;  // nodes
+  repeated GraphLink links = 2;  // edges
+}
+
+// GraphCellLabel defines the role of a cell in a key-value graph.
+enum GraphCellLabel {
+  GRAPH_CELL_LABEL_UNSPECIFIED = 0;  // zero value, i.e. what an unset field reads as
+  GRAPH_CELL_LABEL_KEY = 1;
+  GRAPH_CELL_LABEL_VALUE = 2;
+  GRAPH_CELL_LABEL_CHECKBOX = 3;
+}
+
+// GraphCell is a node in the key-value graph.
+message GraphCell {
+  GraphCellLabel label = 1;
+  int32 cell_id = 2;
+  string text = 3;
+  string orig = 4;
+  optional ProvenanceItem prov = 5;
+  optional RefItem item_ref = 6;
+}
+
+// GraphLinkLabel defines the type of relationship between cells.
+enum GraphLinkLabel {
+  GRAPH_LINK_LABEL_UNSPECIFIED = 0;  // zero value, i.e. what an unset field reads as
+  GRAPH_LINK_LABEL_TO_VALUE = 1;
+  GRAPH_LINK_LABEL_TO_KEY = 2;
+  GRAPH_LINK_LABEL_TO_PARENT = 3;
+  GRAPH_LINK_LABEL_TO_CHILD = 4;
+}
+
+// GraphLink is an edge in the key-value graph.
+message GraphLink {
+  GraphLinkLabel label = 1;
+  int32 source_cell_id = 2;  // presumably a GraphCell.cell_id - confirm
+  int32 target_cell_id = 3;  // presumably a GraphCell.cell_id - confirm
+}
+
+// FormItem represents a form element in the document.
+message FormItem {
+  string self_ref = 1;
+  optional RefItem parent = 2;
+  repeated RefItem children = 3;
+  ContentLayer content_layer = 4;
+  optional FloatingMeta meta = 5;
+  string label = 6;  // NOTE(review): plain string here, while FieldRegionItem/FieldItem use the DocItemLabel enum - confirm intent
+  repeated ProvenanceItem prov = 7;
+  repeated RefItem captions = 8;
+  repeated RefItem references = 9;
+  repeated RefItem footnotes = 10;
+  optional ImageRef image = 11;
+  GraphData graph = 12;
+  repeated SourceType source = 13;
+  repeated FineRef comments = 14;
+}
+
+// FieldRegionItem represents a region containing form fields.
+message FieldRegionItem {
+  string self_ref = 1;
+  optional RefItem parent = 2;
+  repeated RefItem children = 3;
+  ContentLayer content_layer = 4;
+  optional BaseMeta meta = 5;
+  DocItemLabel label = 6;
+  repeated ProvenanceItem prov = 7;
+  repeated SourceType source = 8;
+  repeated FineRef comments = 9;
+}
+
+// FieldItem represents an individual form field item.
+message FieldItem {  // field list is identical to FieldRegionItem
+  string self_ref = 1;
+  optional RefItem parent = 2;
+  repeated RefItem children = 3;
+  ContentLayer content_layer = 4;
+  optional BaseMeta meta = 5;
+  DocItemLabel label = 6;
+  repeated ProvenanceItem prov = 7;
+  repeated SourceType source = 8;
+  repeated FineRef comments = 9;
+}
+
+// PageItem represents metadata about a single page in the document.
+message PageItem {
+  Size size = 1;
+  optional ImageRef image = 2;
+  int32 page_no = 3;
+}
diff --git a/pyproject.toml b/pyproject.toml
index 8a4c72e9..776ab2f8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,6 +50,7 @@ dependencies = [
     'typer (>=0.12.5,<0.25.0)',
     'latex2mathml (>=3.77.0,<4.0.0)',
     "defusedxml (>=0.7.1, <0.8.0)",
+    "protobuf>=4.25.0",
 ]
 
 [project.urls]
@@ -117,6 +118,7 @@ dev = [
     'tree-sitter-java-orchard (>=0.3.0,<1.0.0); python_version >= "3.10"',
     "ruff>=0.14.11",
     "types-defusedxml (>=0.7.0.20250822, <0.8.0)",
+    "grpcio-tools>=1.60.0",
 ]
 constraints = [
     'pandas (>=2.1.4,<3.0.0); python_version < "3.11"',
@@ -132,7 +134,7 @@ include = ["docling_core*"]
 namespaces = true
 
 [tool.setuptools.package-data]
-"*" = ["*.json"]
+"*" = ["*.json", "*.py"]
 
 [tool.ruff]
 target-version = "py310"
diff --git a/scripts/gen_proto.py b/scripts/gen_proto.py
new file mode 100644
index 00000000..3879b20f
--- /dev/null
+++ b/scripts/gen_proto.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+"""Generate Python protobuf code for the DoclingDocument proto (messages only, no gRPC services)."""
+import pathlib
+import subprocess
+import sys
+
+ROOT = pathlib.Path(__file__).resolve().parents[1]
+PROTO_DIR = ROOT / "proto"
+OUT_DIR = ROOT / "docling_core" / "proto" / "gen"
+
+
+def ensure_init_files(path: pathlib.Path) -> None:
+    """Create an empty ``__init__.py`` in *path* and in every directory beneath it if missing."""
+    for sub in [path] + [p for p in path.rglob("*") if p.is_dir()]:
+        init = sub / "__init__.py"
+        if not init.exists():
+            init.write_text("", encoding="utf-8")
+
+
+def main() -> None:
+    """Compile the DoclingDocument proto into ``OUT_DIR`` with ``grpc_tools.protoc``.
+
+    Exits with status 1 if the proto file is missing; a failing protoc run
+    raises ``subprocess.CalledProcessError``.
+    """
+    proto_file = PROTO_DIR / "ai" / "docling" / "core" / "v1" / "docling_document.proto"
+    if not proto_file.exists():
+        # Diagnostics go to stderr so they are not mistaken for normal output.
+        print(f"Proto file not found: {proto_file}", file=sys.stderr)
+        sys.exit(1)
+
+    OUT_DIR.mkdir(parents=True, exist_ok=True)
+
+    # Messages only (no gRPC services), so use --python_out only.
+    cmd = [
+        sys.executable,
+        "-m",
+        "grpc_tools.protoc",
+        f"-I{PROTO_DIR}",
+        f"--python_out={OUT_DIR}",
+        str(proto_file),
+    ]
+    subprocess.check_call(cmd)
+    ensure_init_files(OUT_DIR)
+    print("Generated docling_document_pb2 in", OUT_DIR)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/test_proto_conversion.py b/test/test_proto_conversion.py
new file mode 100644
index 00000000..59efc509
--- /dev/null
+++ b/test/test_proto_conversion.py
@@ -0,0 +1,32 @@
+import pytest
+from docling_core.types.doc import DoclingDocument, DocItemLabel
+from docling_core.proto import docling_document_to_proto
+from docling_core.proto.gen.ai.docling.core.v1 import docling_document_pb2 as pb2
+
+def test_minimal_doc_conversion():
+    doc = DoclingDocument(name="test_doc")
+    proto = docling_document_to_proto(doc)
+
+    assert proto.name == "test_doc"
+    assert proto.body.name == "_root_"
+    assert proto.furniture.name == "_root_"
+
+def test_doc_with_text_conversion():
+    doc = DoclingDocument(name="test_doc")
+    doc.add_text(label=DocItemLabel.PARAGRAPH, text="Hello world")
+
+    proto = docling_document_to_proto(doc)
+
+    assert len(proto.texts) == 1
+    assert proto.texts[0].text.base.text == "Hello world"
+    assert proto.texts[0].text.base.label == pb2.DOC_ITEM_LABEL_PARAGRAPH
+
+def test_doc_with_title_conversion():
+    doc = DoclingDocument(name="test_doc")
+    doc.add_title(text="Main Title")
+
+    proto = docling_document_to_proto(doc)
+
+    assert len(proto.texts) == 1
+    assert proto.texts[0].title.base.text == "Main Title"
+    assert proto.texts[0].title.base.label == pb2.DOC_ITEM_LABEL_TITLE
diff --git a/uv.lock b/uv.lock
index 43f89482..88093b2a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -964,6 +964,7 @@ dependencies = [
     { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
     { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
     { name =
"pillow" }, + { name = "protobuf" }, { name = "pydantic" }, { name = "pyyaml" }, { name = "tabulate" }, @@ -1007,6 +1008,7 @@ dev = [ { name = "coverage" }, { name = "flake8" }, { name = "flake8-docstrings" }, + { name = "grpcio-tools" }, { name = "ipykernel" }, { name = "isort" }, { name = "jsondiff" }, @@ -1035,6 +1037,7 @@ requires-dist = [ { name = "openpyxl", marker = "extra == 'examples'", specifier = ">=3.1.5" }, { name = "pandas", specifier = ">=2.1.4,<4.0.0" }, { name = "pillow", specifier = ">=10.0.0,<13.0.0" }, + { name = "protobuf", specifier = ">=4.25.0" }, { name = "pydantic", specifier = ">=2.6.0,!=2.10.0,!=2.10.1,!=2.10.2,<3.0.0" }, { name = "pyyaml", specifier = ">=5.1,<7.0.0" }, { name = "semchunk", marker = "extra == 'chunking'", specifier = ">=2.2.0,<4.0.0" }, @@ -1068,6 +1071,7 @@ dev = [ { name = "coverage", specifier = "~=7.6" }, { name = "flake8", specifier = "~=7.1" }, { name = "flake8-docstrings", specifier = "~=1.6" }, + { name = "grpcio-tools", specifier = ">=1.60.0" }, { name = "ipykernel", specifier = "~=6.29" }, { name = "isort", specifier = "~=5.10" }, { name = "jsondiff", specifier = "~=2.0" }, @@ -1385,6 +1389,130 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" }, ] +[[package]] +name = "grpcio" +version = "1.78.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/8a/3d098f35c143a89520e568e6539cc098fcd294495910e359889ce8741c84/grpcio-1.78.0.tar.gz", hash = "sha256:7382b95189546f375c174f53a5fa873cef91c4b8005faa05cc5b3beea9c4f1c5", size = 12852416, upload-time = "2026-02-06T09:57:18.093Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/5a/a8/690a085b4d1fe066130de97a87de32c45062cf2ecd218df9675add895550/grpcio-1.78.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:7cc47943d524ee0096f973e1081cb8f4f17a4615f2116882a5f1416e4cfe92b5", size = 5946986, upload-time = "2026-02-06T09:54:34.043Z" }, + { url = "https://files.pythonhosted.org/packages/c7/1b/e5213c5c0ced9d2d92778d30529ad5bb2dcfb6c48c4e2d01b1f302d33d64/grpcio-1.78.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:c3f293fdc675ccba4db5a561048cca627b5e7bd1c8a6973ffedabe7d116e22e2", size = 11816533, upload-time = "2026-02-06T09:54:37.04Z" }, + { url = "https://files.pythonhosted.org/packages/18/37/1ba32dccf0a324cc5ace744c44331e300b000a924bf14840f948c559ede7/grpcio-1.78.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:10a9a644b5dd5aec3b82b5b0b90d41c0fa94c85ef42cb42cf78a23291ddb5e7d", size = 6519964, upload-time = "2026-02-06T09:54:40.268Z" }, + { url = "https://files.pythonhosted.org/packages/ed/f5/c0e178721b818072f2e8b6fde13faaba942406c634009caf065121ce246b/grpcio-1.78.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4c5533d03a6cbd7f56acfc9cfb44ea64f63d29091e40e44010d34178d392d7eb", size = 7198058, upload-time = "2026-02-06T09:54:42.389Z" }, + { url = "https://files.pythonhosted.org/packages/5b/b2/40d43c91ae9cd667edc960135f9f08e58faa1576dc95af29f66ec912985f/grpcio-1.78.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ff870aebe9a93a85283837801d35cd5f8814fe2ad01e606861a7fb47c762a2b7", size = 6727212, upload-time = "2026-02-06T09:54:44.91Z" }, + { url = "https://files.pythonhosted.org/packages/ed/88/9da42eed498f0efcfcd9156e48ae63c0cde3bea398a16c99fb5198c885b6/grpcio-1.78.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:391e93548644e6b2726f1bb84ed60048d4bcc424ce5e4af0843d28ca0b754fec", size = 7300845, upload-time = "2026-02-06T09:54:47.562Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/3f/1c66b7b1b19a8828890e37868411a6e6925df5a9030bfa87ab318f34095d/grpcio-1.78.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:df2c8f3141f7cbd112a6ebbd760290b5849cda01884554f7c67acc14e7b1758a", size = 8284605, upload-time = "2026-02-06T09:54:50.475Z" }, + { url = "https://files.pythonhosted.org/packages/94/c4/ca1bd87394f7b033e88525384b4d1e269e8424ab441ea2fba1a0c5b50986/grpcio-1.78.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd8cb8026e5f5b50498a3c4f196f57f9db344dad829ffae16b82e4fdbaea2813", size = 7726672, upload-time = "2026-02-06T09:54:53.11Z" }, + { url = "https://files.pythonhosted.org/packages/41/09/f16e487d4cc65ccaf670f6ebdd1a17566b965c74fc3d93999d3b2821e052/grpcio-1.78.0-cp310-cp310-win32.whl", hash = "sha256:f8dff3d9777e5d2703a962ee5c286c239bf0ba173877cc68dc02c17d042e29de", size = 4076715, upload-time = "2026-02-06T09:54:55.549Z" }, + { url = "https://files.pythonhosted.org/packages/2a/32/4ce60d94e242725fd3bcc5673c04502c82a8e87b21ea411a63992dc39f8f/grpcio-1.78.0-cp310-cp310-win_amd64.whl", hash = "sha256:94f95cf5d532d0e717eed4fc1810e8e6eded04621342ec54c89a7c2f14b581bf", size = 4799157, upload-time = "2026-02-06T09:54:59.838Z" }, + { url = "https://files.pythonhosted.org/packages/86/c7/d0b780a29b0837bf4ca9580904dfb275c1fc321ded7897d620af7047ec57/grpcio-1.78.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:2777b783f6c13b92bd7b716667452c329eefd646bfb3f2e9dabea2e05dbd34f6", size = 5951525, upload-time = "2026-02-06T09:55:01.989Z" }, + { url = "https://files.pythonhosted.org/packages/c5/b1/96920bf2ee61df85a9503cb6f733fe711c0ff321a5a697d791b075673281/grpcio-1.78.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:9dca934f24c732750389ce49d638069c3892ad065df86cb465b3fa3012b70c9e", size = 11830418, upload-time = "2026-02-06T09:55:04.462Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/0c/7c1528f098aeb75a97de2bae18c530f56959fb7ad6c882db45d9884d6edc/grpcio-1.78.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:459ab414b35f4496138d0ecd735fed26f1318af5e52cb1efbc82a09f0d5aa911", size = 6524477, upload-time = "2026-02-06T09:55:07.111Z" }, + { url = "https://files.pythonhosted.org/packages/8d/52/e7c1f3688f949058e19a011c4e0dec973da3d0ae5e033909677f967ae1f4/grpcio-1.78.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:082653eecbdf290e6e3e2c276ab2c54b9e7c299e07f4221872380312d8cf395e", size = 7198266, upload-time = "2026-02-06T09:55:10.016Z" }, + { url = "https://files.pythonhosted.org/packages/e5/61/8ac32517c1e856677282c34f2e7812d6c328fa02b8f4067ab80e77fdc9c9/grpcio-1.78.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:85f93781028ec63f383f6bc90db785a016319c561cc11151fbb7b34e0d012303", size = 6730552, upload-time = "2026-02-06T09:55:12.207Z" }, + { url = "https://files.pythonhosted.org/packages/bd/98/b8ee0158199250220734f620b12e4a345955ac7329cfd908d0bf0fda77f0/grpcio-1.78.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f12857d24d98441af6a1d5c87442d624411db486f7ba12550b07788f74b67b04", size = 7304296, upload-time = "2026-02-06T09:55:15.044Z" }, + { url = "https://files.pythonhosted.org/packages/bd/0f/7b72762e0d8840b58032a56fdbd02b78fc645b9fa993d71abf04edbc54f4/grpcio-1.78.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5397fff416b79e4b284959642a4e95ac4b0f1ece82c9993658e0e477d40551ec", size = 8288298, upload-time = "2026-02-06T09:55:17.276Z" }, + { url = "https://files.pythonhosted.org/packages/24/ae/ae4ce56bc5bb5caa3a486d60f5f6083ac3469228faa734362487176c15c5/grpcio-1.78.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fbe6e89c7ffb48518384068321621b2a69cab509f58e40e4399fdd378fa6d074", size = 7730953, upload-time = "2026-02-06T09:55:19.545Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/6e/8052e3a28eb6a820c372b2eb4b5e32d195c661e137d3eca94d534a4cfd8a/grpcio-1.78.0-cp311-cp311-win32.whl", hash = "sha256:6092beabe1966a3229f599d7088b38dfc8ffa1608b5b5cdda31e591e6500f856", size = 4076503, upload-time = "2026-02-06T09:55:21.521Z" }, + { url = "https://files.pythonhosted.org/packages/08/62/f22c98c5265dfad327251fa2f840b591b1df5f5e15d88b19c18c86965b27/grpcio-1.78.0-cp311-cp311-win_amd64.whl", hash = "sha256:1afa62af6e23f88629f2b29ec9e52ec7c65a7176c1e0a83292b93c76ca882558", size = 4799767, upload-time = "2026-02-06T09:55:24.107Z" }, + { url = "https://files.pythonhosted.org/packages/4e/f4/7384ed0178203d6074446b3c4f46c90a22ddf7ae0b3aee521627f54cfc2a/grpcio-1.78.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:f9ab915a267fc47c7e88c387a3a28325b58c898e23d4995f765728f4e3dedb97", size = 5913985, upload-time = "2026-02-06T09:55:26.832Z" }, + { url = "https://files.pythonhosted.org/packages/81/ed/be1caa25f06594463f685b3790b320f18aea49b33166f4141bfdc2bfb236/grpcio-1.78.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3f8904a8165ab21e07e58bf3e30a73f4dffc7a1e0dbc32d51c61b5360d26f43e", size = 11811853, upload-time = "2026-02-06T09:55:29.224Z" }, + { url = "https://files.pythonhosted.org/packages/24/a7/f06d151afc4e64b7e3cc3e872d331d011c279aaab02831e40a81c691fb65/grpcio-1.78.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:859b13906ce098c0b493af92142ad051bf64c7870fa58a123911c88606714996", size = 6475766, upload-time = "2026-02-06T09:55:31.825Z" }, + { url = "https://files.pythonhosted.org/packages/8a/a8/4482922da832ec0082d0f2cc3a10976d84a7424707f25780b82814aafc0a/grpcio-1.78.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b2342d87af32790f934a79c3112641e7b27d63c261b8b4395350dad43eff1dc7", size = 7170027, upload-time = "2026-02-06T09:55:34.7Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/bf/f4a3b9693e35d25b24b0b39fa46d7d8a3c439e0a3036c3451764678fec20/grpcio-1.78.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:12a771591ae40bc65ba67048fa52ef4f0e6db8279e595fd349f9dfddeef571f9", size = 6690766, upload-time = "2026-02-06T09:55:36.902Z" }, + { url = "https://files.pythonhosted.org/packages/c7/b9/521875265cc99fe5ad4c5a17010018085cae2810a928bf15ebe7d8bcd9cc/grpcio-1.78.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:185dea0d5260cbb2d224c507bf2a5444d5abbb1fa3594c1ed7e4c709d5eb8383", size = 7266161, upload-time = "2026-02-06T09:55:39.824Z" }, + { url = "https://files.pythonhosted.org/packages/05/86/296a82844fd40a4ad4a95f100b55044b4f817dece732bf686aea1a284147/grpcio-1.78.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:51b13f9aed9d59ee389ad666b8c2214cc87b5de258fa712f9ab05f922e3896c6", size = 8253303, upload-time = "2026-02-06T09:55:42.353Z" }, + { url = "https://files.pythonhosted.org/packages/f3/e4/ea3c0caf5468537f27ad5aab92b681ed7cc0ef5f8c9196d3fd42c8c2286b/grpcio-1.78.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fd5f135b1bd58ab088930b3c613455796dfa0393626a6972663ccdda5b4ac6ce", size = 7698222, upload-time = "2026-02-06T09:55:44.629Z" }, + { url = "https://files.pythonhosted.org/packages/d7/47/7f05f81e4bb6b831e93271fb12fd52ba7b319b5402cbc101d588f435df00/grpcio-1.78.0-cp312-cp312-win32.whl", hash = "sha256:94309f498bcc07e5a7d16089ab984d42ad96af1d94b5a4eb966a266d9fcabf68", size = 4066123, upload-time = "2026-02-06T09:55:47.644Z" }, + { url = "https://files.pythonhosted.org/packages/ad/e7/d6914822c88aa2974dbbd10903d801a28a19ce9cd8bad7e694cbbcf61528/grpcio-1.78.0-cp312-cp312-win_amd64.whl", hash = "sha256:9566fe4ababbb2610c39190791e5b829869351d14369603702e890ef3ad2d06e", size = 4797657, upload-time = "2026-02-06T09:55:49.86Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/a9/8f75894993895f361ed8636cd9237f4ab39ef87fd30db17467235ed1c045/grpcio-1.78.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:ce3a90455492bf8bfa38e56fbbe1dbd4f872a3d8eeaf7337dc3b1c8aa28c271b", size = 5920143, upload-time = "2026-02-06T09:55:52.035Z" }, + { url = "https://files.pythonhosted.org/packages/55/06/0b78408e938ac424100100fd081189451b472236e8a3a1f6500390dc4954/grpcio-1.78.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:2bf5e2e163b356978b23652c4818ce4759d40f4712ee9ec5a83c4be6f8c23a3a", size = 11803926, upload-time = "2026-02-06T09:55:55.494Z" }, + { url = "https://files.pythonhosted.org/packages/88/93/b59fe7832ff6ae3c78b813ea43dac60e295fa03606d14d89d2e0ec29f4f3/grpcio-1.78.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8f2ac84905d12918e4e55a16da17939eb63e433dc11b677267c35568aa63fc84", size = 6478628, upload-time = "2026-02-06T09:55:58.533Z" }, + { url = "https://files.pythonhosted.org/packages/ed/df/e67e3734527f9926b7d9c0dde6cd998d1d26850c3ed8eeec81297967ac67/grpcio-1.78.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b58f37edab4a3881bc6c9bca52670610e0c9ca14e2ea3cf9debf185b870457fb", size = 7173574, upload-time = "2026-02-06T09:56:01.786Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/cc03fffb07bfba982a9ec097b164e8835546980aec25ecfa5f9c1a47e022/grpcio-1.78.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:735e38e176a88ce41840c21bb49098ab66177c64c82426e24e0082500cc68af5", size = 6692639, upload-time = "2026-02-06T09:56:04.529Z" }, + { url = "https://files.pythonhosted.org/packages/bf/9a/289c32e301b85bdb67d7ec68b752155e674ee3ba2173a1858f118e399ef3/grpcio-1.78.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2045397e63a7a0ee7957c25f7dbb36ddc110e0cfb418403d110c0a7a68a844e9", size = 7268838, upload-time = "2026-02-06T09:56:08.397Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/79/1be93f32add280461fa4773880196572563e9c8510861ac2da0ea0f892b6/grpcio-1.78.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a9f136fbafe7ccf4ac7e8e0c28b31066e810be52d6e344ef954a3a70234e1702", size = 8251878, upload-time = "2026-02-06T09:56:10.914Z" }, + { url = "https://files.pythonhosted.org/packages/65/65/793f8e95296ab92e4164593674ae6291b204bb5f67f9d4a711489cd30ffa/grpcio-1.78.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:748b6138585379c737adc08aeffd21222abbda1a86a0dca2a39682feb9196c20", size = 7695412, upload-time = "2026-02-06T09:56:13.593Z" }, + { url = "https://files.pythonhosted.org/packages/1c/9f/1e233fe697ecc82845942c2822ed06bb522e70d6771c28d5528e4c50f6a4/grpcio-1.78.0-cp313-cp313-win32.whl", hash = "sha256:271c73e6e5676afe4fc52907686670c7cea22ab2310b76a59b678403ed40d670", size = 4064899, upload-time = "2026-02-06T09:56:15.601Z" }, + { url = "https://files.pythonhosted.org/packages/4d/27/d86b89e36de8a951501fb06a0f38df19853210f341d0b28f83f4aa0ffa08/grpcio-1.78.0-cp313-cp313-win_amd64.whl", hash = "sha256:f2d4e43ee362adfc05994ed479334d5a451ab7bc3f3fee1b796b8ca66895acb4", size = 4797393, upload-time = "2026-02-06T09:56:17.882Z" }, + { url = "https://files.pythonhosted.org/packages/29/f2/b56e43e3c968bfe822fa6ce5bca10d5c723aa40875b48791ce1029bb78c7/grpcio-1.78.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:e87cbc002b6f440482b3519e36e1313eb5443e9e9e73d6a52d43bd2004fcfd8e", size = 5920591, upload-time = "2026-02-06T09:56:20.758Z" }, + { url = "https://files.pythonhosted.org/packages/5d/81/1f3b65bd30c334167bfa8b0d23300a44e2725ce39bba5b76a2460d85f745/grpcio-1.78.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:c41bc64626db62e72afec66b0c8a0da76491510015417c127bfc53b2fe6d7f7f", size = 11813685, upload-time = "2026-02-06T09:56:24.315Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/1c/bbe2f8216a5bd3036119c544d63c2e592bdf4a8ec6e4a1867592f4586b26/grpcio-1.78.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8dfffba826efcf366b1e3ccc37e67afe676f290e13a3b48d31a46739f80a8724", size = 6487803, upload-time = "2026-02-06T09:56:27.367Z" }, + { url = "https://files.pythonhosted.org/packages/16/5c/a6b2419723ea7ddce6308259a55e8e7593d88464ce8db9f4aa857aba96fa/grpcio-1.78.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:74be1268d1439eaaf552c698cdb11cd594f0c49295ae6bb72c34ee31abbe611b", size = 7173206, upload-time = "2026-02-06T09:56:29.876Z" }, + { url = "https://files.pythonhosted.org/packages/df/1e/b8801345629a415ea7e26c83d75eb5dbe91b07ffe5210cc517348a8d4218/grpcio-1.78.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:be63c88b32e6c0f1429f1398ca5c09bc64b0d80950c8bb7807d7d7fb36fb84c7", size = 6693826, upload-time = "2026-02-06T09:56:32.305Z" }, + { url = "https://files.pythonhosted.org/packages/34/84/0de28eac0377742679a510784f049738a80424b17287739fc47d63c2439e/grpcio-1.78.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:3c586ac70e855c721bda8f548d38c3ca66ac791dc49b66a8281a1f99db85e452", size = 7277897, upload-time = "2026-02-06T09:56:34.915Z" }, + { url = "https://files.pythonhosted.org/packages/ca/9c/ad8685cfe20559a9edb66f735afdcb2b7d3de69b13666fdfc542e1916ebd/grpcio-1.78.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:35eb275bf1751d2ffbd8f57cdbc46058e857cf3971041521b78b7db94bdaf127", size = 8252404, upload-time = "2026-02-06T09:56:37.553Z" }, + { url = "https://files.pythonhosted.org/packages/3c/05/33a7a4985586f27e1de4803887c417ec7ced145ebd069bc38a9607059e2b/grpcio-1.78.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:207db540302c884b8848036b80db352a832b99dfdf41db1eb554c2c2c7800f65", size = 7696837, upload-time = "2026-02-06T09:56:40.173Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/77/7382241caf88729b106e49e7d18e3116216c778e6a7e833826eb96de22f7/grpcio-1.78.0-cp314-cp314-win32.whl", hash = "sha256:57bab6deef2f4f1ca76cc04565df38dc5713ae6c17de690721bdf30cb1e0545c", size = 4142439, upload-time = "2026-02-06T09:56:43.258Z" }, + { url = "https://files.pythonhosted.org/packages/48/b2/b096ccce418882fbfda4f7496f9357aaa9a5af1896a9a7f60d9f2b275a06/grpcio-1.78.0-cp314-cp314-win_amd64.whl", hash = "sha256:dce09d6116df20a96acfdbf85e4866258c3758180e8c49845d6ba8248b6d0bbb", size = 4929852, upload-time = "2026-02-06T09:56:45.885Z" }, +] + +[[package]] +name = "grpcio-tools" +version = "1.78.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "protobuf" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/d1/cbefe328653f746fd319c4377836a25ba64226e41c6a1d7d5cdbc87a459f/grpcio_tools-1.78.0.tar.gz", hash = "sha256:4b0dd86560274316e155d925158276f8564508193088bc43e20d3f5dff956b2b", size = 5393026, upload-time = "2026-02-06T09:59:59.53Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/70/2118a814a62ab205c905d221064bc09021db83fceeb84764d35c00f0f633/grpcio_tools-1.78.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:ea64e38d1caa2b8468b08cb193f5a091d169b6dbfe1c7dac37d746651ab9d84e", size = 2545568, upload-time = "2026-02-06T09:57:30.308Z" }, + { url = "https://files.pythonhosted.org/packages/2b/a9/68134839dd1a00f964185ead103646d6dd6a396b92ed264eaf521431b793/grpcio_tools-1.78.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:4003fcd5cbb5d578b06176fd45883a72a8f9203152149b7c680ce28653ad9e3a", size = 5708704, upload-time = "2026-02-06T09:57:33.512Z" }, + { url = "https://files.pythonhosted.org/packages/36/1b/b6135aa9534e22051c53e5b9c0853d18024a41c50aaff464b7b47c1ed379/grpcio_tools-1.78.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:fe6b0081775394c61ec633c9ff5dbc18337100eabb2e946b5c83967fe43b2748", size = 2591905, upload-time = "2026-02-06T09:57:35.338Z" }, + { url = "https://files.pythonhosted.org/packages/41/2b/6380df1390d62b1d18ae18d4d790115abf4997fa29498aa50ba644ecb9d8/grpcio_tools-1.78.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:7e989ad2cd93db52d7f1a643ecaa156ac55bf0484f1007b485979ce8aef62022", size = 2905271, upload-time = "2026-02-06T09:57:37.932Z" }, + { url = "https://files.pythonhosted.org/packages/3a/07/9b369f37c8f4956b68778c044d57390a8f0f3b1cca590018809e75a4fce2/grpcio_tools-1.78.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b874991797e96c41a37e563236c3317ed41b915eff25b292b202d6277d30da85", size = 2656234, upload-time = "2026-02-06T09:57:41.157Z" }, + { url = "https://files.pythonhosted.org/packages/51/61/40eee40e7a54f775a0d4117536532713606b6b177fff5e327f33ad18746e/grpcio_tools-1.78.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:daa8c288b728228377aaf758925692fc6068939d9fa32f92ca13dedcbeb41f33", size = 3105770, upload-time = "2026-02-06T09:57:43.373Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ac/81ee4b728e70e8ba66a589f86469925ead02ed6f8973434e4a52e3576148/grpcio_tools-1.78.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:87e648759b06133199f4bc0c0053e3819f4ec3b900dc399e1097b6065db998b5", size = 3654896, upload-time = "2026-02-06T09:57:45.402Z" }, + { url = "https://files.pythonhosted.org/packages/be/b9/facb3430ee427c800bb1e39588c85685677ea649491d6e0874bd9f3a1c0e/grpcio_tools-1.78.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f3d3ced52bfe39eba3d24f5a8fab4e12d071959384861b41f0c52ca5399d6920", size = 3322529, upload-time = "2026-02-06T09:57:47.292Z" }, + { url = "https://files.pythonhosted.org/packages/c7/de/d7a011df9abfed8c30f0d2077b0562a6e3edc57cb3e5514718e2a81f370a/grpcio_tools-1.78.0-cp310-cp310-win32.whl", hash = 
"sha256:4bb6ed690d417b821808796221bde079377dff98fdc850ac157ad2f26cda7a36", size = 993518, upload-time = "2026-02-06T09:57:48.836Z" }, + { url = "https://files.pythonhosted.org/packages/c8/5e/f7f60c3ae2281c6b438c3a8455f4a5d5d2e677cf20207864cbee3763da22/grpcio_tools-1.78.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c676d8342fd53bd85a5d5f0d070cd785f93bc040510014708ede6fcb32fada1", size = 1158505, upload-time = "2026-02-06T09:57:50.633Z" }, + { url = "https://files.pythonhosted.org/packages/75/78/280184d19242ed6762bf453c47a70b869b3c5c72a24dc5bf2bf43909faa3/grpcio_tools-1.78.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:6a8b8b7b49f319d29dbcf507f62984fa382d1d10437d75c3f26db5f09c4ac0af", size = 2545904, upload-time = "2026-02-06T09:57:52.769Z" }, + { url = "https://files.pythonhosted.org/packages/5b/51/3c46dea5113f68fe879961cae62d34bb7a3c308a774301b45d614952ee98/grpcio_tools-1.78.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:d62cf3b68372b0c6d722a6165db41b976869811abeabc19c8522182978d8db10", size = 5709078, upload-time = "2026-02-06T09:57:56.389Z" }, + { url = "https://files.pythonhosted.org/packages/e0/2c/dc1ae9ec53182c96d56dfcbf3bcd3e55a8952ad508b188c75bf5fc8993d4/grpcio_tools-1.78.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fa9056742efeaf89d5fe14198af71e5cbc4fbf155d547b89507e19d6025906c6", size = 2591744, upload-time = "2026-02-06T09:57:58.341Z" }, + { url = "https://files.pythonhosted.org/packages/04/63/9b53fc9a9151dd24386785171a4191ee7cb5afb4d983b6a6a87408f41b28/grpcio_tools-1.78.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:e3191af125dcb705aa6bc3856ba81ba99b94121c1b6ebee152e66ea084672831", size = 2905113, upload-time = "2026-02-06T09:58:00.38Z" }, + { url = "https://files.pythonhosted.org/packages/96/b2/0ad8d789f3a2a00893131c140865605fa91671a6e6fcf9da659e1fabba10/grpcio_tools-1.78.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:283239ddbb67ae83fac111c61b25d8527a1dbd355b377cbc8383b79f1329944d", size = 2656436, upload-time = "2026-02-06T09:58:03.038Z" }, + { url = "https://files.pythonhosted.org/packages/09/4d/580f47ce2fc61b093ade747b378595f51b4f59972dd39949f7444b464a03/grpcio_tools-1.78.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ac977508c0db15301ef36d6c79769ec1a6cc4e3bc75735afca7fe7e360cead3a", size = 3106128, upload-time = "2026-02-06T09:58:05.064Z" }, + { url = "https://files.pythonhosted.org/packages/c9/29/d83b2d89f8d10e438bad36b1eb29356510fb97e81e6a608b22ae1890e8e6/grpcio_tools-1.78.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4ff605e25652a0bd13aa8a73a09bc48669c68170902f5d2bf1468a57d5e78771", size = 3654953, upload-time = "2026-02-06T09:58:07.15Z" }, + { url = "https://files.pythonhosted.org/packages/08/71/917ce85633311e54fefd7e6eb1224fb780ef317a4d092766f5630c3fc419/grpcio_tools-1.78.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0197d7b561c79be78ab93d0fe2836c8def470683df594bae3ac89dd8e5c821b2", size = 3322630, upload-time = "2026-02-06T09:58:10.305Z" }, + { url = "https://files.pythonhosted.org/packages/b2/55/3fbf6b26ab46fc79e1e6f7f4e0993cf540263dad639290299fad374a0829/grpcio_tools-1.78.0-cp311-cp311-win32.whl", hash = "sha256:28f71f591f7f39555863ced84fcc209cbf4454e85ef957232f43271ee99af577", size = 993804, upload-time = "2026-02-06T09:58:13.698Z" }, + { url = "https://files.pythonhosted.org/packages/73/86/4affe006d9e1e9e1c6653d6aafe2f8b9188acb2b563cd8ed3a2c7c0e8aec/grpcio_tools-1.78.0-cp311-cp311-win_amd64.whl", hash = "sha256:5a6de495dabf86a3b40b9a7492994e1232b077af9d63080811838b781abbe4e8", size = 1158566, upload-time = "2026-02-06T09:58:15.721Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ae/5b1fa5dd8d560a6925aa52de0de8731d319f121c276e35b9b2af7cc220a2/grpcio_tools-1.78.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:9eb122da57d4cad7d339fc75483116f0113af99e8d2c67f3ef9cae7501d806e4", size = 2546823, upload-time = 
"2026-02-06T09:58:17.944Z" }, + { url = "https://files.pythonhosted.org/packages/a7/ed/d33ccf7fa701512efea7e7e23333b748848a123e9d3bbafde4e126784546/grpcio_tools-1.78.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:d0c501b8249940b886420e6935045c44cb818fa6f265f4c2b97d5cff9cb5e796", size = 5706776, upload-time = "2026-02-06T09:58:20.944Z" }, + { url = "https://files.pythonhosted.org/packages/c6/69/4285583f40b37af28277fc6b867d636e3b10e1b6a7ebd29391a856e1279b/grpcio_tools-1.78.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:77e5aa2d2a7268d55b1b113f958264681ef1994c970f69d48db7d4683d040f57", size = 2593972, upload-time = "2026-02-06T09:58:23.29Z" }, + { url = "https://files.pythonhosted.org/packages/d7/eb/ecc1885bd6b3147f0a1b7dff5565cab72f01c8f8aa458f682a1c77a9fb08/grpcio_tools-1.78.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:8e3c0b0e6ba5275322ba29a97bf890565a55f129f99a21b121145e9e93a22525", size = 2905531, upload-time = "2026-02-06T09:58:25.406Z" }, + { url = "https://files.pythonhosted.org/packages/ae/a9/511d0040ced66960ca10ba0f082d6b2d2ee6dd61837b1709636fdd8e23b4/grpcio_tools-1.78.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:975d4cb48694e20ebd78e1643e5f1cd94cdb6a3d38e677a8e84ae43665aa4790", size = 2656909, upload-time = "2026-02-06T09:58:28.022Z" }, + { url = "https://files.pythonhosted.org/packages/06/a3/3d2c707e7dee8df842c96fbb24feb2747e506e39f4a81b661def7fed107c/grpcio_tools-1.78.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:553ff18c5d52807dedecf25045ae70bad7a3dbba0b27a9a3cdd9bcf0a1b7baec", size = 3109778, upload-time = "2026-02-06T09:58:30.091Z" }, + { url = "https://files.pythonhosted.org/packages/1f/4b/646811ba241bf05da1f0dc6f25764f1c837f78f75b4485a4210c84b79eae/grpcio_tools-1.78.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8c7f5e4af5a84d2e96c862b1a65e958a538237e268d5f8203a3a784340975b51", size = 3658763, upload-time = 
"2026-02-06T09:58:32.875Z" }, + { url = "https://files.pythonhosted.org/packages/45/de/0a5ef3b3e79d1011375f5580dfee3a9c1ccb96c5f5d1c74c8cee777a2483/grpcio_tools-1.78.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:96183e2b44afc3f9a761e9d0f985c3b44e03e8bb98e626241a6cbfb3b6f7e88f", size = 3325116, upload-time = "2026-02-06T09:58:34.894Z" }, + { url = "https://files.pythonhosted.org/packages/95/d2/6391b241ad571bc3e71d63f957c0b1860f0c47932d03c7f300028880f9b8/grpcio_tools-1.78.0-cp312-cp312-win32.whl", hash = "sha256:2250e8424c565a88573f7dc10659a0b92802e68c2a1d57e41872c9b88ccea7a6", size = 993493, upload-time = "2026-02-06T09:58:37.242Z" }, + { url = "https://files.pythonhosted.org/packages/7c/8f/7d0d3a39ecad76ccc136be28274daa660569b244fa7d7d0bbb24d68e5ece/grpcio_tools-1.78.0-cp312-cp312-win_amd64.whl", hash = "sha256:217d1fa29de14d9c567d616ead7cb0fef33cde36010edff5a9390b00d52e5094", size = 1158423, upload-time = "2026-02-06T09:58:40.072Z" }, + { url = "https://files.pythonhosted.org/packages/53/ce/17311fb77530420e2f441e916b347515133e83d21cd6cc77be04ce093d5b/grpcio_tools-1.78.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:2d6de1cc23bdc1baafc23e201b1e48c617b8c1418b4d8e34cebf72141676e5fb", size = 2546284, upload-time = "2026-02-06T09:58:43.073Z" }, + { url = "https://files.pythonhosted.org/packages/1d/d3/79e101483115f0e78223397daef71751b75eba7e92a32060c10aae11ca64/grpcio_tools-1.78.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:2afeaad88040894c76656202ff832cb151bceb05c0e6907e539d129188b1e456", size = 5705653, upload-time = "2026-02-06T09:58:45.533Z" }, + { url = "https://files.pythonhosted.org/packages/8b/a7/52fa3ccb39ceeee6adc010056eadfbca8198651c113e418dafebbdf2b306/grpcio_tools-1.78.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:33cc593735c93c03d63efe7a8ba25f3c66f16c52f0651910712490244facad72", size = 2592788, upload-time = "2026-02-06T09:58:48.918Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/08/682ff6bb548225513d73dc9403742d8975439d7469c673bc534b9bbc83a7/grpcio_tools-1.78.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:2921d7989c4d83b71f03130ab415fa4d66e6693b8b8a1fcbb7a1c67cff19b812", size = 2905157, upload-time = "2026-02-06T09:58:51.478Z" }, + { url = "https://files.pythonhosted.org/packages/b2/66/264f3836a96423b7018e5ada79d62576a6401f6da4e1f4975b18b2be1265/grpcio_tools-1.78.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e6a0df438e82c804c7b95e3f311c97c2f876dcc36376488d5b736b7bcf5a9b45", size = 2656166, upload-time = "2026-02-06T09:58:54.117Z" }, + { url = "https://files.pythonhosted.org/packages/f3/6b/f108276611522e03e98386b668cc7e575eff6952f2db9caa15b2a3b3e883/grpcio_tools-1.78.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e9c6070a9500798225191ef25d0055a15d2c01c9c8f2ee7b681fffa99c98c822", size = 3109110, upload-time = "2026-02-06T09:58:56.891Z" }, + { url = "https://files.pythonhosted.org/packages/6f/c7/cf048dbcd64b3396b3c860a2ffbcc67a8f8c87e736aaa74c2e505a7eee4c/grpcio_tools-1.78.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:394e8b57d85370a62e5b0a4d64c96fcf7568345c345d8590c821814d227ecf1d", size = 3657863, upload-time = "2026-02-06T09:58:59.176Z" }, + { url = "https://files.pythonhosted.org/packages/b6/37/e2736912c8fda57e2e57a66ea5e0bc8eb9a5fb7ded00e866ad22d50afb08/grpcio_tools-1.78.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a3ef700293ab375e111a2909d87434ed0a0b086adf0ce67a8d9cf12ea7765e63", size = 3324748, upload-time = "2026-02-06T09:59:01.242Z" }, + { url = "https://files.pythonhosted.org/packages/1c/5d/726abc75bb5bfc2841e88ea05896e42f51ca7c30cb56da5c5b63058b3867/grpcio_tools-1.78.0-cp313-cp313-win32.whl", hash = "sha256:6993b960fec43a8d840ee5dc20247ef206c1a19587ea49fe5e6cc3d2a09c1585", size = 993074, upload-time = "2026-02-06T09:59:03.085Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/68/91b400bb360faf9b177ffb5540ec1c4d06ca923691ddf0f79e2c9683f4da/grpcio_tools-1.78.0-cp313-cp313-win_amd64.whl", hash = "sha256:275ce3c2978842a8cf9dd88dce954e836e590cf7029649ad5d1145b779039ed5", size = 1158185, upload-time = "2026-02-06T09:59:05.036Z" }, + { url = "https://files.pythonhosted.org/packages/cf/5e/278f3831c8d56bae02e3acc570465648eccf0a6bbedcb1733789ac966803/grpcio_tools-1.78.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:8b080d0d072e6032708a3a91731b808074d7ab02ca8fb9847b6a011fdce64cd9", size = 2546270, upload-time = "2026-02-06T09:59:07.426Z" }, + { url = "https://files.pythonhosted.org/packages/a3/d9/68582f2952b914b60dddc18a2e3f9c6f09af9372b6f6120d6cf3ec7f8b4e/grpcio_tools-1.78.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8c0ad8f8f133145cd7008b49cb611a5c6a9d89ab276c28afa17050516e801f79", size = 5705731, upload-time = "2026-02-06T09:59:09.856Z" }, + { url = "https://files.pythonhosted.org/packages/70/68/feb0f9a48818ee1df1e8b644069379a1e6ef5447b9b347c24e96fd258e5d/grpcio_tools-1.78.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2f8ea092a7de74c6359335d36f0674d939a3c7e1a550f4c2c9e80e0226de8fe4", size = 2593896, upload-time = "2026-02-06T09:59:12.23Z" }, + { url = "https://files.pythonhosted.org/packages/1f/08/a430d8d06e1b8d33f3e48d3f0cc28236723af2f35e37bd5c8db05df6c3aa/grpcio_tools-1.78.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:da422985e0cac822b41822f43429c19ecb27c81ffe3126d0b74e77edec452608", size = 2905298, upload-time = "2026-02-06T09:59:14.458Z" }, + { url = "https://files.pythonhosted.org/packages/71/0a/348c36a3eae101ca0c090c9c3bc96f2179adf59ee0c9262d11cdc7bfe7db/grpcio_tools-1.78.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4fab1faa3fbcb246263e68da7a8177d73772283f9db063fb8008517480888d26", size = 2656186, upload-time = "2026-02-06T09:59:16.949Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/3f/18219f331536fad4af6207ade04142292faa77b5cb4f4463787988963df8/grpcio_tools-1.78.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:dd9c094f73f734becae3f20f27d4944d3cd8fb68db7338ee6c58e62fc5c3d99f", size = 3109859, upload-time = "2026-02-06T09:59:19.202Z" }, + { url = "https://files.pythonhosted.org/packages/5b/d9/341ea20a44c8e5a3a18acc820b65014c2e3ea5b4f32a53d14864bcd236bc/grpcio_tools-1.78.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:2ed51ce6b833068f6c580b73193fc2ec16468e6bc18354bc2f83a58721195a58", size = 3657915, upload-time = "2026-02-06T09:59:21.839Z" }, + { url = "https://files.pythonhosted.org/packages/fb/f4/5978b0f91611a64371424c109dd0027b247e5b39260abad2eaee66b6aa37/grpcio_tools-1.78.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:05803a5cdafe77c8bdf36aa660ad7a6a1d9e49bc59ce45c1bade2a4698826599", size = 3324724, upload-time = "2026-02-06T09:59:24.402Z" }, + { url = "https://files.pythonhosted.org/packages/b2/80/96a324dba99cfbd20e291baf0b0ae719dbb62b76178c5ce6c788e7331cb1/grpcio_tools-1.78.0-cp314-cp314-win32.whl", hash = "sha256:f7c722e9ce6f11149ac5bddd5056e70aaccfd8168e74e9d34d8b8b588c3f5c7c", size = 1015505, upload-time = "2026-02-06T09:59:26.3Z" }, + { url = "https://files.pythonhosted.org/packages/3b/d1/909e6a05bfd44d46327dc4b8a78beb2bae4fb245ffab2772e350081aaf7e/grpcio_tools-1.78.0-cp314-cp314-win_amd64.whl", hash = "sha256:7d58ade518b546120ec8f0a8e006fc8076ae5df151250ebd7e82e9b5e152c229", size = 1190196, upload-time = "2026-02-06T09:59:28.359Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -3067,6 +3195,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, ] +[[package]] +name = "protobuf" +version = "6.33.5" +source = { 
registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/25/7c72c307aafc96fa87062aa6291d9f7c94836e43214d43722e86037aac02/protobuf-6.33.5.tar.gz", hash = "sha256:6ddcac2a081f8b7b9642c09406bc6a4290128fce5f471cddd165960bb9119e5c", size = 444465, upload-time = "2026-01-29T21:51:33.494Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/79/af92d0a8369732b027e6d6084251dd8e782c685c72da161bd4a2e00fbabb/protobuf-6.33.5-cp310-abi3-win32.whl", hash = "sha256:d71b040839446bac0f4d162e758bea99c8251161dae9d0983a3b88dee345153b", size = 425769, upload-time = "2026-01-29T21:51:21.751Z" }, + { url = "https://files.pythonhosted.org/packages/55/75/bb9bc917d10e9ee13dee8607eb9ab963b7cf8be607c46e7862c748aa2af7/protobuf-6.33.5-cp310-abi3-win_amd64.whl", hash = "sha256:3093804752167bcab3998bec9f1048baae6e29505adaf1afd14a37bddede533c", size = 437118, upload-time = "2026-01-29T21:51:24.022Z" }, + { url = "https://files.pythonhosted.org/packages/a2/6b/e48dfc1191bc5b52950246275bf4089773e91cb5ba3592621723cdddca62/protobuf-6.33.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:a5cb85982d95d906df1e2210e58f8e4f1e3cdc088e52c921a041f9c9a0386de5", size = 427766, upload-time = "2026-01-29T21:51:25.413Z" }, + { url = "https://files.pythonhosted.org/packages/4e/b1/c79468184310de09d75095ed1314b839eb2f72df71097db9d1404a1b2717/protobuf-6.33.5-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:9b71e0281f36f179d00cbcb119cb19dec4d14a81393e5ea220f64b286173e190", size = 324638, upload-time = "2026-01-29T21:51:26.423Z" }, + { url = "https://files.pythonhosted.org/packages/c5/f5/65d838092fd01c44d16037953fd4c2cc851e783de9b8f02b27ec4ffd906f/protobuf-6.33.5-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8afa18e1d6d20af15b417e728e9f60f3aa108ee76f23c3b2c07a2c3b546d3afd", size = 339411, upload-time = "2026-01-29T21:51:27.446Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/53/a9443aa3ca9ba8724fdfa02dd1887c1bcd8e89556b715cfbacca6b63dbec/protobuf-6.33.5-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:cbf16ba3350fb7b889fca858fb215967792dc125b35c7976ca4818bee3521cf0", size = 323465, upload-time = "2026-01-29T21:51:28.925Z" }, + { url = "https://files.pythonhosted.org/packages/57/bf/2086963c69bdac3d7cff1cc7ff79b8ce5ea0bec6797a017e1be338a46248/protobuf-6.33.5-py3-none-any.whl", hash = "sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02", size = 170687, upload-time = "2026-01-29T21:51:32.557Z" }, +] + [[package]] name = "psutil" version = "7.2.2" @@ -4075,6 +4218,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0b/70/b84f9944a03964a88031ef6ac219b6c91e8ba2f373362329d8770ef36f02/semver-2.13.0-py2.py3-none-any.whl", hash = "sha256:ced8b23dceb22134307c1b8abfa523da14198793d9787ac838e70e29e77458d4", size = 12901, upload-time = "2020-10-20T20:16:52.583Z" }, ] +[[package]] +name = "setuptools" +version = "82.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316, upload-time = "2026-03-09T12:47:17.221Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223, upload-time = "2026-03-09T12:47:15.026Z" }, +] + [[package]] name = "shellingham" version = "1.5.4" From 0be2fbf3731b500bef65a8f045c58a32e43e3dfc Mon Sep 17 00:00:00 2001 From: Kristian Rickert Date: Sun, 15 Mar 2026 21:30:40 -0400 Subject: [PATCH 2/2] DCO Remediation Commit for Kristian Rickert I, Kristian Rickert , hereby add my Signed-off-by to this 
commit: e233312725f9e56a1bf3575cfaf61b6b1e35574e Signed-off-by: Kristian Rickert