From 9c2ef94ba350fd841d0529007c665266dde05b47 Mon Sep 17 00:00:00 2001 From: HonahX Date: Sat, 16 Dec 2023 20:32:05 -0800 Subject: [PATCH 1/2] implement table properties update --- pyiceberg/table/__init__.py | 25 +++++++++++++++++ tests/table/test_init.py | 53 +++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 40b37cc248..f51266f239 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -414,6 +414,31 @@ def _(update: UpgradeFormatVersionUpdate, base_metadata: TableMetadata, context: return TableMetadataUtil.parse_obj(updated_metadata_data) +@_apply_table_update.register(SetPropertiesUpdate) +def _(update: SetPropertiesUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata: + if len(update.updates) == 0: + return base_metadata + + properties = dict(base_metadata.properties) + properties.update(update.updates) + + context.add_update(update) + return base_metadata.model_copy(update={"properties": properties}) + + +@_apply_table_update.register(RemovePropertiesUpdate) +def _(update: RemovePropertiesUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata: + if len(update.removals) == 0: + return base_metadata + + properties = dict(base_metadata.properties) + for key in update.removals: + properties.pop(key) + + context.add_update(update) + return base_metadata.model_copy(update={"properties": properties}) + + @_apply_table_update.register(AddSchemaUpdate) def _(update: AddSchemaUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata: if update.last_column_id < base_metadata.last_column_id: diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 04d467c318..05ae36e43a 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -49,6 +49,7 @@ AssertLastAssignedPartitionId, AssertRefSnapshotId, AssertTableUUID, + RemovePropertiesUpdate, SetPropertiesUpdate, SetSnapshotRefUpdate, SnapshotRef, @@ -526,6 +527,51 @@ def test_add_nested_list_type_column(table_v2: Table) -> None: assert new_schema.highest_field_id == 7 +def test_apply_set_properties_update(table_v2: Table) -> None: + base_metadata = table_v2.metadata + + new_metadata_no_update = update_table_metadata(base_metadata, (SetPropertiesUpdate(updates={}),)) + assert new_metadata_no_update == base_metadata + + new_metadata = update_table_metadata( + base_metadata, (SetPropertiesUpdate(updates={"read.split.target.size": "123", "test_a": "test_a", "test_b": "test_b"}),) + ) + + assert base_metadata.properties == {"read.split.target.size": "134217728"} + assert new_metadata.properties == {"read.split.target.size": "123", "test_a": "test_a", "test_b": "test_b"} + + new_metadata_add_only = update_table_metadata(new_metadata, (SetPropertiesUpdate(updates={"test_c": "test_c"}),)) + + assert new_metadata_add_only.properties == { + "read.split.target.size": "123", + "test_a": "test_a", + "test_b": "test_b", + "test_c": "test_c", + } + + +def test_apply_remove_properties_update(table_v2: Table) -> None: + base_metadata = update_table_metadata( + table_v2.metadata, + (SetPropertiesUpdate(updates={"test_a": "test_a", "test_b": "test_b", "test_c": "test_c", "test_d": "test_d"}),), + ) + + new_metadata_no_removal = update_table_metadata(base_metadata, (RemovePropertiesUpdate(removals=[]),)) + + assert base_metadata == new_metadata_no_removal + + new_metadata = update_table_metadata(base_metadata, (RemovePropertiesUpdate(removals=["test_a", "test_c"]),)) + + assert base_metadata.properties == { + "read.split.target.size": "134217728", + "test_a": "test_a", + "test_b": "test_b", + "test_c": "test_c", + "test_d": "test_d", + } + assert new_metadata.properties == {"read.split.target.size": "134217728", "test_b": "test_b", "test_d": "test_d"} + + def test_apply_add_schema_update(table_v2: Table) -> None: transaction = table_v2.transaction() update = transaction.update_schema() @@ -626,6 +672,8 @@ def test_update_metadata_with_multiple_updates(table_v1: Table) -> None: schema_update_1.add_column(path="b", field_type=IntegerType()) schema_update_1.commit() + transaction.set_properties(owner="test", test_a="test_a", test_b="test_b", test_c="test_c") + test_updates = transaction._updates # pylint: disable=W0212 new_snapshot = Snapshot( @@ -640,6 +688,7 @@ def test_update_metadata_with_multiple_updates(table_v1: Table) -> None: test_updates += ( AddSnapshotUpdate(snapshot=new_snapshot), + SetPropertiesUpdate(updates={"test_a": "test_a1"}), SetSnapshotRefUpdate( ref_name="main", type="branch", @@ -648,6 +697,7 @@ def test_update_metadata_with_multiple_updates(table_v1: Table) -> None: max_snapshot_age_ms=12312312312, min_snapshots_to_keep=1, ), + RemovePropertiesUpdate(removals=["test_c", "test_b"]), ) new_metadata = update_table_metadata(base_metadata, test_updates) @@ -682,6 +732,9 @@ def test_update_metadata_with_multiple_updates(table_v1: Table) -> None: max_ref_age_ms=123123123, ) + # Set/RemovePropertiesUpdate + assert new_metadata.properties == {"owner": "test", "test_a": "test_a1"} + def test_metadata_isolation_from_illegal_updates(table_v1: Table) -> None: base_metadata = table_v1.metadata From 2ac23aca0ad2e423a2e43c4eafb94b5e2e76cd19 Mon Sep 17 00:00:00 2001 From: HonahX Date: Sat, 13 Jan 2024 17:06:44 -0800 Subject: [PATCH 2/2] add test for glue catalog --- tests/catalog/integration_test_glue.py | 17 +++++++++++++++++ tests/catalog/test_glue.py | 22 ++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/tests/catalog/integration_test_glue.py b/tests/catalog/integration_test_glue.py index 99f0adac40..24401cae39 100644 --- a/tests/catalog/integration_test_glue.py +++ b/tests/catalog/integration_test_glue.py @@ -294,3 +294,20 @@ def test_commit_table_update_schema( assert new_schema assert new_schema == update._apply() assert new_schema.find_field("b").field_type == IntegerType() + + +def test_commit_table_properties(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None: + identifier = (database_name, table_name) + test_catalog.create_namespace(namespace=database_name) + table = test_catalog.create_table(identifier=identifier, schema=table_schema_nested, properties={"test_a": "test_a"}) + + assert test_catalog._parse_metadata_version(table.metadata_location) == 0 + + transaction = table.transaction() + transaction.set_properties(test_a="test_aa", test_b="test_b", test_c="test_c") + transaction.remove_properties("test_b") + transaction.commit_transaction() + + updated_table_metadata = table.metadata + assert test_catalog._parse_metadata_version(table.metadata_location) == 1 + assert updated_table_metadata.properties == {"test_a": "test_aa", "test_c": "test_c"} diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index f84ed4ad20..bf6d11784f 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -553,3 +553,25 @@ def test_commit_table_update_schema( assert new_schema assert new_schema == update._apply() assert new_schema.find_field("b").field_type == IntegerType() + + +@mock_glue +def test_commit_table_properties( + _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str +) -> None: + catalog_name = "glue" + identifier = (database_name, table_name) + test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"}) + test_catalog.create_namespace(namespace=database_name) + table = test_catalog.create_table(identifier=identifier, schema=table_schema_nested, properties={"test_a": "test_a"}) + + assert test_catalog._parse_metadata_version(table.metadata_location) == 0 + + transaction = table.transaction() + transaction.set_properties(test_a="test_aa", test_b="test_b", test_c="test_c") + transaction.remove_properties("test_b") + transaction.commit_transaction() + + updated_table_metadata = table.metadata + assert test_catalog._parse_metadata_version(table.metadata_location) == 1 + assert updated_table_metadata.properties == {"test_a": "test_aa", "test_c": "test_c"}