From be138e7626079613c1f07949521ea4dab1084275 Mon Sep 17 00:00:00 2001 From: Prashant Kumar Singh Date: Tue, 4 Nov 2025 15:50:33 +0000 Subject: [PATCH 1/2] Core: Add snapshotTransformer API for TableMetadata --- .../apache/iceberg/RewriteTablePathUtil.java | 3 +- .../org/apache/iceberg/TableMetadata.java | 45 +++- .../apache/iceberg/TableMetadataParser.java | 3 +- .../org/apache/iceberg/TestTableMetadata.java | 250 +++++++++++++++++- 4 files changed, 279 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java index ee7679f5e972..5264e8fbae2d 100644 --- a/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java +++ b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java @@ -136,7 +136,8 @@ public static TableMetadata replacePaths( metadata.partitionStatisticsFiles(), sourcePrefix, targetPrefix), metadata.nextRowId(), metadata.encryptionKeys(), - metadata.changes()); + metadata.changes(), + null); } private static Map updateProperties( diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index 3c2a3eb9b7a7..5b2d5af820ae 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -263,6 +263,7 @@ public String toString() { private final List changes; private final long nextRowId; private final List encryptionKeys; + private final transient Function snapshotTransformer; private SerializableSupplier> snapshotsSupplier; private volatile List snapshots; private volatile Map snapshotsById; @@ -296,7 +297,8 @@ public String toString() { List partitionStatisticsFiles, long nextRowId, List encryptionKeys, - List changes) { + List changes, + Function snapshotTransformer) { Preconditions.checkArgument( specs != null && !specs.isEmpty(), "Partition specs cannot be null or empty"); Preconditions.checkArgument( @@ -333,7 +335,9 @@ public String toString() { this.sortOrders = sortOrders; this.properties = properties; this.currentSnapshotId = currentSnapshotId; - this.snapshots = snapshots; + this.snapshotTransformer = + snapshotTransformer != null ? snapshotTransformer : Function.identity(); + this.snapshots = snapshots != null ? applySnapshotTransformer(snapshots) : null; this.snapshotsSupplier = snapshotsSupplier; this.snapshotsLoaded = snapshotsSupplier == null; this.snapshotLog = snapshotLog; @@ -343,11 +347,15 @@ public String toString() { // changes are carried through until metadata is read from a file this.changes = changes; - this.snapshotsById = indexAndValidateSnapshots(snapshots, lastSequenceNumber); + this.snapshotsById = + this.snapshots != null + ? indexAndValidateSnapshots(this.snapshots, lastSequenceNumber) + : ImmutableMap.of(); this.schemasById = indexSchemas(); this.specsById = PartitionUtil.indexSpecs(specs); this.sortOrdersById = indexSortOrders(sortOrders); - this.refs = validateRefs(currentSnapshotId, refs, snapshotsById); + this.refs = + this.snapshots != null ? validateRefs(currentSnapshotId, refs, snapshotsById) : refs; this.statisticsFiles = ImmutableList.copyOf(statisticsFiles); this.partitionStatisticsFiles = ImmutableList.copyOf(partitionStatisticsFiles); @@ -395,7 +403,9 @@ public String toString() { previous.timestampMillis); } - validateCurrentSnapshot(); + if (this.snapshots != null) { + validateCurrentSnapshot(); + } } public int formatVersion() { @@ -533,7 +543,9 @@ private synchronized void ensureSnapshotsLoaded() { List loadedSnapshots = Lists.newArrayList(snapshotsSupplier.get()); loadedSnapshots.removeIf(s -> s.sequenceNumber() > lastSequenceNumber); - this.snapshots = ImmutableList.copyOf(loadedSnapshots); + List transformedSnapshots = applySnapshotTransformer(loadedSnapshots); + + this.snapshots = ImmutableList.copyOf(transformedSnapshots); this.snapshotsById = indexAndValidateSnapshots(snapshots, lastSequenceNumber); validateCurrentSnapshot(); @@ -609,6 +621,14 @@ public TableMetadata removeSnapshotsIf(Predicate removeIf) { return new Builder(this).removeSnapshots(toRemove).build(); } + public TableMetadata transformSnapshots(Function transformer) { + return new Builder(this).transformSnapshots(transformer).build(); + } + + private List applySnapshotTransformer(List snapshotList) { + return snapshotList.stream().map(snapshotTransformer).collect(Collectors.toList()); + } + public TableMetadata replaceProperties(Map rawProperties) { ValidationException.check(rawProperties != null, "Cannot set properties to null"); Map newProperties = unreservedProperties(rawProperties); @@ -916,6 +936,7 @@ public static class Builder { private boolean suppressHistoricalSnapshots = false; private long nextRowId; private final List encryptionKeys; + private Function snapshotTransformer = Function.identity(); // change tracking private final List changes; @@ -1516,6 +1537,12 @@ public Builder discardChanges() { return this; } + public Builder transformSnapshots(Function transformer) { + Preconditions.checkArgument(transformer != null, "Snapshot transformer cannot be null"); + this.snapshotTransformer = transformer; + return this; + } + public Builder setPreviousFileLocation(String previousFileLocation) { this.previousFileLocation = previousFileLocation; return this; @@ -1526,7 +1553,8 @@ private boolean hasChanges() { || (discardChanges && !changes.isEmpty()) || metadataLocation != null || suppressHistoricalSnapshots - || null != snapshotsSupplier; + || null != snapshotsSupplier + || !snapshotTransformer.equals(Function.identity()); } public TableMetadata build() { @@ -1590,7 +1618,8 @@ public TableMetadata build() { .collect(Collectors.toList()), nextRowId, encryptionKeys, - discardChanges ? ImmutableList.of() : ImmutableList.copyOf(changes)); + discardChanges ? ImmutableList.of() : ImmutableList.copyOf(changes), + snapshotTransformer); } private int addSchemaInternal(Schema schema, int newLastColumnId) { diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index eeeeeab8a699..cdd9becf3c48 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -580,7 +580,8 @@ public static TableMetadata fromJson(String metadataLocation, JsonNode node) { partitionStatisticsFiles, lastRowId, keys, - ImmutableList.of() /* no changes from the file */); + ImmutableList.of() /* no changes from the file */, + null /* no snapshot transformer */); } private static Map refsFromJson(JsonNode refMap) { diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 345f506fa978..36af2925bcd9 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -202,7 +202,8 @@ public void testJsonConversion() throws Exception { partitionStatisticsFiles, 40, ImmutableList.of(), - ImmutableList.of()); + ImmutableList.of(), + null); String asJson = TableMetadataParser.toJson(expected); TableMetadata metadata = TableMetadataParser.fromJson(asJson); @@ -307,7 +308,8 @@ public void testBackwardCompat() throws Exception { ImmutableList.of(), 0, ImmutableList.of(), - ImmutableList.of()); + ImmutableList.of(), + null); String asJson = toJsonWithoutSpecAndSchemaList(expected); TableMetadata metadata = TableMetadataParser.fromJson(asJson); @@ -431,7 +433,8 @@ public void testInvalidMainBranch() throws IOException { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of())) + ImmutableList.of(), + null)) .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith("Current snapshot ID does not match main branch"); } @@ -479,7 +482,8 @@ public void testMainWithoutCurrent() throws IOException { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of())) + ImmutableList.of(), + null)) .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith("Current snapshot is not set, but main branch exists"); } @@ -521,7 +525,8 @@ public void testBranchSnapshotMissing() { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of())) + ImmutableList.of(), + null)) .isInstanceOf(IllegalArgumentException.class) .hasMessageEndingWith("does not exist in the existing snapshots list"); } @@ -640,7 +645,8 @@ public void testJsonWithPreviousMetadataLog() throws Exception { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of()); + ImmutableList.of(), + null); String asJson = TableMetadataParser.toJson(base); TableMetadata metadataFromJson = TableMetadataParser.fromJson(asJson); @@ -731,7 +737,8 @@ public void testAddPreviousMetadataRemoveNone() throws IOException { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of()); + ImmutableList.of(), + null); previousMetadataLog.add(latestPreviousMetadata); @@ -837,7 +844,8 @@ public void testAddPreviousMetadataRemoveOne() throws IOException { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of()); + ImmutableList.of(), + null); previousMetadataLog.add(latestPreviousMetadata); @@ -947,7 +955,8 @@ public void testAddPreviousMetadataRemoveMultiple() throws IOException { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of()); + ImmutableList.of(), + null); previousMetadataLog.add(latestPreviousMetadata); @@ -995,7 +1004,8 @@ public void testV2UUIDValidation() { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of())) + ImmutableList.of(), + null)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("UUID is required in format v2"); } @@ -1032,7 +1042,8 @@ public void testVersionValidation() { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of())) + ImmutableList.of(), + null)) .isInstanceOf(IllegalArgumentException.class) .hasMessage( "Unsupported format version: v%s (supported: v%s)", @@ -1080,7 +1091,8 @@ public void testVersionValidation() { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of())) + ImmutableList.of(), + null)) .isNotNull(); assertThat( @@ -1967,4 +1979,218 @@ public void testMetadataWithRemoveSchemas() { assertThat(meta.changes()).anyMatch(u -> u instanceof MetadataUpdate.RemoveSchemas); } + + @Test + public void testTransformSnapshotsWithoutLazyLoading() { + long snapshotId1 = System.currentTimeMillis(); + Map summary1 = + ImmutableMap.of( + "total-records", "100", + "total-files", "10", + "operation", "append"); + + Snapshot snapshot1 = + new BaseSnapshot( + 0, + snapshotId1, + null, + snapshotId1, + "append", + summary1, + TEST_SCHEMA.schemaId(), + "file:/tmp/manifest1.avro", + null, + null, + null); + + long snapshotId2 = snapshotId1 + 1; + Map summary2 = + ImmutableMap.of( + "total-records", "200", + "total-files", "20", + "operation", "overwrite"); + + Snapshot snapshot2 = + new BaseSnapshot( + 1, + snapshotId2, + snapshotId1, + snapshotId2, + "overwrite", + summary2, + TEST_SCHEMA.schemaId(), + "file:/tmp/manifest2.avro", + null, + null, + null); + + TableMetadata base = + new TableMetadata( + null, + 2, + UUID.randomUUID().toString(), + TEST_LOCATION, + 1, + System.currentTimeMillis(), + LAST_ASSIGNED_COLUMN_ID, + TEST_SCHEMA.schemaId(), + ImmutableList.of(TEST_SCHEMA), + SPEC_5.specId(), + ImmutableList.of(SPEC_5), + SPEC_5.lastAssignedFieldId(), + SORT_ORDER_3.orderId(), + ImmutableList.of(SORT_ORDER_3), + ImmutableMap.of(), + snapshotId2, + ImmutableList.of(snapshot1, snapshot2), + null, + ImmutableList.of(), + ImmutableList.of(), + ImmutableMap.of( + SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(snapshotId2).build()), + ImmutableList.of(), + ImmutableList.of(), + 0L, + ImmutableList.of(), + ImmutableList.of(), + null); + + assertThat(base.snapshots()).hasSize(2); + assertThat(base.snapshots().get(0).summary()).isEqualTo(summary1); + assertThat(base.snapshots().get(1).summary()).isEqualTo(summary2); + + TableMetadata transformed = + base.transformSnapshots( + snapshot -> { + if (snapshot instanceof BaseSnapshot) { + BaseSnapshot tSnapshot = (BaseSnapshot) snapshot; + return new BaseSnapshot( + tSnapshot.sequenceNumber(), + tSnapshot.snapshotId(), + tSnapshot.parentId(), + tSnapshot.timestampMillis(), + tSnapshot.operation(), + ImmutableMap.of(), + tSnapshot.schemaId(), + tSnapshot.manifestListLocation(), + tSnapshot.firstRowId(), + tSnapshot.addedRows(), + tSnapshot.keyId()); + } + return snapshot; + }); + + assertThat(transformed.snapshots()).hasSize(2); + assertThat(transformed.snapshots().get(0).summary()).isEmpty(); + assertThat(transformed.snapshots().get(1).summary()).isEmpty(); + assertThat(transformed.snapshots().get(0).snapshotId()).isEqualTo(snapshotId1); + assertThat(transformed.snapshots().get(1).snapshotId()).isEqualTo(snapshotId2); + assertThat(transformed.snapshots().get(0).operation()).isEqualTo("append"); + assertThat(transformed.snapshots().get(1).operation()).isEqualTo("overwrite"); + } + + @Test + public void testTransformSnapshotsWithLazyLoading() { + long snapshotId1 = System.currentTimeMillis(); + Map summary1 = + ImmutableMap.of( + "total-records", "100", + "total-files", "10", + "operation", "append"); + + Snapshot snapshot1 = + new BaseSnapshot( + 0, + snapshotId1, + null, + snapshotId1, + "append", + summary1, + TEST_SCHEMA.schemaId(), + "file:/tmp/manifest1.avro", + null, + null, + null); + + long snapshotId2 = snapshotId1 + 1; + Map summary2 = + ImmutableMap.of( + "total-records", "200", + "total-files", "20", + "operation", "overwrite"); + + Snapshot snapshot2 = + new BaseSnapshot( + 1, + snapshotId2, + snapshotId1, + snapshotId2, + "overwrite", + summary2, + TEST_SCHEMA.schemaId(), + "file:/tmp/manifest2.avro", + null, + null, + null); + + TableMetadata base = + new TableMetadata( + null, + 2, + UUID.randomUUID().toString(), + TEST_LOCATION, + 1, + System.currentTimeMillis(), + LAST_ASSIGNED_COLUMN_ID, + TEST_SCHEMA.schemaId(), + ImmutableList.of(TEST_SCHEMA), + SPEC_5.specId(), + ImmutableList.of(SPEC_5), + SPEC_5.lastAssignedFieldId(), + SORT_ORDER_3.orderId(), + ImmutableList.of(SORT_ORDER_3), + ImmutableMap.of(), + snapshotId2, + null, + () -> ImmutableList.of(snapshot1, snapshot2), + ImmutableList.of(), + ImmutableList.of(), + ImmutableMap.of( + SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(snapshotId2).build()), + ImmutableList.of(), + ImmutableList.of(), + 0L, + ImmutableList.of(), + ImmutableList.of(), + null); + + TableMetadata transformed = + base.transformSnapshots( + snapshot -> { + if (snapshot instanceof BaseSnapshot) { + BaseSnapshot tSnapshot = (BaseSnapshot) snapshot; + return new BaseSnapshot( + tSnapshot.sequenceNumber(), + tSnapshot.snapshotId(), + tSnapshot.parentId(), + tSnapshot.timestampMillis(), + tSnapshot.operation(), + ImmutableMap.of(), + tSnapshot.schemaId(), + tSnapshot.manifestListLocation(), + tSnapshot.firstRowId(), + tSnapshot.addedRows(), + tSnapshot.keyId()); + } + return snapshot; + }); + + assertThat(transformed.snapshots()).hasSize(2); + assertThat(transformed.snapshots().get(0).summary()).isEmpty(); + assertThat(transformed.snapshots().get(1).summary()).isEmpty(); + assertThat(transformed.snapshots().get(0).snapshotId()).isEqualTo(snapshotId1); + assertThat(transformed.snapshots().get(1).snapshotId()).isEqualTo(snapshotId2); + assertThat(transformed.snapshots().get(0).operation()).isEqualTo("append"); + assertThat(transformed.snapshots().get(1).operation()).isEqualTo("overwrite"); + } } From 0ccb33dd90d111f2414038cba325c9d385d54623 Mon Sep 17 00:00:00 2001 From: Prashant Kumar Singh Date: Tue, 25 Nov 2025 01:47:31 +0000 Subject: [PATCH 2/2] Add projection for TableMetadata --- .../apache/iceberg/RewriteTablePathUtil.java | 3 +- .../org/apache/iceberg/TableMetadata.java | 37 +- .../apache/iceberg/TableMetadataParser.java | 3 +- .../iceberg/TableMetadataProjection.java | 204 +++++++ .../org/apache/iceberg/TestTableMetadata.java | 250 +-------- .../iceberg/TestTableMetadataProjection.java | 528 ++++++++++++++++++ 6 files changed, 755 insertions(+), 270 deletions(-) create mode 100644 core/src/main/java/org/apache/iceberg/TableMetadataProjection.java create mode 100644 core/src/test/java/org/apache/iceberg/TestTableMetadataProjection.java diff --git a/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java index 5264e8fbae2d..ee7679f5e972 100644 --- a/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java +++ b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java @@ -136,8 +136,7 @@ public static TableMetadata replacePaths( metadata.partitionStatisticsFiles(), sourcePrefix, targetPrefix), metadata.nextRowId(), metadata.encryptionKeys(), - metadata.changes(), - null); + metadata.changes()); } private static Map updateProperties( diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index 5b2d5af820ae..1c98a3588683 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -263,7 +263,6 @@ public String toString() { private final List changes; private final long nextRowId; private final List encryptionKeys; - private final transient Function snapshotTransformer; private SerializableSupplier> snapshotsSupplier; private volatile List snapshots; private volatile Map snapshotsById; @@ -297,8 +296,7 @@ public String toString() { List partitionStatisticsFiles, long nextRowId, List encryptionKeys, - List changes, - Function snapshotTransformer) { + List changes) { Preconditions.checkArgument( specs != null && !specs.isEmpty(), "Partition specs cannot be null or empty"); Preconditions.checkArgument( @@ -335,9 +333,7 @@ public String toString() { this.sortOrders = sortOrders; this.properties = properties; this.currentSnapshotId = currentSnapshotId; - this.snapshotTransformer = - snapshotTransformer != null ? snapshotTransformer : Function.identity(); - this.snapshots = snapshots != null ? applySnapshotTransformer(snapshots) : null; + this.snapshots = snapshots; this.snapshotsSupplier = snapshotsSupplier; this.snapshotsLoaded = snapshotsSupplier == null; this.snapshotLog = snapshotLog; @@ -532,6 +528,10 @@ public Snapshot currentSnapshot() { return snapshotsById.get(currentSnapshotId); } + public long currentSnapshotId() { + return currentSnapshotId; + } + public List snapshots() { ensureSnapshotsLoaded(); @@ -543,9 +543,7 @@ private synchronized void ensureSnapshotsLoaded() { List loadedSnapshots = Lists.newArrayList(snapshotsSupplier.get()); loadedSnapshots.removeIf(s -> s.sequenceNumber() > lastSequenceNumber); - List transformedSnapshots = applySnapshotTransformer(loadedSnapshots); - - this.snapshots = ImmutableList.copyOf(transformedSnapshots); + this.snapshots = ImmutableList.copyOf(loadedSnapshots); this.snapshotsById = indexAndValidateSnapshots(snapshots, lastSequenceNumber); validateCurrentSnapshot(); @@ -621,14 +619,6 @@ public TableMetadata removeSnapshotsIf(Predicate removeIf) { return new Builder(this).removeSnapshots(toRemove).build(); } - public TableMetadata transformSnapshots(Function transformer) { - return new Builder(this).transformSnapshots(transformer).build(); - } - - private List applySnapshotTransformer(List snapshotList) { - return snapshotList.stream().map(snapshotTransformer).collect(Collectors.toList()); - } - public TableMetadata replaceProperties(Map rawProperties) { ValidationException.check(rawProperties != null, "Cannot set properties to null"); Map newProperties = unreservedProperties(rawProperties); @@ -936,7 +926,6 @@ public static class Builder { private boolean suppressHistoricalSnapshots = false; private long nextRowId; private final List encryptionKeys; - private Function snapshotTransformer = Function.identity(); // change tracking private final List changes; @@ -1537,12 +1526,6 @@ public Builder discardChanges() { return this; } - public Builder transformSnapshots(Function transformer) { - Preconditions.checkArgument(transformer != null, "Snapshot transformer cannot be null"); - this.snapshotTransformer = transformer; - return this; - } - public Builder setPreviousFileLocation(String previousFileLocation) { this.previousFileLocation = previousFileLocation; return this; @@ -1553,8 +1536,7 @@ private boolean hasChanges() { || (discardChanges && !changes.isEmpty()) || metadataLocation != null || suppressHistoricalSnapshots - || null != snapshotsSupplier - || !snapshotTransformer.equals(Function.identity()); + || null != snapshotsSupplier; } public TableMetadata build() { @@ -1618,8 +1600,7 @@ public TableMetadata build() { .collect(Collectors.toList()), nextRowId, encryptionKeys, - discardChanges ? ImmutableList.of() : ImmutableList.copyOf(changes), - snapshotTransformer); + discardChanges ? ImmutableList.of() : ImmutableList.copyOf(changes)); } private int addSchemaInternal(Schema schema, int newLastColumnId) { diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index cdd9becf3c48..eeeeeab8a699 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -580,8 +580,7 @@ public static TableMetadata fromJson(String metadataLocation, JsonNode node) { partitionStatisticsFiles, lastRowId, keys, - ImmutableList.of() /* no changes from the file */, - null /* no snapshot transformer */); + ImmutableList.of() /* no changes from the file */); } private static Map refsFromJson(JsonNode refMap) { diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataProjection.java b/core/src/main/java/org/apache/iceberg/TableMetadataProjection.java new file mode 100644 index 000000000000..8c3216def916 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/TableMetadataProjection.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; + +/** + * A projection wrapper for {@link TableMetadata} that transforms snapshots on-demand. + * + *

This projection follows the same pattern as StructProjection, applying transformations lazily + * when snapshot-related methods are accessed. Unlike embedding transformation logic in + * TableMetadata itself, this keeps the transformation concerns separate. + * + *

The projection is lightweight - it extends TableMetadata and overrides only snapshot-related + * methods, allowing all other metadata (schemas, specs, properties, etc.) to be inherited + * naturally. + * + *

Example use cases: + * + *

    + *
  • Filtering sensitive data from snapshot summaries before exposing metadata + *
  • Enriching snapshots with additional computed properties + *
  • Testing scenarios requiring modified snapshot metadata without persisting changes + *
+ * + *

Example usage: + * + *

+ * // Remove sensitive data from snapshot summaries
+ * TableMetadata cleaned = TableMetadataProjection.create(
+ *     originalMetadata,
+ *     snapshot -> {
+ *       if (snapshot instanceof BaseSnapshot) {
+ *         BaseSnapshot base = (BaseSnapshot) snapshot;
+ *         Map<String, String> filteredSummary = filterSensitiveKeys(base.summary());
+ *         return new BaseSnapshot(
+ *             base.sequenceNumber(), base.snapshotId(), base.parentId(),
+ *             base.timestampMillis(), base.operation(), filteredSummary,
+ *             base.schemaId(), base.manifestListLocation(),
+ *             base.firstRowId(), base.addedRows(), base.keyId());
+ *       }
+ *       return snapshot;
+ *     });
+ * 
+ */ +public class TableMetadataProjection extends TableMetadata { + + /** + * Creates a projecting wrapper for {@link TableMetadata} that transforms snapshots. + * + * @param metadata the base table metadata to wrap + * @param snapshotTransformer function to transform each snapshot + * @return a projection that applies the transformer to all snapshots + */ + public static TableMetadata create( + TableMetadata metadata, Function snapshotTransformer) { + Preconditions.checkArgument(metadata != null, "Metadata cannot be null"); + Preconditions.checkArgument(snapshotTransformer != null, "Snapshot transformer cannot be null"); + return new TableMetadataProjection(metadata, snapshotTransformer); + } + + private final Function snapshotTransformer; + private volatile List transformedSnapshots; + private volatile Map transformedSnapshotsById; + private volatile Map transformedRefs; + + private TableMetadataProjection( + TableMetadata base, Function snapshotTransformer) { + // Call super constructor with all fields from base metadata + // Pass null for snapshots since we'll provide them via override + super( + base.metadataFileLocation(), + base.formatVersion(), + base.uuid(), + base.location(), + base.lastSequenceNumber(), + base.lastUpdatedMillis(), + base.lastColumnId(), + base.currentSchemaId(), + base.schemas(), + base.defaultSpecId(), + base.specs(), + base.lastAssignedPartitionId(), + base.defaultSortOrderId(), + base.sortOrders(), + base.properties(), + base.currentSnapshotId(), // Get raw field value without triggering loading + null, // snapshots will be provided via override + base::snapshots, // lazy supplier to trigger transformation + base.snapshotLog(), + base.previousFiles(), + base.refs(), + base.statisticsFiles(), + base.partitionStatisticsFiles(), + base.nextRowId(), + base.encryptionKeys(), + base.changes()); + + this.snapshotTransformer = snapshotTransformer; + } + + @Override + public List snapshots() { + if (transformedSnapshots == null) { + synchronized (this) { + if (transformedSnapshots == null) { + // Trigger lazy loading via super, which will call our supplier + List baseSnapshots = super.snapshots(); + transformedSnapshots = + baseSnapshots.stream() + .map(snapshotTransformer) + .collect(ImmutableList.toImmutableList()); + } + } + } + return transformedSnapshots; + } + + @Override + public Snapshot snapshot(long snapshotId) { + // Build index lazily and look up transformed snapshot + if (transformedSnapshotsById == null) { + synchronized (this) { + if (transformedSnapshotsById == null) { + transformedSnapshotsById = + snapshots().stream() + .collect(ImmutableMap.toImmutableMap(Snapshot::snapshotId, s -> s)); + } + } + } + return transformedSnapshotsById.get(snapshotId); + } + + @Override + public Snapshot currentSnapshot() { + long currentId = currentSnapshotId(); + if (currentId == -1) { + return null; + } + + // Trigger loading if needed and return transformed snapshot + return snapshot(currentId); + } + + @Override + public Map refs() { + if (transformedRefs == null) { + synchronized (this) { + if (transformedRefs == null) { + // Rebuild refs to point to transformed snapshots + // Filter out any refs that point to snapshots that no longer exist after transformation + Map transformedById = transformedSnapshotsById(); + transformedRefs = + super.refs().entrySet().stream() + .filter(entry -> transformedById.containsKey(entry.getValue().snapshotId())) + .collect(ImmutableMap.toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); + } + } + } + return transformedRefs; + } + + @Override + public SnapshotRef ref(String name) { + return refs().get(name); + } + + private Map transformedSnapshotsById() { + if (transformedSnapshotsById == null) { + synchronized (this) { + if (transformedSnapshotsById == null) { + transformedSnapshotsById = + snapshots().stream() + .collect(ImmutableMap.toImmutableMap(Snapshot::snapshotId, s -> s)); + } + } + } + return transformedSnapshotsById; + } + + // All other methods (schemas, specs, properties, statistics, etc.) are + // automatically inherited from TableMetadata without any delegation needed +} diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 36af2925bcd9..345f506fa978 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -202,8 +202,7 @@ public void testJsonConversion() throws Exception { partitionStatisticsFiles, 40, ImmutableList.of(), - ImmutableList.of(), - null); + ImmutableList.of()); String asJson = TableMetadataParser.toJson(expected); TableMetadata metadata = TableMetadataParser.fromJson(asJson); @@ -308,8 +307,7 @@ public void testBackwardCompat() throws Exception { ImmutableList.of(), 0, ImmutableList.of(), - ImmutableList.of(), - null); + ImmutableList.of()); String asJson = toJsonWithoutSpecAndSchemaList(expected); TableMetadata metadata = TableMetadataParser.fromJson(asJson); @@ -433,8 +431,7 @@ public void testInvalidMainBranch() throws IOException { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of(), - null)) + ImmutableList.of())) .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith("Current snapshot ID does not match main branch"); } @@ -482,8 +479,7 @@ public void testMainWithoutCurrent() throws IOException { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of(), - null)) + ImmutableList.of())) .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith("Current snapshot is not set, but main branch exists"); } @@ -525,8 +521,7 @@ public void testBranchSnapshotMissing() { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of(), - null)) + ImmutableList.of())) .isInstanceOf(IllegalArgumentException.class) .hasMessageEndingWith("does not exist in the existing snapshots list"); } @@ -645,8 +640,7 @@ public void testJsonWithPreviousMetadataLog() throws Exception { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of(), - null); + ImmutableList.of()); String asJson = TableMetadataParser.toJson(base); TableMetadata metadataFromJson = TableMetadataParser.fromJson(asJson); @@ -737,8 +731,7 @@ public void testAddPreviousMetadataRemoveNone() throws IOException { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of(), - null); + ImmutableList.of()); previousMetadataLog.add(latestPreviousMetadata); @@ -844,8 +837,7 @@ public void testAddPreviousMetadataRemoveOne() throws IOException { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of(), - null); + ImmutableList.of()); previousMetadataLog.add(latestPreviousMetadata); @@ -955,8 +947,7 @@ public void testAddPreviousMetadataRemoveMultiple() throws IOException { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of(), - null); + ImmutableList.of()); previousMetadataLog.add(latestPreviousMetadata); @@ -1004,8 +995,7 @@ public void testV2UUIDValidation() { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of(), - null)) + ImmutableList.of())) .isInstanceOf(IllegalArgumentException.class) .hasMessage("UUID is required in format v2"); } @@ -1042,8 +1032,7 @@ public void testVersionValidation() { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of(), - null)) + ImmutableList.of())) .isInstanceOf(IllegalArgumentException.class) .hasMessage( "Unsupported format version: v%s (supported: v%s)", @@ -1091,8 +1080,7 @@ public void testVersionValidation() { ImmutableList.of(), 0L, ImmutableList.of(), - ImmutableList.of(), - null)) + ImmutableList.of())) .isNotNull(); assertThat( @@ -1979,218 +1967,4 @@ public void testMetadataWithRemoveSchemas() { assertThat(meta.changes()).anyMatch(u -> u instanceof MetadataUpdate.RemoveSchemas); } - - @Test - public void testTransformSnapshotsWithoutLazyLoading() { - long snapshotId1 = System.currentTimeMillis(); - Map summary1 = - ImmutableMap.of( - "total-records", "100", - "total-files", "10", - "operation", "append"); - - Snapshot snapshot1 = - new BaseSnapshot( - 0, - snapshotId1, - null, - snapshotId1, - "append", - summary1, - TEST_SCHEMA.schemaId(), - "file:/tmp/manifest1.avro", - null, - null, - null); - - long snapshotId2 = snapshotId1 + 1; - Map summary2 = - ImmutableMap.of( - "total-records", "200", - "total-files", "20", - "operation", "overwrite"); - - Snapshot snapshot2 = - new BaseSnapshot( - 1, - snapshotId2, - snapshotId1, - snapshotId2, - "overwrite", - summary2, - TEST_SCHEMA.schemaId(), - "file:/tmp/manifest2.avro", - null, - null, - null); - - TableMetadata base = - new TableMetadata( - null, - 2, - UUID.randomUUID().toString(), - TEST_LOCATION, - 1, - System.currentTimeMillis(), - LAST_ASSIGNED_COLUMN_ID, - TEST_SCHEMA.schemaId(), - ImmutableList.of(TEST_SCHEMA), - SPEC_5.specId(), - ImmutableList.of(SPEC_5), - SPEC_5.lastAssignedFieldId(), - SORT_ORDER_3.orderId(), - ImmutableList.of(SORT_ORDER_3), - ImmutableMap.of(), - snapshotId2, - ImmutableList.of(snapshot1, snapshot2), - null, - ImmutableList.of(), - ImmutableList.of(), - ImmutableMap.of( - SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(snapshotId2).build()), - ImmutableList.of(), - ImmutableList.of(), - 0L, - ImmutableList.of(), - ImmutableList.of(), - null); - - assertThat(base.snapshots()).hasSize(2); - assertThat(base.snapshots().get(0).summary()).isEqualTo(summary1); - assertThat(base.snapshots().get(1).summary()).isEqualTo(summary2); - - TableMetadata transformed = - base.transformSnapshots( - snapshot -> { - if (snapshot instanceof BaseSnapshot) { - BaseSnapshot tSnapshot = (BaseSnapshot) snapshot; - return new BaseSnapshot( - tSnapshot.sequenceNumber(), - tSnapshot.snapshotId(), - tSnapshot.parentId(), - tSnapshot.timestampMillis(), - tSnapshot.operation(), - ImmutableMap.of(), - tSnapshot.schemaId(), - tSnapshot.manifestListLocation(), - tSnapshot.firstRowId(), - tSnapshot.addedRows(), - tSnapshot.keyId()); - } - return snapshot; - }); - - assertThat(transformed.snapshots()).hasSize(2); - assertThat(transformed.snapshots().get(0).summary()).isEmpty(); - assertThat(transformed.snapshots().get(1).summary()).isEmpty(); - assertThat(transformed.snapshots().get(0).snapshotId()).isEqualTo(snapshotId1); - assertThat(transformed.snapshots().get(1).snapshotId()).isEqualTo(snapshotId2); - assertThat(transformed.snapshots().get(0).operation()).isEqualTo("append"); - assertThat(transformed.snapshots().get(1).operation()).isEqualTo("overwrite"); - } - - @Test - public void testTransformSnapshotsWithLazyLoading() { - long snapshotId1 = System.currentTimeMillis(); - Map summary1 = - ImmutableMap.of( - "total-records", "100", - "total-files", "10", - "operation", "append"); - - Snapshot snapshot1 = - new BaseSnapshot( - 0, - snapshotId1, - null, - snapshotId1, - "append", - summary1, - TEST_SCHEMA.schemaId(), - "file:/tmp/manifest1.avro", - null, - null, - null); - - long snapshotId2 = snapshotId1 + 1; - Map summary2 = - ImmutableMap.of( - "total-records", "200", - "total-files", "20", - "operation", "overwrite"); - - Snapshot snapshot2 = - new BaseSnapshot( - 1, - snapshotId2, - snapshotId1, - snapshotId2, - "overwrite", - summary2, - TEST_SCHEMA.schemaId(), - "file:/tmp/manifest2.avro", - null, - null, - null); - - TableMetadata base = - new TableMetadata( - null, - 2, - UUID.randomUUID().toString(), - TEST_LOCATION, - 1, - System.currentTimeMillis(), - LAST_ASSIGNED_COLUMN_ID, - TEST_SCHEMA.schemaId(), - ImmutableList.of(TEST_SCHEMA), - SPEC_5.specId(), - ImmutableList.of(SPEC_5), - SPEC_5.lastAssignedFieldId(), - SORT_ORDER_3.orderId(), - ImmutableList.of(SORT_ORDER_3), - ImmutableMap.of(), - snapshotId2, - null, - () -> ImmutableList.of(snapshot1, snapshot2), - ImmutableList.of(), - ImmutableList.of(), - ImmutableMap.of( - SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(snapshotId2).build()), - ImmutableList.of(), - ImmutableList.of(), - 0L, - ImmutableList.of(), - ImmutableList.of(), - null); - - TableMetadata transformed = - base.transformSnapshots( - snapshot -> { - if (snapshot instanceof BaseSnapshot) { - BaseSnapshot tSnapshot = (BaseSnapshot) snapshot; - return new BaseSnapshot( - tSnapshot.sequenceNumber(), - tSnapshot.snapshotId(), - tSnapshot.parentId(), - tSnapshot.timestampMillis(), - tSnapshot.operation(), - ImmutableMap.of(), - tSnapshot.schemaId(), - tSnapshot.manifestListLocation(), - tSnapshot.firstRowId(), - tSnapshot.addedRows(), - tSnapshot.keyId()); - } - return snapshot; - }); - - assertThat(transformed.snapshots()).hasSize(2); - assertThat(transformed.snapshots().get(0).summary()).isEmpty(); - assertThat(transformed.snapshots().get(1).summary()).isEmpty(); - assertThat(transformed.snapshots().get(0).snapshotId()).isEqualTo(snapshotId1); - assertThat(transformed.snapshots().get(1).snapshotId()).isEqualTo(snapshotId2); - assertThat(transformed.snapshots().get(0).operation()).isEqualTo("append"); - assertThat(transformed.snapshots().get(1).operation()).isEqualTo("overwrite"); - } } diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadataProjection.java b/core/src/test/java/org/apache/iceberg/TestTableMetadataProjection.java new file mode 100644 index 000000000000..f0619591c15c --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadataProjection.java @@ -0,0 +1,528 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.Map; +import java.util.UUID; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestTableMetadataProjection { + + private static final Schema TEST_SCHEMA = + new Schema( + Types.NestedField.required(1, "x", Types.LongType.get()), + Types.NestedField.required(2, "y", Types.LongType.get()), + Types.NestedField.required(3, "z", Types.LongType.get())); + + private static final PartitionSpec SPEC_5 = + PartitionSpec.builderFor(TEST_SCHEMA).withSpecId(5).build(); + + private static final SortOrder SORT_ORDER_3 = + SortOrder.builderFor(TEST_SCHEMA).withOrderId(3).asc("y", NullOrder.NULLS_FIRST).build(); + + private static final String TEST_LOCATION = "s3://bucket/test/location"; + private static final int LAST_ASSIGNED_COLUMN_ID = 3; + + @Test + public void testTransformSnapshotsWithoutLazyLoading() { + long snapshotId1 = System.currentTimeMillis(); + Map summary1 = + ImmutableMap.of( + "total-records", "100", + "total-files", "10", + "operation", "append"); + + Snapshot snapshot1 = + new BaseSnapshot( + 0, + snapshotId1, + null, + snapshotId1, + "append", + summary1, + TEST_SCHEMA.schemaId(), + "file:/tmp/manifest1.avro", + null, + null, + null); + + long snapshotId2 = snapshotId1 + 1; + Map summary2 = + ImmutableMap.of( + "total-records", "200", + "total-files", "20", + "operation", "overwrite"); + + Snapshot snapshot2 = + new BaseSnapshot( + 1, + snapshotId2, + snapshotId1, + snapshotId2, + "overwrite", + summary2, + TEST_SCHEMA.schemaId(), + "file:/tmp/manifest2.avro", + null, + null, + null); + + TableMetadata base = + new TableMetadata( + null, + 2, + UUID.randomUUID().toString(), + TEST_LOCATION, + 1, + System.currentTimeMillis(), + LAST_ASSIGNED_COLUMN_ID, + TEST_SCHEMA.schemaId(), + ImmutableList.of(TEST_SCHEMA), + SPEC_5.specId(), + ImmutableList.of(SPEC_5), + SPEC_5.lastAssignedFieldId(), + SORT_ORDER_3.orderId(), + ImmutableList.of(SORT_ORDER_3), + ImmutableMap.of(), + snapshotId2, + ImmutableList.of(snapshot1, snapshot2), + null, + ImmutableList.of(), + ImmutableList.of(), + ImmutableMap.of( + SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(snapshotId2).build()), + ImmutableList.of(), + ImmutableList.of(), + 0L, + ImmutableList.of(), + ImmutableList.of()); + + assertThat(base.snapshots()).hasSize(2); + assertThat(base.snapshots().get(0).summary()).isEqualTo(summary1); + assertThat(base.snapshots().get(1).summary()).isEqualTo(summary2); + + // Create projection that clears snapshot summaries + TableMetadata projected = + TableMetadataProjection.create( + base, + snapshot -> { + if (snapshot instanceof BaseSnapshot) { + BaseSnapshot tSnapshot = (BaseSnapshot) snapshot; + return new BaseSnapshot( + tSnapshot.sequenceNumber(), + tSnapshot.snapshotId(), + tSnapshot.parentId(), + tSnapshot.timestampMillis(), + tSnapshot.operation(), + ImmutableMap.of(), // Clear summary + tSnapshot.schemaId(), + tSnapshot.manifestListLocation(), + tSnapshot.firstRowId(), + tSnapshot.addedRows(), + tSnapshot.keyId()); + } + return snapshot; + }); + + // Verify transformed snapshots + assertThat(projected.snapshots()).hasSize(2); + assertThat(projected.snapshots().get(0).summary()).isEmpty(); + assertThat(projected.snapshots().get(1).summary()).isEmpty(); + assertThat(projected.snapshots().get(0).snapshotId()).isEqualTo(snapshotId1); + assertThat(projected.snapshots().get(1).snapshotId()).isEqualTo(snapshotId2); + assertThat(projected.snapshots().get(0).operation()).isEqualTo("append"); + assertThat(projected.snapshots().get(1).operation()).isEqualTo("overwrite"); + + // Verify snapshot lookup by ID works + assertThat(projected.snapshot(snapshotId1)).isNotNull(); + assertThat(projected.snapshot(snapshotId1).summary()).isEmpty(); + assertThat(projected.snapshot(snapshotId2)).isNotNull(); + assertThat(projected.snapshot(snapshotId2).summary()).isEmpty(); + + // Verify current snapshot + assertThat(projected.currentSnapshot()).isNotNull(); + assertThat(projected.currentSnapshot().snapshotId()).isEqualTo(snapshotId2); + assertThat(projected.currentSnapshot().summary()).isEmpty(); + + // Verify refs still work + assertThat(projected.refs()).hasSize(1); + assertThat(projected.ref(SnapshotRef.MAIN_BRANCH)).isNotNull(); + assertThat(projected.ref(SnapshotRef.MAIN_BRANCH).snapshotId()).isEqualTo(snapshotId2); + } + + @Test + public void testTransformSnapshotsWithLazyLoading() { + long snapshotId1 = System.currentTimeMillis(); + Map summary1 = + ImmutableMap.of( + "total-records", "100", + "total-files", "10", + "operation", "append"); + + Snapshot snapshot1 = + new BaseSnapshot( + 0, + snapshotId1, + null, + snapshotId1, + "append", + summary1, + TEST_SCHEMA.schemaId(), + "file:/tmp/manifest1.avro", + null, + null, + null); + + long snapshotId2 = snapshotId1 + 1; + Map summary2 = + ImmutableMap.of( + "total-records", "200", + "total-files", "20", + "operation", "overwrite"); + + Snapshot snapshot2 = + new BaseSnapshot( + 1, + snapshotId2, + snapshotId1, + snapshotId2, + "overwrite", + summary2, + TEST_SCHEMA.schemaId(), + "file:/tmp/manifest2.avro", + null, + null, + null); + + // Create TableMetadata with lazy snapshot loading + TableMetadata base = + new TableMetadata( + null, + 2, + UUID.randomUUID().toString(), + TEST_LOCATION, + 1, + System.currentTimeMillis(), + LAST_ASSIGNED_COLUMN_ID, + TEST_SCHEMA.schemaId(), + ImmutableList.of(TEST_SCHEMA), + SPEC_5.specId(), + ImmutableList.of(SPEC_5), + SPEC_5.lastAssignedFieldId(), + SORT_ORDER_3.orderId(), + ImmutableList.of(SORT_ORDER_3), + ImmutableMap.of(), + snapshotId2, + null, // No snapshots provided directly + () -> ImmutableList.of(snapshot1, snapshot2), // Lazy supplier + ImmutableList.of(), + ImmutableList.of(), + ImmutableMap.of( + SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(snapshotId2).build()), + ImmutableList.of(), + ImmutableList.of(), + 0L, + ImmutableList.of(), + ImmutableList.of()); + + // Create projection + TableMetadata projected = + TableMetadataProjection.create( + base, + snapshot -> { + if (snapshot instanceof BaseSnapshot) { + BaseSnapshot tSnapshot = (BaseSnapshot) snapshot; + return new BaseSnapshot( + tSnapshot.sequenceNumber(), + tSnapshot.snapshotId(), + tSnapshot.parentId(), + tSnapshot.timestampMillis(), + tSnapshot.operation(), + ImmutableMap.of(), // Clear summary + tSnapshot.schemaId(), + tSnapshot.manifestListLocation(), + tSnapshot.firstRowId(), + tSnapshot.addedRows(), + tSnapshot.keyId()); + } + return snapshot; + }); + + // IMPORTANT: Verify that currentSnapshotId() works WITHOUT triggering lazy loading + assertThat(projected.currentSnapshotId()).isEqualTo(snapshotId2); + + // Verify that currentSnapshot() triggers loading and returns transformed snapshot + Snapshot current = projected.currentSnapshot(); + assertThat(current).isNotNull(); + assertThat(current.snapshotId()).isEqualTo(snapshotId2); + assertThat(current.summary()).isEmpty(); // Should be transformed (cleared) + + // Now verify all snapshots are accessible and transformed + assertThat(projected.snapshots()).hasSize(2); + assertThat(projected.snapshots().get(0).summary()).isEmpty(); + assertThat(projected.snapshots().get(1).summary()).isEmpty(); + assertThat(projected.snapshots().get(0).snapshotId()).isEqualTo(snapshotId1); + assertThat(projected.snapshots().get(1).snapshotId()).isEqualTo(snapshotId2); + assertThat(projected.snapshots().get(0).operation()).isEqualTo("append"); + assertThat(projected.snapshots().get(1).operation()).isEqualTo("overwrite"); + } + + @Test + public void testNonSnapshotMethodsAreInherited() { + long snapshotId = System.currentTimeMillis(); + Snapshot snapshot = + new BaseSnapshot( + 0, + snapshotId, + null, + snapshotId, + "append", + ImmutableMap.of(), + TEST_SCHEMA.schemaId(), + "file:/tmp/manifest.avro", + null, + null, + null); + + Map properties = ImmutableMap.of("key1", "value1", "key2", "value2"); + + TableMetadata base = + new TableMetadata( + null, + 2, + UUID.randomUUID().toString(), + TEST_LOCATION, + 1, + System.currentTimeMillis(), + LAST_ASSIGNED_COLUMN_ID, + TEST_SCHEMA.schemaId(), + ImmutableList.of(TEST_SCHEMA), + SPEC_5.specId(), + ImmutableList.of(SPEC_5), + SPEC_5.lastAssignedFieldId(), + SORT_ORDER_3.orderId(), + ImmutableList.of(SORT_ORDER_3), + properties, + snapshotId, + ImmutableList.of(snapshot), + null, + ImmutableList.of(), + ImmutableList.of(), + ImmutableMap.of(SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(snapshotId).build()), + ImmutableList.of(), + ImmutableList.of(), + 0L, + ImmutableList.of(), + ImmutableList.of()); + + TableMetadata projected = + TableMetadataProjection.create(base, s -> s); // Identity transformation + + // Verify all non-snapshot methods are properly inherited + assertThat(projected.formatVersion()).isEqualTo(base.formatVersion()); + assertThat(projected.uuid()).isEqualTo(base.uuid()); + assertThat(projected.location()).isEqualTo(base.location()); + assertThat(projected.lastSequenceNumber()).isEqualTo(base.lastSequenceNumber()); + assertThat(projected.lastUpdatedMillis()).isEqualTo(base.lastUpdatedMillis()); + assertThat(projected.lastColumnId()).isEqualTo(base.lastColumnId()); + + // Schema methods + assertThat(projected.schema()).isEqualTo(base.schema()); + assertThat(projected.schemas()).isEqualTo(base.schemas()); + assertThat(projected.currentSchemaId()).isEqualTo(base.currentSchemaId()); + + // Spec methods + assertThat(projected.spec()).isEqualTo(base.spec()); + assertThat(projected.specs()).isEqualTo(base.specs()); + assertThat(projected.defaultSpecId()).isEqualTo(base.defaultSpecId()); + + // Sort order methods + assertThat(projected.sortOrder()).isEqualTo(base.sortOrder()); + assertThat(projected.sortOrders()).isEqualTo(base.sortOrders()); + assertThat(projected.defaultSortOrderId()).isEqualTo(base.defaultSortOrderId()); + + // Properties + assertThat(projected.properties()).isEqualTo(base.properties()); + assertThat(projected.property("key1", "default")).isEqualTo("value1"); + + // Other metadata + assertThat(projected.snapshotLog()).isEqualTo(base.snapshotLog()); + assertThat(projected.previousFiles()).isEqualTo(base.previousFiles()); + assertThat(projected.statisticsFiles()).isEqualTo(base.statisticsFiles()); + assertThat(projected.partitionStatisticsFiles()).isEqualTo(base.partitionStatisticsFiles()); + } + + @Test + public void testSerializationAndDeserialization() { + long snapshotId = System.currentTimeMillis(); + Map summary = ImmutableMap.of("operation", "append", "records", "100"); + + Snapshot snapshot = + new BaseSnapshot( + 0, + snapshotId, + null, + snapshotId, + "append", + summary, + TEST_SCHEMA.schemaId(), + "file:/tmp/manifest.avro", + null, + null, + null); + + TableMetadata base = + new TableMetadata( + null, + 2, + UUID.randomUUID().toString(), + TEST_LOCATION, + 1, + System.currentTimeMillis(), + LAST_ASSIGNED_COLUMN_ID, + TEST_SCHEMA.schemaId(), + ImmutableList.of(TEST_SCHEMA), + SPEC_5.specId(), + ImmutableList.of(SPEC_5), + SPEC_5.lastAssignedFieldId(), + SORT_ORDER_3.orderId(), + ImmutableList.of(SORT_ORDER_3), + ImmutableMap.of(), + snapshotId, + ImmutableList.of(snapshot), + null, + ImmutableList.of(), + ImmutableList.of(), + ImmutableMap.of(SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(snapshotId).build()), + ImmutableList.of(), + ImmutableList.of(), + 0L, + ImmutableList.of(), + ImmutableList.of()); + + // Create projection that modifies snapshot + TableMetadata projected = + TableMetadataProjection.create( + base, + s -> { + if (s instanceof BaseSnapshot) { + BaseSnapshot bs = (BaseSnapshot) s; + return new BaseSnapshot( + bs.sequenceNumber(), + bs.snapshotId(), + bs.parentId(), + bs.timestampMillis(), + bs.operation(), + ImmutableMap.of("filtered", "true"), // Modified summary + bs.schemaId(), + bs.manifestListLocation(), + bs.firstRowId(), + bs.addedRows(), + bs.keyId()); + } + return s; + }); + + // Serialize the PROJECTED metadata (with transformed snapshots) + String projectedJson = TableMetadataParser.toJson(projected); + + // Deserialize back + TableMetadata deserialized = TableMetadataParser.fromJson(projectedJson); + + // The deserialized version should have the TRANSFORMED snapshot data + // (not the original), because we serialized the projection + assertThat(deserialized.snapshots()).hasSize(1); + assertThat(deserialized.snapshots().get(0).snapshotId()).isEqualTo(snapshotId); + assertThat(deserialized.snapshots().get(0).summary()) + .containsEntry("filtered", "true") + .doesNotContainKey("records"); // Original key should be gone + + // But the base metadata should still have original data + assertThat(base.snapshots().get(0).summary()) + .containsEntry("records", "100") + .doesNotContainKey("filtered"); + + // Verify other metadata is preserved correctly + assertThat(deserialized.uuid()).isEqualTo(base.uuid()); + assertThat(deserialized.location()).isEqualTo(base.location()); + assertThat(deserialized.schema().asStruct()).isEqualTo(base.schema().asStruct()); + } + + @Test + public void testProjectionIsTableMetadata() { + long snapshotId = System.currentTimeMillis(); + Snapshot snapshot = + new BaseSnapshot( + 0, + snapshotId, + null, + snapshotId, + "append", + ImmutableMap.of(), + TEST_SCHEMA.schemaId(), + "file:/tmp/manifest.avro", + null, + null, + null); + + TableMetadata base = + new TableMetadata( + null, + 2, + UUID.randomUUID().toString(), + TEST_LOCATION, + 1, + System.currentTimeMillis(), + LAST_ASSIGNED_COLUMN_ID, + TEST_SCHEMA.schemaId(), + ImmutableList.of(TEST_SCHEMA), + SPEC_5.specId(), + ImmutableList.of(SPEC_5), + SPEC_5.lastAssignedFieldId(), + SORT_ORDER_3.orderId(), + ImmutableList.of(SORT_ORDER_3), + ImmutableMap.of(), + snapshotId, + ImmutableList.of(snapshot), + null, + ImmutableList.of(), + ImmutableList.of(), + ImmutableMap.of(SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(snapshotId).build()), + ImmutableList.of(), + ImmutableList.of(), + 0L, + ImmutableList.of(), + ImmutableList.of()); + + TableMetadata projected = TableMetadataProjection.create(base, s -> s); + + // Verify projection is-a TableMetadata + assertThat(projected).isInstanceOf(TableMetadata.class); + + // Can be used anywhere TableMetadata is expected + TableMetadata metadata = projected; + assertThat(metadata.snapshots()).isNotNull(); + assertThat(metadata.currentSnapshot()).isNotNull(); + } +}