diff --git a/core/src/main/java/org/apache/iceberg/FastAppend.java b/core/src/main/java/org/apache/iceberg/FastAppend.java index 11459e0ecbe1..15855ddb8263 100644 --- a/core/src/main/java/org/apache/iceberg/FastAppend.java +++ b/core/src/main/java/org/apache/iceberg/FastAppend.java @@ -36,6 +36,7 @@ class FastAppend extends SnapshotProducer implements AppendFiles { private final String tableName; private final SnapshotSummary.Builder summaryBuilder = SnapshotSummary.builder(); + private final SnapshotSummary.Builder manifestsSummary = SnapshotSummary.builder(); private final Map newDataFilesBySpec = Maps.newHashMap(); private final List appendManifests = Lists.newArrayList(); private final List rewrittenAppendManifests = Lists.newArrayList(); @@ -166,6 +167,8 @@ public List apply(TableMetadata base, Snapshot snapshot) { manifests.addAll(snapshot.allManifests(ops().io())); } + summaryBuilder.merge(buildManifestCountSummary(manifestsSummary, manifests, 0)); + return manifests; } diff --git a/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java b/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java index 9b5dce446732..03ca1bde862c 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java +++ b/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java @@ -87,6 +87,8 @@ public String partition() { // cache filtered manifests to avoid extra work when commits fail. private final Map filteredManifests = Maps.newConcurrentMap(); + // count of manifests that were rewritten with different manifest entry status during filtering + private int replacedManifestsCount = 0; // tracking where files were deleted to validate retries quickly private final Map> filteredManifestToDeletedFiles = @@ -313,6 +315,18 @@ private Set deletedFiles(ManifestFile[] manifests) { return deletedFiles; } + /** + * Returns the count of manifests that were replaced (rewritten) during filtering. + * + *

A manifest is considered replaced when a new manifest was created to replace the original + * one (i.e., the original manifest != filtered manifest). + * + * @return the count of replaced manifests + */ + int replacedManifestsCount() { + return replacedManifestsCount; + } + /** * Deletes filtered manifests that were created by this class, but are not in the committed * manifest set. @@ -329,9 +343,10 @@ void cleanUncommitted(Set committed) { ManifestFile manifest = entry.getKey(); ManifestFile filtered = entry.getValue(); if (!committed.contains(filtered)) { - // only delete if the filtered copy was created + // only delete if the filtered copy was created (manifest was replaced) if (!manifest.equals(filtered)) { deleteFile(filtered.path()); + replacedManifestsCount--; } // remove the entry from the cache @@ -342,6 +357,7 @@ void cleanUncommitted(Set committed) { private void invalidateFilteredCache() { cleanUncommitted(SnapshotProducer.EMPTY_SET); + replacedManifestsCount = 0; } /** @@ -367,7 +383,9 @@ private ManifestFile filterManifest( // manifest without copying data. if a manifest does have a file to remove, this will break // out of the loop and move on to filtering the manifest. if (manifestHasDeletedFiles(evaluator, manifest, reader)) { - return filterManifestWithDeletedFiles(evaluator, manifest, reader); + ManifestFile filtered = filterManifestWithDeletedFiles(evaluator, manifest, reader); + replacedManifestsCount++; + return filtered; } else { filteredManifests.put(manifest, manifest); return manifest; diff --git a/core/src/main/java/org/apache/iceberg/ManifestMergeManager.java b/core/src/main/java/org/apache/iceberg/ManifestMergeManager.java index 94eb8a110709..266e51e4cd09 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestMergeManager.java +++ b/core/src/main/java/org/apache/iceberg/ManifestMergeManager.java @@ -45,6 +45,8 @@ abstract class ManifestMergeManager> { // cache merge results to reuse when retrying private final Map, ManifestFile> mergedManifests = Maps.newConcurrentMap(); + // count of manifests that were replaced (merged) during bin-packing + private int replacedManifestsCount = 0; private final Supplier workerPoolSupplier; @@ -86,6 +88,18 @@ Iterable mergeManifests(Iterable manifests) { return merged; } + /** + * Returns the count of manifests that were replaced (merged) during bin-packing. + * + *

When multiple manifests are merged into a single manifest, each of the original manifests is + * considered replaced. + * + * @return the count of replaced manifests + */ + int replacedManifestsCount() { + return replacedManifestsCount; + } + void cleanUncommitted(Set committed) { // iterate over a copy of entries to avoid concurrent modification List, ManifestFile>> entries = @@ -96,8 +110,10 @@ void cleanUncommitted(Set committed) { ManifestFile merged = entry.getValue(); if (!committed.contains(merged)) { deleteFile(merged.path()); - // remove the deleted file from the cache - mergedManifests.remove(entry.getKey()); + // remove the deleted file from the cache and update replaced count + List bin = entry.getKey(); + mergedManifests.remove(bin); + replacedManifestsCount -= bin.size(); } } } @@ -200,8 +216,9 @@ private ManifestFile createManifest(int specId, List bin) { ManifestFile manifest = writer.toManifestFile(); - // update the cache + // update the cache and track replaced manifests mergedManifests.put(bin, manifest); + replacedManifestsCount += bin.size(); return manifest; } diff --git a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java index 51d17fbdd0f2..84b679166e87 100644 --- a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java +++ b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java @@ -92,6 +92,7 @@ abstract class MergingSnapshotProducer extends SnapshotProducer { private final List rewrittenAppendManifests = Lists.newArrayList(); private final SnapshotSummary.Builder addedFilesSummary = SnapshotSummary.builder(); private final SnapshotSummary.Builder appendedManifestsSummary = SnapshotSummary.builder(); + private final SnapshotSummary.Builder manifestsSummary = SnapshotSummary.builder(); private Expression deleteExpression = Expressions.alwaysFalse(); // cache new data manifests after writing @@ -962,6 +963,21 @@ public List apply(TableMetadata base, Snapshot snapshot) { Iterables.addAll(manifests, mergeManager.mergeManifests(unmergedManifests)); Iterables.addAll(manifests, deleteMergeManager.mergeManifests(unmergedDeleteManifests)); + // update created/kept/replaced manifest count + // replaced manifests come from: + // 1. filterManager - manifests rewritten to remove deleted files + // 2. deleteFilterManager - delete manifests rewritten to remove deleted files + // 3. mergeManager - data manifests merged via bin-packing + // 4. deleteMergeManager - delete manifests merged via bin-packing + // Note: rewrittenAppendManifests are NEW manifests (copies), not replaced ones + int replacedManifestsCount = + filterManager.replacedManifestsCount() + + deleteFilterManager.replacedManifestsCount() + + mergeManager.replacedManifestsCount() + + deleteMergeManager.replacedManifestsCount(); + summaryBuilder.merge( + buildManifestCountSummary(manifestsSummary, manifests, replacedManifestsCount)); + return manifests; } diff --git a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java index a8f28855ab9b..645b080d402d 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java @@ -645,6 +645,35 @@ protected boolean cleanupAfterCommit() { return true; } + /** + * Updates manifest count in the snapshot summary builder, including replaced manifests. + * + * @param summaryBuilder the summary builder to update + * @param manifests the list of manifests in the new snapshot + * @param replacedManifestsCount the count of manifests that were replaced (rewritten) + */ + protected SnapshotSummary.Builder buildManifestCountSummary( + SnapshotSummary.Builder summaryBuilder, + List manifests, + int replacedManifestsCount) { + int manifestsCreated = 0; + int manifestsKept = 0; + + for (ManifestFile manifest : manifests) { + if (manifest.snapshotId() == snapshotId()) { + manifestsCreated++; + } else { + manifestsKept++; + } + } + + summaryBuilder.set(SnapshotSummary.CREATED_MANIFESTS_COUNT, String.valueOf(manifestsCreated)); + summaryBuilder.set(SnapshotSummary.KEPT_MANIFESTS_COUNT, String.valueOf(manifestsKept)); + summaryBuilder.set( + SnapshotSummary.REPLACED_MANIFESTS_COUNT, String.valueOf(replacedManifestsCount)); + return summaryBuilder; + } + protected List writeDataManifests(Collection files, PartitionSpec spec) { return writeDataManifests(files, null /* inherit data seq */, spec); } diff --git a/core/src/test/java/org/apache/iceberg/TestCommitReporting.java b/core/src/test/java/org/apache/iceberg/TestCommitReporting.java index d17348a99cb8..48950b38b137 100644 --- a/core/src/test/java/org/apache/iceberg/TestCommitReporting.java +++ b/core/src/test/java/org/apache/iceberg/TestCommitReporting.java @@ -195,4 +195,68 @@ public void addAndDeleteManifests() { assertThat(metrics.manifestsReplaced().value()).isEqualTo(2L); assertThat(metrics.manifestEntriesProcessed().value()).isEqualTo(2L); } + + @TestTemplate + public void snapshotProducerManifestMetrics() { + String tableName = "snapshot-producer-manifest-metrics"; + Table table = + TestTables.create( + tableDir, tableName, SCHEMA, SPEC, SortOrder.unsorted(), formatVersion, reporter); + + // Test FastAppend: first append - creates 1 manifest, keeps 0 + table.newFastAppend().appendFile(FILE_A).commit(); + + CommitReport report = reporter.lastCommitReport(); + assertThat(report).isNotNull(); + assertThat(report.operation()).isEqualTo("append"); + assertThat(report.snapshotId()).isEqualTo(1L); + + CommitMetricsResult metrics = report.commitMetrics(); + assertThat(metrics.manifestsCreated().value()).isEqualTo(1L); + assertThat(metrics.manifestsKept().value()).isEqualTo(0L); + + // Test FastAppend: second append - creates 1 new manifest, keeps 1 + table.newFastAppend().appendFile(FILE_B).commit(); + + report = reporter.lastCommitReport(); + metrics = report.commitMetrics(); + assertThat(metrics.manifestsCreated().value()).isEqualTo(1L); + assertThat(metrics.manifestsKept().value()).isEqualTo(1L); + + // Test MergeAppend: creates 1 new manifest, keeps 2 + table.newAppend().appendFile(FILE_C).commit(); + + report = reporter.lastCommitReport(); + assertThat(report.operation()).isEqualTo("append"); + metrics = report.commitMetrics(); + assertThat(metrics.manifestsCreated().value()).isEqualTo(1L); + assertThat(metrics.manifestsKept().value()).isEqualTo(2L); + + // Test RowDelta with delete file: creates 1 delete manifest, keeps 3 data manifests + table.newRowDelta().addDeletes(fileADeletes()).commit(); + + report = reporter.lastCommitReport(); + assertThat(report.operation()).isEqualTo("delete"); + metrics = report.commitMetrics(); + assertThat(metrics.manifestsCreated().value()).isEqualTo(1L); + assertThat(metrics.manifestsKept().value()).isEqualTo(3L); + + // Test RowDelta with data and delete: creates 2 manifests (1 data + 1 delete), keeps 4 + table.newRowDelta().addRows(FILE_D).addDeletes(fileBDeletes()).commit(); + + report = reporter.lastCommitReport(); + assertThat(report.operation()).isEqualTo("overwrite"); + metrics = report.commitMetrics(); + assertThat(metrics.manifestsCreated().value()).isEqualTo(2L); + assertThat(metrics.manifestsKept().value()).isEqualTo(4L); + + // Test Delete: creates 1 manifest (rewritten), keeps 5 + table.newDelete().deleteFile(FILE_C).commit(); + + report = reporter.lastCommitReport(); + assertThat(report.operation()).isEqualTo("delete"); + metrics = report.commitMetrics(); + assertThat(metrics.manifestsCreated().value()).isEqualTo(1L); + assertThat(metrics.manifestsKept().value()).isEqualTo(5L); + } } diff --git a/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java b/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java index ea0988155b1d..4ef91088398c 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java +++ b/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java @@ -351,6 +351,62 @@ public void testDeleteFilesOnIndependentBranches() { statuses(Status.EXISTING, Status.DELETED, Status.DELETED)); } + @TestTemplate + public void testDeleteFilesWithManifestSnapshotSummary() { + // add both data files + Snapshot appendFile = + commit(table, table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B), branch); + + assertThat(appendFile.allManifests(FILE_IO)).hasSize(1); + assertThat(appendFile.summary()) + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); + + validateManifestEntries( + appendFile.allManifests(FILE_IO).get(0), + ids(appendFile.snapshotId(), appendFile.snapshotId()), + files(FILE_A, FILE_B), + statuses(Status.ADDED, Status.ADDED)); + + // delete the first data file by file reference + Snapshot deleteByFileReference = commit(table, table.newDelete().deleteFile(FILE_A), branch); + assertThat(deleteByFileReference.allManifests(FILE_IO)).hasSize(1); + assertThat(deleteByFileReference.summary()) + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); + validateManifestEntries( + deleteByFileReference.allManifests(FILE_IO).get(0), + ids(deleteByFileReference.snapshotId(), appendFile.snapshotId()), + files(FILE_A, FILE_B), + statuses(Status.DELETED, Status.EXISTING)); + + // unmatched delete by row filter + Snapshot unmatchedDelete = + commit(table, table.newDelete().deleteFromRowFilter(Expressions.alwaysFalse()), branch); + assertThat(unmatchedDelete.allManifests(FILE_IO)).hasSize(1); + assertThat(unmatchedDelete.summary()) + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "1"); + + // delete the second data file by using file path only + Snapshot deleteByFilePath = + commit(table, table.newDelete().deleteFile(FILE_B.location()), branch); + + assertThat(deleteByFilePath.allManifests(FILE_IO)).hasSize(1); + assertThat(deleteByFilePath.summary()) + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); + validateManifestEntries( + deleteByFilePath.allManifests(FILE_IO).get(0), + ids(deleteByFilePath.snapshotId()), + files(FILE_B), + statuses(Status.DELETED)); + } + @TestTemplate public void testDeleteWithCollision() { Schema schema = new Schema(Types.NestedField.required(0, "x", Types.StringType.get())); diff --git a/core/src/test/java/org/apache/iceberg/TestFastAppend.java b/core/src/test/java/org/apache/iceberg/TestFastAppend.java index 33153d8454c3..8f427525e214 100644 --- a/core/src/test/java/org/apache/iceberg/TestFastAppend.java +++ b/core/src/test/java/org/apache/iceberg/TestFastAppend.java @@ -170,7 +170,11 @@ public void testEmptyTableAppendManifest() throws IOException { } // validate that the metadata summary is correct when using appendManifest - assertThat(snap.summary()).containsEntry("added-data-files", "2"); + assertThat(snap.summary()) + .containsEntry("added-data-files", "2") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); V2Assert.assertEquals("Snapshot sequence number should be 1", 1, snap.sequenceNumber()); V2Assert.assertEquals( @@ -214,6 +218,13 @@ public void testEmptyTableAppendFilesAndManifest() throws IOException { assertThat(snap.allManifests(FILE_IO).get(1).path()).isEqualTo(manifest.path()); } + // validate manifest metrics in the snapshot summary + // 2 manifests created: 1 for appendFile (FILE_C, FILE_D) + 1 for appendManifest + assertThat(snap.summary()) + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "2") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); + V2Assert.assertEquals("Snapshot sequence number should be 1", 1, snap.sequenceNumber()); V2Assert.assertEquals( "Last sequence number should be 1", 1, readMetadata().lastSequenceNumber()); @@ -459,7 +470,10 @@ public void testAppendManifestWithSnapshotIdInheritance() throws IOException { .containsEntry("added-data-files", "2") .containsEntry("added-records", "2") .containsEntry("total-data-files", "2") - .containsEntry("total-records", "2"); + .containsEntry("total-records", "2") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -551,7 +565,10 @@ public void testDefaultPartitionSummaries() { assertThat(table.currentSnapshot().summary()) .doesNotContainKey(SnapshotSummary.PARTITION_SUMMARY_PROP) - .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1"); + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -571,7 +588,10 @@ public void testIncludedPartitionSummaries() { .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1") .containsEntry( SnapshotSummary.CHANGED_PARTITION_PREFIX + "data_bucket=0", - "added-data-files=1,added-records=1,added-files-size=10"); + "added-data-files=1,added-records=1,added-files-size=10") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -588,7 +608,10 @@ public void testIncludedPartitionSummaryLimit() { assertThat(table.currentSnapshot().summary()) .doesNotContainKey(SnapshotSummary.PARTITION_SUMMARY_PROP) - .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "2"); + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "2") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); } @TestTemplate diff --git a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java index 0759b0f13ad7..e07871e05605 100644 --- a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java +++ b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java @@ -285,7 +285,11 @@ public void testEmptyTableAppendManifest() throws IOException { statuses(Status.ADDED, Status.ADDED)); // validate that the metadata summary is correct when using appendManifest - assertThat(committedSnapshot.summary()).containsEntry("added-data-files", "2"); + assertThat(committedSnapshot.summary()) + .containsEntry("added-data-files", "2") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -514,6 +518,18 @@ public void testManifestMergeMinCount() throws IOException { ids(commitId1, commitId1), files(FILE_C, FILE_D), statuses(Status.ADDED, Status.ADDED)); + // 3 manifests appended, each ~5661-6397 bytes. With targetSizeBytes=15000: + // - Bin-packing creates 2 bins: [manifest] and [manifest2, manifest3] + // - Bin 1 (size=1): kept as-is, no merge + // - Bin 2 (size=2 >= minCountToMerge=2): merged into 1 new manifest + // Result: 2 created (1 kept + 1 merged), 2 replaced (from merged bin), 0 kept (first snapshot) + assertThat(snap1.summary()) + .as( + "2 manifests created: 1 from bin with single manifest, 1 from merging bin with 2 manifests") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "2") + .as("2 manifests replaced: the 2 manifests in the merged bin") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "2") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); // produce new manifests as the old ones could have been compacted manifest = writeManifestWithName("FILE_A_S2", FILE_A); @@ -560,7 +576,19 @@ public void testManifestMergeMinCount() throws IOException { statuses(Status.EXISTING, Status.EXISTING, Status.EXISTING)); // validate that the metadata summary is correct when using appendManifest - assertThat(snap2.summary()).containsEntry("added-data-files", "3"); + // snap2 has: 3 new manifests appended + 2 existing manifests from snap1 + // Bin-packing with targetSizeBytes=15000: + // - New manifests: [manifest] kept as-is, [manifest2, manifest3] merged (2 replaced) + // - Existing manifests from snap1: both merged into 1 (2 replaced) + // Result: 3 created (1 + 1 merged from new + 1 merged from existing), 4 replaced, 0 kept + assertThat(snap2.summary()) + .containsEntry("added-data-files", "3") + .as("3 manifests created: 1 single new, 1 merged from 2 new, 1 merged from 2 existing") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "3") + .as( + "4 manifests replaced: 2 from merging new manifests + 2 from merging existing manifests") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "4") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -781,6 +809,13 @@ public void testMergeWithExistingManifestAfterDelete() { V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber()); + // The delete operation rewrites the original manifest to mark FILE_A as deleted + // This should result in 1 replaced manifest, 1 created manifest, and 0 kept manifests + assertThat(deleteSnapshot.summary()) + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); + long deleteId = latestSnapshot(table, branch).snapshotId(); assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(1); ManifestFile deleteManifest = deleteSnapshot.allManifests(table.io()).get(0); @@ -1485,7 +1520,10 @@ public void testDefaultPartitionSummaries() { assertThat(table.currentSnapshot().summary()) .doesNotContainKey(SnapshotSummary.PARTITION_SUMMARY_PROP) - .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1"); + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -1505,7 +1543,10 @@ public void testIncludedPartitionSummaries() { .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1") .containsEntry( SnapshotSummary.CHANGED_PARTITION_PREFIX + "data_bucket=0", - "added-data-files=1,added-records=1,added-files-size=10"); + "added-data-files=1,added-records=1,added-files-size=10") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -1522,6 +1563,9 @@ public void testIncludedPartitionSummaryLimit() { assertThat(table.currentSnapshot().summary()) .doesNotContainKey(SnapshotSummary.PARTITION_SUMMARY_PROP) - .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "2"); + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "2") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); } } diff --git a/core/src/test/java/org/apache/iceberg/TestRowDelta.java b/core/src/test/java/org/apache/iceberg/TestRowDelta.java index 4eff3a400f72..0358ef6a455b 100644 --- a/core/src/test/java/org/apache/iceberg/TestRowDelta.java +++ b/core/src/test/java/org/apache/iceberg/TestRowDelta.java @@ -1070,6 +1070,11 @@ public void testAddDeleteFilesMultipleSpecs() { .containsEntry(TOTAL_DELETE_FILES_PROP, "3") .containsEntry(ADDED_POS_DELETES_PROP, String.valueOf(posDeletesCount)) .containsEntry(TOTAL_POS_DELETES_PROP, String.valueOf(posDeletesCount)) + // 4 manifests created: 1 data manifest + 3 delete manifests (one per partition spec) + // 3 manifests kept: 3 data manifests from previous appends + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "4") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "3") .hasEntrySatisfying( CHANGED_PARTITION_PREFIX + "data_bucket=0", v -> assertThat(v).contains(ADDED_DELETE_FILES_PROP + "=1")) diff --git a/core/src/test/java/org/apache/iceberg/TestSnapshotSummary.java b/core/src/test/java/org/apache/iceberg/TestSnapshotSummary.java index 1eee2d293ec0..61084b4e6749 100644 --- a/core/src/test/java/org/apache/iceberg/TestSnapshotSummary.java +++ b/core/src/test/java/org/apache/iceberg/TestSnapshotSummary.java @@ -101,7 +101,7 @@ public void fastAppendWithDuplicates() { .commit(); assertThat(table.currentSnapshot().summary()) - .hasSize(11) + .hasSize(14) .containsEntry(SnapshotSummary.ADDED_FILES_PROP, "1") .containsEntry(SnapshotSummary.ADDED_FILE_SIZE_PROP, "10") .containsEntry(SnapshotSummary.ADDED_RECORDS_PROP, "1") @@ -111,7 +111,10 @@ public void fastAppendWithDuplicates() { .containsEntry(SnapshotSummary.TOTAL_EQ_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_POS_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "10") - .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1"); + .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -126,7 +129,7 @@ public void mergeAppendWithDuplicates() { .commit(); assertThat(table.currentSnapshot().summary()) - .hasSize(11) + .hasSize(14) .containsEntry(SnapshotSummary.ADDED_FILES_PROP, "1") .containsEntry(SnapshotSummary.ADDED_FILE_SIZE_PROP, "10") .containsEntry(SnapshotSummary.ADDED_RECORDS_PROP, "1") @@ -136,7 +139,10 @@ public void mergeAppendWithDuplicates() { .containsEntry(SnapshotSummary.TOTAL_EQ_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_POS_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "10") - .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1"); + .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -155,7 +161,7 @@ public void overwriteWithDuplicates() { .commit(); assertThat(table.currentSnapshot().summary()) - .hasSize(14) + .hasSize(17) .containsEntry(SnapshotSummary.ADDED_FILES_PROP, "1") .containsEntry(SnapshotSummary.ADDED_FILE_SIZE_PROP, "10") .containsEntry(SnapshotSummary.ADDED_RECORDS_PROP, "1") @@ -168,7 +174,10 @@ public void overwriteWithDuplicates() { .containsEntry(SnapshotSummary.TOTAL_EQ_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_POS_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "10") - .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1"); + .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "2") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "1"); } @TestTemplate @@ -187,7 +196,7 @@ public void deleteWithDuplicates() { .commit(); assertThat(table.currentSnapshot().summary()) - .hasSize(11) + .hasSize(14) .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "2") .containsEntry(SnapshotSummary.DELETED_FILES_PROP, "2") .containsEntry(SnapshotSummary.DELETED_RECORDS_PROP, "2") @@ -197,7 +206,10 @@ public void deleteWithDuplicates() { .containsEntry(SnapshotSummary.TOTAL_EQ_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_POS_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "0") - .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "0"); + .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "0") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "1"); } @TestTemplate @@ -212,7 +224,7 @@ public void replacePartitionsWithDuplicates() { .commit(); assertThat(table.currentSnapshot().summary()) - .hasSize(12) + .hasSize(15) .containsEntry(SnapshotSummary.ADDED_FILES_PROP, "1") .containsEntry(SnapshotSummary.ADDED_FILE_SIZE_PROP, "10") .containsEntry(SnapshotSummary.ADDED_RECORDS_PROP, "1") @@ -223,7 +235,10 @@ public void replacePartitionsWithDuplicates() { .containsEntry(SnapshotSummary.TOTAL_EQ_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_POS_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "10") - .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1"); + .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -238,7 +253,7 @@ public void rowDeltaWithDuplicates() { .commit(); assertThat(table.currentSnapshot().summary()) - .hasSize(11) + .hasSize(14) .containsEntry(SnapshotSummary.ADDED_FILES_PROP, "1") .containsEntry(SnapshotSummary.ADDED_FILE_SIZE_PROP, "10") .containsEntry(SnapshotSummary.ADDED_RECORDS_PROP, "1") @@ -248,7 +263,10 @@ public void rowDeltaWithDuplicates() { .containsEntry(SnapshotSummary.TOTAL_EQ_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_POS_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "10") - .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1"); + .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -267,7 +285,7 @@ public void rowDeltaWithDeletesAndDuplicates() { .commit(); assertThat(table.currentSnapshot().summary()) - .hasSize(14) + .hasSize(17) .containsEntry(SnapshotSummary.ADDED_FILES_PROP, "1") .containsEntry(SnapshotSummary.ADDED_DELETE_FILES_PROP, "1") .containsEntry(SnapshotSummary.ADDED_FILE_SIZE_PROP, "20") // size of data + delete file @@ -280,7 +298,10 @@ public void rowDeltaWithDeletesAndDuplicates() { .containsEntry(SnapshotSummary.TOTAL_EQ_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_POS_DELETES_PROP, "1") .containsEntry(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "20") - .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1"); + .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "2") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -300,7 +321,7 @@ public void rewriteWithDuplicateFiles() { .commit(); assertThat(table.currentSnapshot().summary()) - .hasSize(14) + .hasSize(17) .containsEntry(SnapshotSummary.ADDED_FILES_PROP, "1") .containsEntry(SnapshotSummary.ADDED_FILE_SIZE_PROP, "10") .containsEntry(SnapshotSummary.ADDED_RECORDS_PROP, "1") @@ -313,7 +334,10 @@ public void rewriteWithDuplicateFiles() { .containsEntry(SnapshotSummary.TOTAL_EQ_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_POS_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "10") - .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1"); + .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "2") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0"); } @TestTemplate @@ -334,7 +358,7 @@ public void rewriteWithDeletesAndDuplicates() { .commit(); assertThat(table.currentSnapshot().summary()) - .hasSize(16) + .hasSize(19) .containsEntry(SnapshotSummary.ADDED_DELETE_FILES_PROP, "1") .containsEntry(SnapshotSummary.ADDED_FILE_SIZE_PROP, "10") .containsEntry(SnapshotSummary.ADD_POS_DELETE_FILES_PROP, "1") @@ -349,7 +373,10 @@ public void rewriteWithDeletesAndDuplicates() { .containsEntry(SnapshotSummary.TOTAL_EQ_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_POS_DELETES_PROP, "1") .containsEntry(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "20") - .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1"); + .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "2") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "1"); } @TestTemplate @@ -368,7 +395,7 @@ public void testFileSizeSummaryWithDVs() { long totalPosDeletes1 = dv1.recordCount() + dv2.recordCount(); long totalFileSize1 = dv1.contentSizeInBytes() + dv2.contentSizeInBytes(); assertThat(summary1) - .hasSize(12) + .hasSize(15) .doesNotContainKey(SnapshotSummary.ADD_POS_DELETE_FILES_PROP) .doesNotContainKey(SnapshotSummary.REMOVED_POS_DELETE_FILES_PROP) .containsEntry(SnapshotSummary.ADDED_DELETE_FILES_PROP, "1") @@ -385,7 +412,10 @@ public void testFileSizeSummaryWithDVs() { .containsEntry(SnapshotSummary.TOTAL_DATA_FILES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_EQ_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "0") - .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1"); + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "1") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "0"); DeleteFile dv3 = newDV(FILE_A); table @@ -404,7 +434,7 @@ public void testFileSizeSummaryWithDVs() { long totalPosDeletes2 = dv3.recordCount(); long totalFileSize2 = dv3.contentSizeInBytes(); assertThat(summary2) - .hasSize(16) + .hasSize(19) .doesNotContainKey(SnapshotSummary.ADD_POS_DELETE_FILES_PROP) .doesNotContainKey(SnapshotSummary.REMOVED_POS_DELETE_FILES_PROP) .containsEntry(SnapshotSummary.ADDED_DELETE_FILES_PROP, "1") @@ -421,7 +451,10 @@ public void testFileSizeSummaryWithDVs() { .containsEntry(SnapshotSummary.TOTAL_DATA_FILES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_EQ_DELETES_PROP, "0") .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "0") - .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "2"); + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "2") + .containsEntry(SnapshotSummary.CREATED_MANIFESTS_COUNT, "3") + .containsEntry(SnapshotSummary.KEPT_MANIFESTS_COUNT, "0") + .containsEntry(SnapshotSummary.REPLACED_MANIFESTS_COUNT, "2"); } @TestTemplate