From df314e9b4146aab85ce7dedb6d3646e69cfabb95 Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Fri, 2 Jan 2026 14:27:40 -0800 Subject: [PATCH 1/6] Fail publish_changes procedure if there's multiple matching snapshots --- .../TestPublishChangesProcedure.java | 19 ++++++++++ .../procedures/PublishChangesProcedure.java | 36 ++++++++++--------- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java index 4958fde15d55..93a37956579c 100644 --- a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java +++ b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java @@ -159,6 +159,25 @@ public void testApplyInvalidWapId() { .hasMessage("Cannot apply unknown WAP ID 'not_valid'"); } + @TestTemplate + public void testApplyDuplicateWapId() { + + String wapId = "wap_id_1"; + + sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING iceberg", tableName); + sql("ALTER TABLE %s SET TBLPROPERTIES ('%s' 'true')", tableName, WRITE_AUDIT_PUBLISH_ENABLED); + + spark.conf().set("spark.wap.id", wapId); + + sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName); + sql("INSERT INTO TABLE %s VALUES (2, 'b')", tableName); + + assertThatThrownBy( + () -> sql("CALL %s.system.publish_changes('%s', '%s')", catalogName, tableIdent, wapId)) + .isInstanceOf(ValidationException.class) + .hasMessage("Cannot apply non-unique WAP ID. Found 2 snapshots with WAP ID 'wap_id_1'"); + } + @TestTemplate public void testInvalidApplyWapChangesCases() { assertThatThrownBy( diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java index 874888204334..788657609a2f 100644 --- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java +++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java @@ -19,7 +19,6 @@ package org.apache.iceberg.spark.procedures; import java.util.Iterator; -import java.util.Optional; import org.apache.iceberg.Snapshot; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; @@ -97,23 +96,26 @@ public Iterator call(InternalRow args) { return modifyIcebergTable( tableIdent, table -> { - Optional wapSnapshot = - Optional.ofNullable( - Iterables.find( - table.snapshots(), - snapshot -> wapId.equals(WapUtil.stagedWapId(snapshot)), - null)); - if (!wapSnapshot.isPresent()) { - throw new ValidationException("Cannot apply unknown WAP ID '%s'", wapId); + Iterable wapSnapshots = + Iterables.filter( + table.snapshots(), snapshot -> wapId.equals(WapUtil.stagedWapId(snapshot))); + + int numMatchingSnapshots = Iterables.size(wapSnapshots); + + switch (numMatchingSnapshots) { + case 0: + throw new ValidationException("Cannot apply unknown WAP ID '%s'", wapId); + case 1: + long wapSnapshotId = Iterables.getOnlyElement(wapSnapshots).snapshotId(); + table.manageSnapshots().cherrypick(wapSnapshotId).commit(); + Snapshot currentSnapshot = table.currentSnapshot(); + InternalRow outputRow = newInternalRow(wapSnapshotId, currentSnapshot.snapshotId()); + return asScanIterator(OUTPUT_TYPE, outputRow); + default: + throw new ValidationException( + "Cannot apply non-unique WAP ID. Found %d snapshots with WAP ID '%s'", + numMatchingSnapshots, wapId); } - - long wapSnapshotId = wapSnapshot.get().snapshotId(); - table.manageSnapshots().cherrypick(wapSnapshotId).commit(); - - Snapshot currentSnapshot = table.currentSnapshot(); - - InternalRow outputRow = newInternalRow(wapSnapshotId, currentSnapshot.snapshotId()); - return asScanIterator(OUTPUT_TYPE, outputRow); }); } From 8acad3341658d2ed9cb8e29beba1aa6c6adcb266 Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Mon, 19 Jan 2026 11:18:35 -0800 Subject: [PATCH 2/6] rewrite publish_changes procedure to early-exit on duplicated wap.id --- .../TestPublishChangesProcedure.java | 2 +- .../procedures/PublishChangesProcedure.java | 40 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java index 93a37956579c..c7c442e29b16 100644 --- a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java +++ b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java @@ -175,7 +175,7 @@ public void testApplyDuplicateWapId() { assertThatThrownBy( () -> sql("CALL %s.system.publish_changes('%s', '%s')", catalogName, tableIdent, wapId)) .isInstanceOf(ValidationException.class) - .hasMessage("Cannot apply non-unique WAP ID. Found 2 snapshots with WAP ID 'wap_id_1'"); + .hasMessage("Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID 'wap_id_1'"); } @TestTemplate diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java index 788657609a2f..f052882854a3 100644 --- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java +++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java @@ -21,7 +21,6 @@ import java.util.Iterator; import org.apache.iceberg.Snapshot; import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.spark.procedures.SparkProcedures.ProcedureBuilder; import org.apache.iceberg.util.WapUtil; import org.apache.spark.sql.catalyst.InternalRow; @@ -96,26 +95,27 @@ public Iterator call(InternalRow args) { return modifyIcebergTable( tableIdent, table -> { - Iterable wapSnapshots = - Iterables.filter( - table.snapshots(), snapshot -> wapId.equals(WapUtil.stagedWapId(snapshot))); - - int numMatchingSnapshots = Iterables.size(wapSnapshots); - - switch (numMatchingSnapshots) { - case 0: - throw new ValidationException("Cannot apply unknown WAP ID '%s'", wapId); - case 1: - long wapSnapshotId = Iterables.getOnlyElement(wapSnapshots).snapshotId(); - table.manageSnapshots().cherrypick(wapSnapshotId).commit(); - Snapshot currentSnapshot = table.currentSnapshot(); - InternalRow outputRow = newInternalRow(wapSnapshotId, currentSnapshot.snapshotId()); - return asScanIterator(OUTPUT_TYPE, outputRow); - default: - throw new ValidationException( - "Cannot apply non-unique WAP ID. Found %d snapshots with WAP ID '%s'", - numMatchingSnapshots, wapId); + Snapshot matchingSnap = null; + for (Snapshot snap : table.snapshots()) { + if (wapId.equals(WapUtil.stagedWapId(snap))) { + if (matchingSnap != null) { + throw new ValidationException( + "Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID '%s'", wapId); + } else { + matchingSnap = snap; + } + } } + + if (matchingSnap == null) { + throw new ValidationException("Cannot apply unknown WAP ID '%s'", wapId); + } + + long wapSnapshotId = matchingSnap.snapshotId(); + table.manageSnapshots().cherrypick(wapSnapshotId).commit(); + Snapshot currentSnapshot = table.currentSnapshot(); + InternalRow outputRow = newInternalRow(wapSnapshotId, currentSnapshot.snapshotId()); + return asScanIterator(OUTPUT_TYPE, outputRow); }); } From 98ecb309cdc110233f45cc16c006c34fdb78d65d Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Mon, 19 Jan 2026 11:19:03 -0800 Subject: [PATCH 3/6] Update docs for publish_changes procedure --- docs/docs/spark-procedures.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/docs/spark-procedures.md b/docs/docs/spark-procedures.md index 7f211d9f260b..0e00df2b2558 100644 --- a/docs/docs/spark-procedures.md +++ b/docs/docs/spark-procedures.md @@ -189,6 +189,8 @@ publish_changes creates a new snapshot from an existing snapshot without alterin Only append and dynamic overwrite snapshots can be successfully published. +The `publish_changes` procedure will fail if there are multiple snapshots in the table with the provided `wap.id`. + !!! info This procedure invalidates all cached Spark plans that reference the affected table. From 115b302b7047d7d222e7df417e2c52227bc5d31b Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Mon, 19 Jan 2026 11:30:21 -0800 Subject: [PATCH 4/6] run spotlessApply --- .../iceberg/spark/extensions/TestPublishChangesProcedure.java | 3 ++- .../iceberg/spark/procedures/PublishChangesProcedure.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java index c7c442e29b16..c72770e1cec6 100644 --- a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java +++ b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java @@ -175,7 +175,8 @@ public void testApplyDuplicateWapId() { assertThatThrownBy( () -> sql("CALL %s.system.publish_changes('%s', '%s')", catalogName, tableIdent, wapId)) .isInstanceOf(ValidationException.class) - .hasMessage("Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID 'wap_id_1'"); + .hasMessage( + "Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID 'wap_id_1'"); } @TestTemplate diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java index f052882854a3..8cb0a2bfb759 100644 --- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java +++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java @@ -100,7 +100,8 @@ public Iterator call(InternalRow args) { if (wapId.equals(WapUtil.stagedWapId(snap))) { if (matchingSnap != null) { throw new ValidationException( - "Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID '%s'", wapId); + "Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID '%s'", + wapId); } else { matchingSnap = snap; } From 3b6421e1ad81a89a8ba8b8f66e11246e83488541 Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Fri, 23 Jan 2026 16:32:09 -0800 Subject: [PATCH 5/6] Update docs/docs/spark-procedures.md --- docs/docs/spark-procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/spark-procedures.md b/docs/docs/spark-procedures.md index 0e00df2b2558..e29b00e5f81a 100644 --- a/docs/docs/spark-procedures.md +++ b/docs/docs/spark-procedures.md @@ -189,7 +189,7 @@ publish_changes creates a new snapshot from an existing snapshot without alterin Only append and dynamic overwrite snapshots can be successfully published. -The `publish_changes` procedure will fail if there are multiple snapshots in the table with the provided `wap.id`. +The `publish_changes` procedure will fail if there are multiple snapshots in the table with the provided `wap_id`. !!! info This procedure invalidates all cached Spark plans that reference the affected table. From c6502ef01874e6c74fe75e3d233eafcbac4222c5 Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Mon, 26 Jan 2026 17:30:24 -0800 Subject: [PATCH 6/6] backport fix to spark 3.4, 3.5, 4.0 --- .../TestPublishChangesProcedure.java | 20 ++++++++++++++ .../procedures/PublishChangesProcedure.java | 27 ++++++++++--------- .../TestPublishChangesProcedure.java | 20 ++++++++++++++ .../procedures/PublishChangesProcedure.java | 27 ++++++++++--------- .../TestPublishChangesProcedure.java | 20 ++++++++++++++ .../procedures/PublishChangesProcedure.java | 27 ++++++++++--------- 6 files changed, 105 insertions(+), 36 deletions(-) diff --git a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java index 08f44c8f01f2..d9319801d154 100644 --- a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java +++ b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java @@ -161,6 +161,26 @@ public void testApplyInvalidWapId() { .hasMessage("Cannot apply unknown WAP ID 'not_valid'"); } + @TestTemplate + public void testApplyDuplicateWapId() { + + String wapId = "wap_id_1"; + + sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING iceberg", tableName); + sql("ALTER TABLE %s SET TBLPROPERTIES ('%s' 'true')", tableName, WRITE_AUDIT_PUBLISH_ENABLED); + + spark.conf().set("spark.wap.id", wapId); + + sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName); + sql("INSERT INTO TABLE %s VALUES (2, 'b')", tableName); + + assertThatThrownBy( + () -> sql("CALL %s.system.publish_changes('%s', '%s')", catalogName, tableIdent, wapId)) + .isInstanceOf(ValidationException.class) + .hasMessage( + "Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID 'wap_id_1'"); + } + @TestTemplate public void testInvalidApplyWapChangesCases() { assertThatThrownBy( diff --git a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java index 2c3ce7418e08..a47e75415336 100644 --- a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java +++ b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java @@ -18,10 +18,8 @@ */ package org.apache.iceberg.spark.procedures; -import java.util.Optional; import org.apache.iceberg.Snapshot; import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.spark.procedures.SparkProcedures.ProcedureBuilder; import org.apache.iceberg.util.WapUtil; import org.apache.spark.sql.catalyst.InternalRow; @@ -92,21 +90,26 @@ public InternalRow[] call(InternalRow args) { return modifyIcebergTable( tableIdent, table -> { - Optional wapSnapshot = - Optional.ofNullable( - Iterables.find( - table.snapshots(), - snapshot -> wapId.equals(WapUtil.stagedWapId(snapshot)), - null)); - if (!wapSnapshot.isPresent()) { + Snapshot matchingSnap = null; + for (Snapshot snap : table.snapshots()) { + if (wapId.equals(WapUtil.stagedWapId(snap))) { + if (matchingSnap != null) { + throw new ValidationException( + "Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID '%s'", + wapId); + } else { + matchingSnap = snap; + } + } + } + + if (matchingSnap == null) { throw new ValidationException("Cannot apply unknown WAP ID '%s'", wapId); } - long wapSnapshotId = wapSnapshot.get().snapshotId(); + long wapSnapshotId = matchingSnap.snapshotId(); table.manageSnapshots().cherrypick(wapSnapshotId).commit(); - Snapshot currentSnapshot = table.currentSnapshot(); - InternalRow outputRow = newInternalRow(wapSnapshotId, currentSnapshot.snapshotId()); return new InternalRow[] {outputRow}; }); diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java index 08f44c8f01f2..d9319801d154 100644 --- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java +++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java @@ -161,6 +161,26 @@ public void testApplyInvalidWapId() { .hasMessage("Cannot apply unknown WAP ID 'not_valid'"); } + @TestTemplate + public void testApplyDuplicateWapId() { + + String wapId = "wap_id_1"; + + sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING iceberg", tableName); + sql("ALTER TABLE %s SET TBLPROPERTIES ('%s' 'true')", tableName, WRITE_AUDIT_PUBLISH_ENABLED); + + spark.conf().set("spark.wap.id", wapId); + + sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName); + sql("INSERT INTO TABLE %s VALUES (2, 'b')", tableName); + + assertThatThrownBy( + () -> sql("CALL %s.system.publish_changes('%s', '%s')", catalogName, tableIdent, wapId)) + .isInstanceOf(ValidationException.class) + .hasMessage( + "Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID 'wap_id_1'"); + } + @TestTemplate public void testInvalidApplyWapChangesCases() { assertThatThrownBy( diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java index 2c3ce7418e08..a47e75415336 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java @@ -18,10 +18,8 @@ */ package org.apache.iceberg.spark.procedures; -import java.util.Optional; import org.apache.iceberg.Snapshot; import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.spark.procedures.SparkProcedures.ProcedureBuilder; import org.apache.iceberg.util.WapUtil; import org.apache.spark.sql.catalyst.InternalRow; @@ -92,21 +90,26 @@ public InternalRow[] call(InternalRow args) { return modifyIcebergTable( tableIdent, table -> { - Optional wapSnapshot = - Optional.ofNullable( - Iterables.find( - table.snapshots(), - snapshot -> wapId.equals(WapUtil.stagedWapId(snapshot)), - null)); - if (!wapSnapshot.isPresent()) { + Snapshot matchingSnap = null; + for (Snapshot snap : table.snapshots()) { + if (wapId.equals(WapUtil.stagedWapId(snap))) { + if (matchingSnap != null) { + throw new ValidationException( + "Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID '%s'", + wapId); + } else { + matchingSnap = snap; + } + } + } + + if (matchingSnap == null) { throw new ValidationException("Cannot apply unknown WAP ID '%s'", wapId); } - long wapSnapshotId = wapSnapshot.get().snapshotId(); + long wapSnapshotId = matchingSnap.snapshotId(); table.manageSnapshots().cherrypick(wapSnapshotId).commit(); - Snapshot currentSnapshot = table.currentSnapshot(); - InternalRow outputRow = newInternalRow(wapSnapshotId, currentSnapshot.snapshotId()); return new InternalRow[] {outputRow}; }); diff --git a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java index 4958fde15d55..c72770e1cec6 100644 --- a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java +++ b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestPublishChangesProcedure.java @@ -159,6 +159,26 @@ public void testApplyInvalidWapId() { .hasMessage("Cannot apply unknown WAP ID 'not_valid'"); } + @TestTemplate + public void testApplyDuplicateWapId() { + + String wapId = "wap_id_1"; + + sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING iceberg", tableName); + sql("ALTER TABLE %s SET TBLPROPERTIES ('%s' 'true')", tableName, WRITE_AUDIT_PUBLISH_ENABLED); + + spark.conf().set("spark.wap.id", wapId); + + sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName); + sql("INSERT INTO TABLE %s VALUES (2, 'b')", tableName); + + assertThatThrownBy( + () -> sql("CALL %s.system.publish_changes('%s', '%s')", catalogName, tableIdent, wapId)) + .isInstanceOf(ValidationException.class) + .hasMessage( + "Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID 'wap_id_1'"); + } + @TestTemplate public void testInvalidApplyWapChangesCases() { assertThatThrownBy( diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java index 874888204334..8cb0a2bfb759 100644 --- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java +++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/procedures/PublishChangesProcedure.java @@ -19,10 +19,8 @@ package org.apache.iceberg.spark.procedures; import java.util.Iterator; -import java.util.Optional; import org.apache.iceberg.Snapshot; import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.spark.procedures.SparkProcedures.ProcedureBuilder; import org.apache.iceberg.util.WapUtil; import org.apache.spark.sql.catalyst.InternalRow; @@ -97,21 +95,26 @@ public Iterator call(InternalRow args) { return modifyIcebergTable( tableIdent, table -> { - Optional wapSnapshot = - Optional.ofNullable( - Iterables.find( - table.snapshots(), - snapshot -> wapId.equals(WapUtil.stagedWapId(snapshot)), - null)); - if (!wapSnapshot.isPresent()) { + Snapshot matchingSnap = null; + for (Snapshot snap : table.snapshots()) { + if (wapId.equals(WapUtil.stagedWapId(snap))) { + if (matchingSnap != null) { + throw new ValidationException( + "Cannot apply non-unique WAP ID. Found multiple snapshots with WAP ID '%s'", + wapId); + } else { + matchingSnap = snap; + } + } + } + + if (matchingSnap == null) { throw new ValidationException("Cannot apply unknown WAP ID '%s'", wapId); } - long wapSnapshotId = wapSnapshot.get().snapshotId(); + long wapSnapshotId = matchingSnap.snapshotId(); table.manageSnapshots().cherrypick(wapSnapshotId).commit(); - Snapshot currentSnapshot = table.currentSnapshot(); - InternalRow outputRow = newInternalRow(wapSnapshotId, currentSnapshot.snapshotId()); return asScanIterator(OUTPUT_TYPE, outputRow); });