From 90102250810514c302e0597d2ecce87cdf2b1256 Mon Sep 17 00:00:00 2001
From: Yuya Ebihara
Date: Fri, 12 Sep 2025 20:34:12 +0900
Subject: [PATCH] Add support for printing deletion vectors

---
 .../deletes/RoaringPositionBitmaps.java       | 38 ++++++++++++++++
 src/main/java/org/ebyhr/puffin/Console.java   | 61 ++++++++++++++++++++++++++
 .../apache-datasketches-theta-v1.puffin       | Bin 0 -> 843 bytes
 src/test/resources/deletion-vector-v1.puffin  | Bin 0 -> 521 bytes
 4 files changed, 99 insertions(+)
 create mode 100644 src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmaps.java
 create mode 100644 src/test/resources/apache-datasketches-theta-v1.puffin
 create mode 100644 src/test/resources/deletion-vector-v1.puffin

diff --git a/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmaps.java b/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmaps.java
new file mode 100644
index 0000000..2d93ea0
--- /dev/null
+++ b/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmaps.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.iceberg.deletes;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Subclass of {@link RoaringPositionBitmap} that adds a factory returning this type.
+ * NOTE(review): presumably lives in this package to reach {@code RoaringPositionBitmap} - confirm.
+ */
+public class RoaringPositionBitmaps
+        extends RoaringPositionBitmap
+{
+    /**
+     * Deserializes {@code buffer} with {@link RoaringPositionBitmap#deserialize(ByteBuffer)} and
+     * passes the result to {@code setAll} on a new {@code RoaringPositionBitmaps}.
+     *
+     * @param buffer serialized bitmap, handed unchanged to the parent deserializer
+     * @return the newly created instance
+     */
+    public static RoaringPositionBitmaps deserialize(ByteBuffer buffer)
+    {
+        RoaringPositionBitmaps bitmap = new RoaringPositionBitmaps();
+        bitmap.setAll(RoaringPositionBitmap.deserialize(buffer));
+        return bitmap;
+    }
+}
diff --git a/src/main/java/org/ebyhr/puffin/Console.java b/src/main/java/org/ebyhr/puffin/Console.java
index 47cc5d7..5ded44f 100644
--- a/src/main/java/org/ebyhr/puffin/Console.java
+++ b/src/main/java/org/ebyhr/puffin/Console.java
@@ -14,17 +14,25 @@
 package org.ebyhr.puffin;
 
 import org.apache.iceberg.Files;
+import org.apache.iceberg.deletes.RoaringPositionBitmaps;
 import org.apache.iceberg.io.InputFile;
 import org.apache.iceberg.puffin.BlobMetadata;
 import org.apache.iceberg.puffin.FileMetadata;
 import org.apache.iceberg.puffin.Puffin;
 import org.apache.iceberg.puffin.PuffinReader;
+import org.apache.iceberg.util.Pair;
 import picocli.CommandLine.Command;
 import picocli.CommandLine.Option;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.concurrent.Callable;
 
+import static org.apache.iceberg.puffin.StandardBlobTypes.DV_V1;
+
 @Command(
         name = "puffin",
         header = "Puffin command line interface",
@@ -37,6 +45,9 @@ public class Console
     @Option(names = "--path", paramLabel = "", description = "File path to Puffin")
     public String path;
 
+    private static final int BITMAP_DATA_OFFSET = 4;
+    private static final int MAGIC_NUMBER = 1681511377;
+
     @Override
     public Integer call()
     {
@@ -48,6 +59,7 @@ public boolean run()
         try {
             InputFile inputFile = Files.localInput(path);
             try (PuffinReader reader = Puffin.read(inputFile).build()) {
+                // metadata
                 FileMetadata metadata = reader.fileMetadata();
                 for (BlobMetadata blobMetadata : metadata.blobs()) {
                     System.out.println("type: " + blobMetadata.type());
@@ -61,6 +73,20 @@ public boolean run()
                     System.out.println();
                 }
                 System.out.println("properties: " + metadata.properties());
+                // blobs
+                for (Pair<BlobMetadata, ByteBuffer> read : reader.readAll(metadata.blobs())) {
+                    BlobMetadata blobMetadata = read.first();
+                    System.out.println("type: " + blobMetadata.type());
+                    if (blobMetadata.type().equals(DV_V1)) {
+                        // Duplicate so the reader's buffer keeps its position; a duplicate is big-endian
+                        ByteBuffer buffer = read.second().duplicate();
+                        int bitmapDataLength = buffer.getInt(buffer.position());
+                        RoaringPositionBitmaps bitmap = deserializeBitmap(buffer, bitmapDataLength);
+                        List<Long> deletedRows = new ArrayList<>();
+                        bitmap.forEach(deletedRows::add);
+                        System.out.println("deletedRows: " + deletedRows);
+                    }
+                }
             }
         }
         catch (RuntimeException | IOException e) {
@@ -68,4 +94,39 @@ public boolean run()
         }
         return true;
     }
+
+    /**
+     * Checks the magic number that opens the bitmap data and deserializes the bitmap after it.
+     *
+     * @param blob deletion vector blob whose position marks its first byte
+     * @param bitmapDataLength byte count of the bitmap data following the length prefix
+     * @return the deserialized bitmap
+     * @throws IllegalArgumentException if the length exceeds the blob or the magic number differs
+     */
+    private static RoaringPositionBitmaps deserializeBitmap(ByteBuffer blob, int bitmapDataLength)
+    {
+        ByteBuffer bitmapData = pointToBitmapData(blob, bitmapDataLength);
+        int magicNumber = bitmapData.getInt();
+        if (magicNumber != MAGIC_NUMBER) {
+            throw new IllegalArgumentException("Invalid magic number: %s, expected %s".formatted(magicNumber, MAGIC_NUMBER));
+        }
+        return RoaringPositionBitmaps.deserialize(bitmapData);
+    }
+
+    /**
+     * Returns a little-endian view over the bitmap data that follows the length prefix.
+     * Built from buffer views rather than {@code array()}, which fails on read-only or direct
+     * buffers and ignores the backing array offset.
+     */
+    private static ByteBuffer pointToBitmapData(ByteBuffer blob, int bitmapDataLength)
+    {
+        if (bitmapDataLength < 0 || bitmapDataLength > blob.remaining() - BITMAP_DATA_OFFSET) {
+            throw new IllegalArgumentException("Invalid bitmap data length: %s".formatted(bitmapDataLength));
+        }
+        ByteBuffer bitmapData = blob.duplicate();
+        bitmapData.position(blob.position() + BITMAP_DATA_OFFSET);
+        bitmapData.limit(bitmapData.position() + bitmapDataLength);
+        bitmapData.order(ByteOrder.LITTLE_ENDIAN);
+        return bitmapData;
+    }
 }
diff --git a/src/test/resources/apache-datasketches-theta-v1.puffin b/src/test/resources/apache-datasketches-theta-v1.puffin
new file mode 100644
index 0000000000000000000000000000000000000000..9beca220d097a8962532f4fcb101046b5673ce26
GIT binary patch
literal 843
zcmWG=b2QZ0s{dET;vgdf6EiadgVdSHtPCK~U@!W!_VnJig{%(u^d6qPZOd_Z`X8aj
zg1K@}vlHK)XmC;3<*@nSEw;>9j`ceCaT>D5=BV_816zcWPNfCRnPMoqiC?RYdFtk%
z1ofV_DZixh=6dWp*|CeY!YA)<08T>$@4Yz@&!8i<<+WYdlT#B^&dX@UsTEyN&uWT0cJic3;}mKGG{7o--IWTpZwu2#xRDFaHGD%IBNknCzB
zid}7PJOIu%rr6nrCPoyy8|Vnsm`E;4O)N=G(M_rZMo36eW?sHRS!xj|1{F$Di%Xzv
PU_@mwfwKw&D9r%?`JfwR

literal 0
HcmV?d00001 diff --git a/src/test/resources/deletion-vector-v1.puffin b/src/test/resources/deletion-vector-v1.puffin new file mode 100644 index 0000000000000000000000000000000000000000..c0d8992bce5f7ee354d66c8761b74704388e279e GIT binary patch literal 521 zcmY*WJC58i40RTJfE;0=VyjW~9r@FxNxj8(L14%-HDg4P6v&bY{BNqa&%vAOw&L*T#{(j+WC-)HknZD^ zluH8Akr4uvX^wOSSLRGtL3YGX5XLP{3SH|)8NL6sBTKYtInISnBk8EX0DL_`bU@?j z%K-T$ID5Z9A)Vfl0P#?c=$ai#c}|ea1X%V;GI+=vbflv#mij4dWVWYctIf>MVT8P} zc47VZbJV$yvTujPKj#pVNI^M{c1Y}zO%Aa8p+_?L(&tl*kX56!*>i2AYHNc>8CH@z zvJ8i9+S=D%Xl@$dQiE%3jRkAfG*(C@9I%ZRlU(k3D`YQ(tflI)Bi`T4c|B9^_~1cW z+Gt%<4cgJca7(QhgK-*R%D+oCBddavI-AES+Ax(14ywr06Ds<@C@zC7sK~HoJmx9R WcPnaubngl*<<&>z&)1D_EBy<2IGd&b literal 0 HcmV?d00001