apache · jbewing · Dec 15, 2025 · Dec 16, 2025 · Jan 21, 2026 · Jan 21, 2026
diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java
@@ -34,6 +34,7 @@
 import org.apache.parquet.column.values.RequiresPreviousReader;
 import org.apache.parquet.column.values.ValuesReader;
 import org.apache.parquet.io.ParquetDecodingException;
+import org.apache.parquet.schema.PrimitiveType;
 
 public class VectorizedPageIterator extends BasePageIterator {
   private final boolean setArrowValidityVector;
@@ -100,6 +101,14 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i
         case DELTA_BINARY_PACKED:
           valuesReader = new VectorizedDeltaEncodedValuesReader();
           break;
+        case RLE:
+          if (desc.getPrimitiveType().getPrimitiveTypeName()
+              == PrimitiveType.PrimitiveTypeName.BOOLEAN) {
+            valuesReader =
+                new VectorizedRunLengthEncodedParquetValuesReader(setArrowValidityVector);
+            break;
+          }
+          // fall through
         default:
           throw new UnsupportedOperationException(
               "Cannot support vectorized reads for column "

diff --git a/...pache/iceberg/arrow/vectorized/parquet/VectorizedRunLengthEncodedParquetValuesReader.java b/...pache/iceberg/arrow/vectorized/parquet/VectorizedRunLengthEncodedParquetValuesReader.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.arrow.vectorized.parquet;
+
+import org.apache.arrow.vector.FieldVector;
+import org.apache.parquet.io.api.Binary;
+
+/**
+ * A {@link VectorizedValuesReader} for BOOLEAN data pages encoded with Run Length Encoding (RLE).
+ *
+ * @see <a
+ *     href="https://parquet.apache.org/docs/file-format/data-pages/encodings/#run-length-encoding--bit-packing-hybrid-rle--3">
+ *     Parquet format encodings: RLE</a>
+ */
+public class VectorizedRunLengthEncodedParquetValuesReader extends BaseVectorizedParquetValuesReader
+    implements VectorizedValuesReader {
+
+  // Only booleans are read by this reader, so the bit width is always 1
+  private static final int BOOLEAN_BIT_WIDTH = 1;
+  // This reader is only used for data page values, so the max definition level handed to the
+  // base class is a placeholder; its value does not matter
+  private static final int IRRELEVANT_MAX_DEFINITION_LEVEL = 1;
+  // For boolean values in data page v1 & v2, length is always prepended to the encoded data
+  // See
+  // https://parquet.apache.org/docs/file-format/data-pages/encodings/#run-length-encoding--bit-packing-hybrid-rle--3
+  private static final boolean ALWAYS_READ_LENGTH = true;
+
+  public VectorizedRunLengthEncodedParquetValuesReader(boolean setArrowValidityVector) {
+    super(
+        BOOLEAN_BIT_WIDTH,
+        IRRELEVANT_MAX_DEFINITION_LEVEL,
+        ALWAYS_READ_LENGTH,
+        setArrowValidityVector);
+  }
+
+  /** Always throws: as a data page encoding, RLE is only valid for BOOLEAN values. */
+  @Override
+  public byte readByte() {
+    throw new UnsupportedOperationException("readByte is not supported");
+  }
+
+  /** Always throws: as a data page encoding, RLE is only valid for BOOLEAN values. */
+  @Override
+  public short readShort() {
+    throw new UnsupportedOperationException("readShort is not supported");
+  }
+
+  /** Always throws: as a data page encoding, RLE is only valid for BOOLEAN values. */
+  @Override
+  public long readLong() {
+    throw new UnsupportedOperationException("readLong is not supported");
+  }
+
+  /** Always throws: as a data page encoding, RLE is only valid for BOOLEAN values. */
+  @Override
+  public float readFloat() {
+    throw new UnsupportedOperationException("readFloat is not supported");
+  }
+
+  /** Always throws: as a data page encoding, RLE is only valid for BOOLEAN values. */
+  @Override
+  public double readDouble() {
+    throw new UnsupportedOperationException("readDouble is not supported");
+  }
+
+  /** Always throws: as a data page encoding, RLE is only valid for BOOLEAN values. */
+  @Override
+  public Binary readBinary(int len) {
+    throw new UnsupportedOperationException("readBinary is not supported");
+  }
+
+  /** Always throws: as a data page encoding, RLE is only valid for BOOLEAN values. */
+  @Override
+  public void readIntegers(int total, FieldVector vec, int rowId) {
+    throw new UnsupportedOperationException("readIntegers is not supported");
+  }
+
+  /** Always throws: as a data page encoding, RLE is only valid for BOOLEAN values. */
+  @Override
+  public void readLongs(int total, FieldVector vec, int rowId) {
+    throw new UnsupportedOperationException("readLongs is not supported");
+  }
+
+  /** Always throws: as a data page encoding, RLE is only valid for BOOLEAN values. */
+  @Override
+  public void readFloats(int total, FieldVector vec, int rowId) {
+    throw new UnsupportedOperationException("readFloats is not supported");
+  }
+
+  /** Always throws: as a data page encoding, RLE is only valid for BOOLEAN values. */
+  @Override
+  public void readDoubles(int total, FieldVector vec, int rowId) {
+    throw new UnsupportedOperationException("readDoubles is not supported");
+  }
+}
diff --git a/parquet/src/testFixtures/resources/encodings/PLAIN/boolean_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/PLAIN/boolean_with_nulls.parquet
diff --git a/parquet/src/testFixtures/resources/encodings/RLE/boolean.parquet b/parquet/src/testFixtures/resources/encodings/RLE/boolean.parquet
diff --git a/parquet/src/testFixtures/resources/encodings/RLE/boolean_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/RLE/boolean_with_nulls.parquet
diff --git a/...he/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java b/...he/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
@@ -55,6 +55,13 @@ public void testVectorizedReadsWithNewContainers() throws IOException {
     // Disabled since this code path is already tested in TestParquetVectorizedReads
   }
 
+  @Test
+  @Override
+  public void testUnsupportedReadsForParquetV2() throws Exception {
+    // Intentionally empty: vectorized reads of Parquet v2 files are supported when the data is
+    // dictionary-encoded
+  }
+
   @Test
   public void testMixedDictionaryNonDictionaryReads() throws IOException {
     Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields());

diff --git a/...est/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java b/...est/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java
@@ -20,17 +20,20 @@
 
 import static org.apache.iceberg.types.Types.NestedField.optional;
 import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.apache.parquet.schema.Types.primitive;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
 import static org.assertj.core.api.Assumptions.assumeThat;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.util.Iterator;
 import java.util.function.Consumer;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.avro.generic.GenericData;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.arrow.ArrowAllocation;
+import org.apache.iceberg.arrow.vectorized.parquet.VectorizedPageIterator;
 import org.apache.iceberg.inmemory.InMemoryOutputFile;
 import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.io.FileAppender;
@@ -48,9 +51,13 @@
 import org.apache.iceberg.spark.data.vectorized.VectorizedSparkParquetReaders;
 import org.apache.iceberg.types.TypeUtil;
 import org.apache.iceberg.types.Types;
+import org.apache.parquet.bytes.ByteBufferInputStream;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.ParquetProperties;
 import org.apache.parquet.schema.GroupType;
 import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Type;
 import org.apache.spark.sql.vectorized.ColumnarBatch;
 import org.junit.jupiter.api.Test;
@@ -295,12 +302,16 @@ public void testReadsForTypePromotedColumns() throws Exception {
   public void testSupportedReadsForParquetV2() throws Exception {
     // Float and double column types are written using plain encoding with Parquet V2,
     // also Parquet V2 will dictionary encode decimals that use fixed length binary
-    // (i.e. decimals > 8 bytes)
+    // (i.e. decimals > 8 bytes). Int and long types use DELTA_BINARY_PACKED.
+    // Boolean types use RLE.
     Schema schema =
         new Schema(
             optional(102, "float_data", Types.FloatType.get()),
             optional(103, "double_data", Types.DoubleType.get()),
-            optional(104, "decimal_data", Types.DecimalType.of(25, 5)));
+            optional(104, "decimal_data", Types.DecimalType.of(25, 5)),
+            optional(105, "int_data", Types.IntegerType.get()),
+            optional(106, "long_data", Types.LongType.get()),
+            optional(107, "boolean_data", Types.BooleanType.get()));
 
     OutputFile outputFile = new InMemoryOutputFile();
     Iterable<GenericData.Record> data =
@@ -331,6 +342,33 @@ public void testUnsupportedReadsForParquetV2() throws Exception {
         .hasMessageEndingWith("Disable vectorized reads to read this table/file");
   }
 
+  @Test
+  public void testRLEEncodingOnlySupportsBooleanDataPage() {
+    MessageType schema =
+        new MessageType(
+            "test",
+            primitive(PrimitiveTypeName.INT32, Type.Repetition.OPTIONAL).id(1).named("int_col"));
+    ColumnDescriptor intColumnDesc = schema.getColumnDescription(new String[] {"int_col"});
+    ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.allocate(0));
+    // Mirrors the message of the exception that initDataReader throws for unsupported encodings
+    String expectedMessage =
+        "Cannot support vectorized reads for column "
+            + intColumnDesc
+            + " with encoding "
+            + Encoding.RLE
+            + ". Disable vectorized reads to read this table/file";
+    // initDataReader is protected, so an anonymous subclass calls it from an initializer block
+    assertThatThrownBy(
+            () ->
+                new VectorizedPageIterator(intColumnDesc, "parquet-mr", false) {
+                  {
+                    initDataReader(Encoding.RLE, stream, 0);
+                  }
+                })
+        .isInstanceOf(UnsupportedOperationException.class)
+        .hasMessage(expectedMessage);
+  }
+
   @Test
   public void testUuidReads() throws Exception {
     // Just one row to maintain dictionary encoding

diff --git a/...he/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java b/...he/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java
@@ -94,6 +94,12 @@ Iterable<Record> generateData(
   @Disabled // Ignored since this code path is already tested in TestParquetVectorizedReads
   public void testVectorizedReadsWithNewContainers() throws IOException {}
 
+  @Test
+  @Override
+  @Disabled // Ignored since vectorized reads of Parquet v2 files are supported when the data is
+  // dictionary-encoded
+  public void testUnsupportedReadsForParquetV2() throws IOException {}
+
   @Test
   public void testMixedDictionaryNonDictionaryReads() throws IOException {
     Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields());

diff --git a/...est/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/...est/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java
@@ -21,6 +21,7 @@
 import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
 import static org.apache.iceberg.types.Types.NestedField.optional;
 import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.apache.parquet.schema.Types.primitive;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
 import static org.assertj.core.api.Assumptions.assumeThat;
@@ -30,6 +31,7 @@
 import java.io.InputStream;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.nio.ByteBuffer;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.Iterator;
@@ -41,6 +43,7 @@
 import org.apache.iceberg.Files;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.arrow.ArrowAllocation;
+import org.apache.iceberg.arrow.vectorized.parquet.VectorizedPageIterator;
 import org.apache.iceberg.data.RandomGenericData;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.data.parquet.GenericParquetReaders;
@@ -64,9 +67,13 @@
 import org.apache.iceberg.types.Type.PrimitiveType;
 import org.apache.iceberg.types.TypeUtil;
 import org.apache.iceberg.types.Types;
+import org.apache.parquet.bytes.ByteBufferInputStream;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.ParquetProperties;
 import org.apache.parquet.schema.GroupType;
 import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Type;
 import org.apache.spark.sql.catalyst.InternalRow;
 import org.apache.spark.sql.vectorized.ColumnarBatch;
@@ -81,7 +88,7 @@ public class TestParquetVectorizedReads extends AvroDataTestBase {
 
   private static final String PLAIN = "PLAIN";
   private static final List<String> GOLDEN_FILE_ENCODINGS =
-      ImmutableList.of("PLAIN_DICTIONARY", "RLE_DICTIONARY", "DELTA_BINARY_PACKED");
+      ImmutableList.of("PLAIN_DICTIONARY", "RLE_DICTIONARY", "DELTA_BINARY_PACKED", "RLE");
   private static final Map<String, PrimitiveType> GOLDEN_FILE_TYPES =
       ImmutableMap.of(
           "string", Types.StringType.get(),
@@ -404,13 +411,15 @@ public void testSupportedReadsForParquetV2() throws Exception {
     // Float and double column types are written using plain encoding with Parquet V2,
     // also Parquet V2 will dictionary encode decimals that use fixed length binary
     // (i.e. decimals > 8 bytes). Int and long types use DELTA_BINARY_PACKED.
+    // Boolean types use RLE.
     Schema schema =
         new Schema(
             optional(102, "float_data", Types.FloatType.get()),
             optional(103, "double_data", Types.DoubleType.get()),
             optional(104, "decimal_data", Types.DecimalType.of(25, 5)),
             optional(105, "int_data", Types.IntegerType.get()),
-            optional(106, "long_data", Types.LongType.get()));
+            optional(106, "long_data", Types.LongType.get()),
+            optional(107, "boolean_data", Types.BooleanType.get()));
 
     File dataFile = File.createTempFile("junit", null, temp.toFile());
     assertThat(dataFile.delete()).as("Delete should succeed").isTrue();
@@ -439,6 +448,33 @@ public void testUnsupportedReadsForParquetV2() throws Exception {
         .hasMessageEndingWith("Disable vectorized reads to read this table/file");
   }
 
+  @Test
+  public void testRLEEncodingOnlySupportsBooleanDataPage() {
+    MessageType schema =
+        new MessageType(
+            "test",
+            primitive(PrimitiveTypeName.INT32, Type.Repetition.OPTIONAL).id(1).named("int_col"));
+    ColumnDescriptor intColumnDesc = schema.getColumnDescription(new String[] {"int_col"});
+    ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.allocate(0));
+    // Mirrors the message of the exception that initDataReader throws for unsupported encodings
+    String expectedMessage =
+        "Cannot support vectorized reads for column "
+            + intColumnDesc
+            + " with encoding "
+            + Encoding.RLE
+            + ". Disable vectorized reads to read this table/file";
+    // initDataReader is protected, so an anonymous subclass calls it from an initializer block
+    assertThatThrownBy(
+            () ->
+                new VectorizedPageIterator(intColumnDesc, "parquet-mr", false) {
+                  {
+                    initDataReader(Encoding.RLE, stream, 0);
+                  }
+                })
+        .isInstanceOf(UnsupportedOperationException.class)
+        .hasMessage(expectedMessage);
+  }
+
   @Test
   public void testUuidReads() throws Exception {
     // Just one row to maintain dictionary encoding
@@ -504,10 +540,16 @@ static Stream<Arguments> goldenFilesAndEncodings() {
                     .flatMap(
                         e ->
                             Stream.of(true, false)
-                                .map(
+                                .flatMap(
                                     vectorized ->
-                                        Arguments.of(
-                                            encoding, e.getKey(), e.getValue(), vectorized))));
+                                        Stream.of(
+                                            Arguments.of(
+                                                encoding, e.getKey(), e.getValue(), vectorized),
+                                            Arguments.of(
+                                                encoding,
+                                                e.getKey() + "_with_nulls",
+                                                e.getValue(),
+                                                vectorized)))));
   }
 
   private File resourceUrlToLocalFile(URL url) throws IOException, URISyntaxException {

diff --git a/...he/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java b/...he/iceberg/spark/data/vectorized/parquet/TestParquetDictionaryEncodedVectorizedReads.java
@@ -196,4 +196,10 @@ public void testDecimalNotAllPagesDictionaryEncoded() throws Exception {
           }
         });
   }
+
+  @Test
+  @Override
+  @Disabled // Ignored since vectorized reads of Parquet v2 files are supported when the data is
+  // dictionary-encoded
+  public void testUnsupportedReadsForParquetV2() throws IOException {}
 }