From a69ec52a7f08470212a615311fd63e7229ea1f38 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 10 Jun 2025 10:26:36 -0700 Subject: [PATCH 01/47] rebase --- .../parquet/VectorizedPageIterator.java | 33 ++++---- ...ectorizedParquetDefinitionLevelReader.java | 50 ++++++------ .../parquet/VectorizedPlainValuesReader.java | 77 +++++++++++++++++++ .../parquet/VectorizedValuesReader.java | 55 +++++++++++++ 4 files changed, 174 insertions(+), 41 deletions(-) create mode 100644 arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java create mode 100644 arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java index b97eb1545550..4f01216f35b3 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java @@ -45,7 +45,7 @@ public VectorizedPageIterator( this.setArrowValidityVector = setValidityVector; } - private ValuesAsBytesReader plainValuesReader = null; + private VectorizedValuesReader valuesReader = null; private VectorizedDictionaryEncodedParquetValuesReader dictionaryEncodedValuesReader = null; private boolean allPagesDictEncoded; private VectorizedParquetDefinitionLevelReader vectorizedDefinitionLevelReader; @@ -65,13 +65,13 @@ public void setAllPagesDictEncoded(boolean allDictEncoded) { @Override protected void reset() { super.reset(); - this.plainValuesReader = null; + this.valuesReader = null; this.vectorizedDefinitionLevelReader = null; } @Override protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) { - ValuesReader previousReader = plainValuesReader; + ValuesReader previousReader = (ValuesReader) valuesReader; if (dataEncoding.usesDictionary()) { if (dictionary == null) { throw new ParquetDecodingException( @@ -94,7 +94,9 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i throw new ParquetDecodingException("could not read page in col " + desc, e); } } else { - if (dataEncoding != Encoding.PLAIN) { + if (dataEncoding == Encoding.PLAIN) { + valuesReader = new VectorizedPlainValuesReader(); + } else { throw new UnsupportedOperationException( "Cannot support vectorized reads for column " + desc @@ -103,14 +105,13 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i + dataEncoding + ". Disable vectorized reads to read this table/file"); } - plainValuesReader = new ValuesAsBytesReader(); - plainValuesReader.initFromPage(valueCount, in); + valuesReader.initFromPage(valueCount, in); dictionaryDecodeMode = DictionaryDecodeMode.NONE; } if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) && previousReader instanceof RequiresPreviousReader) { // previous reader can only be set if reading sequentially - ((RequiresPreviousReader) plainValuesReader).setPreviousReader(previousReader); + ((RequiresPreviousReader) valuesReader).setPreviousReader(previousReader); } } @@ -204,7 +205,7 @@ protected void nextVal( FieldVector vector, int batchSize, int numVals, int typeWidth, NullabilityHolder holder) { vectorizedDefinitionLevelReader .integerReader() - .nextBatch(vector, numVals, typeWidth, batchSize, holder, plainValuesReader); + .nextBatch(vector, numVals, typeWidth, batchSize, holder, valuesReader); } @Override @@ -231,7 +232,7 @@ protected void nextVal( FieldVector vector, int batchSize, int numVals, int typeWidth, NullabilityHolder holder) { vectorizedDefinitionLevelReader .longReader() - .nextBatch(vector, numVals, typeWidth, batchSize, holder, plainValuesReader); + .nextBatch(vector, numVals, typeWidth, batchSize, holder, valuesReader); } @Override @@ -262,7 +263,7 @@ protected void nextVal( FieldVector vector, int batchSize, int numVals, int typeWidth, NullabilityHolder holder) { vectorizedDefinitionLevelReader .timestampMillisReader() - .nextBatch(vector, numVals, typeWidth, batchSize, holder, plainValuesReader); + .nextBatch(vector, numVals, typeWidth, batchSize, holder, valuesReader); } @Override @@ -288,7 +289,7 @@ protected void nextVal( FieldVector vector, int batchSize, int numVals, int typeWidth, NullabilityHolder holder) { vectorizedDefinitionLevelReader .timestampInt96Reader() - .nextBatch(vector, numVals, typeWidth, batchSize, holder, plainValuesReader); + .nextBatch(vector, numVals, typeWidth, batchSize, holder, valuesReader); } @Override @@ -315,7 +316,7 @@ protected void nextVal( FieldVector vector, int batchSize, int numVals, int typeWidth, NullabilityHolder holder) { vectorizedDefinitionLevelReader .floatReader() - .nextBatch(vector, numVals, typeWidth, batchSize, holder, plainValuesReader); + .nextBatch(vector, numVals, typeWidth, batchSize, holder, valuesReader); } @Override @@ -342,7 +343,7 @@ protected void nextVal( FieldVector vector, int batchSize, int numVals, int typeWidth, NullabilityHolder holder) { vectorizedDefinitionLevelReader .doubleReader() - .nextBatch(vector, numVals, typeWidth, batchSize, holder, plainValuesReader); + .nextBatch(vector, numVals, typeWidth, batchSize, holder, valuesReader); } @Override @@ -371,7 +372,7 @@ protected void nextVal( FieldVector vector, int batchSize, int numVals, int typeWidth, NullabilityHolder holder) { vectorizedDefinitionLevelReader .fixedSizeBinaryReader() - .nextBatch(vector, numVals, typeWidth, batchSize, holder, plainValuesReader); + .nextBatch(vector, numVals, typeWidth, batchSize, holder, valuesReader); } @Override @@ -397,7 +398,7 @@ protected void nextVal( FieldVector vector, int batchSize, int numVals, int typeWidth, NullabilityHolder holder) { vectorizedDefinitionLevelReader .varWidthReader() - .nextBatch(vector, numVals, typeWidth, batchSize, holder, plainValuesReader); + .nextBatch(vector, numVals, typeWidth, batchSize, holder, valuesReader); } @Override @@ -423,7 +424,7 @@ protected void nextVal( FieldVector vector, int batchSize, int numVals, int typeWidth, NullabilityHolder holder) { vectorizedDefinitionLevelReader .booleanReader() - .nextBatch(vector, numVals, typeWidth, batchSize, holder, plainValuesReader); + .nextBatch(vector, numVals, typeWidth, batchSize, holder, valuesReader); } @Override diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java index 0d7bbc6e4977..b85e350fd063 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java @@ -84,7 +84,7 @@ public void nextBatch( final int typeWidth, final int numValsToRead, NullabilityHolder nullabilityHolder, - ValuesAsBytesReader valuesReader) { + VectorizedValuesReader valuesReader) { nextBatch( vector, startOffset, @@ -147,7 +147,7 @@ protected abstract void nextRleBatch( FieldVector vector, int typeWidth, NullabilityHolder nullabilityHolder, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int idx, int numValues, byte[] byteArray); @@ -156,7 +156,7 @@ protected abstract void nextPackedBatch( FieldVector vector, int typeWidth, NullabilityHolder nullabilityHolder, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int idx, int numValues, byte[] byteArray); @@ -220,7 +220,7 @@ protected void nextRleBatch( final FieldVector vector, final int typeWidth, NullabilityHolder nullabilityHolder, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int idx, int numValues, byte[] byteArray) { @@ -232,7 +232,7 @@ protected void nextPackedBatch( final FieldVector vector, final int typeWidth, NullabilityHolder nullabilityHolder, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int idx, int numValues, byte[] byteArray) { @@ -252,13 +252,13 @@ protected void nextPackedBatch( } protected abstract void nextVal( - FieldVector vector, int idx, ValuesAsBytesReader valuesReader, Mode mode); + FieldVector vector, int idx, VectorizedValuesReader valuesReader, Mode mode); } class LongReader extends NumericBaseReader { @Override protected void nextVal( - FieldVector vector, int idx, ValuesAsBytesReader valuesReader, Mode mode) { + FieldVector vector, int idx, VectorizedValuesReader valuesReader, Mode mode) { vector.getDataBuffer().setLong(idx, valuesReader.readLong()); } @@ -285,7 +285,7 @@ protected void nextDictEncodedVal( class DoubleReader extends NumericBaseReader { @Override protected void nextVal( - FieldVector vector, int idx, ValuesAsBytesReader valuesReader, Mode mode) { + FieldVector vector, int idx, VectorizedValuesReader valuesReader, Mode mode) { vector.getDataBuffer().setDouble(idx, valuesReader.readDouble()); } @@ -312,7 +312,7 @@ protected void nextDictEncodedVal( class FloatReader extends NumericBaseReader { @Override protected void nextVal( - FieldVector vector, int idx, ValuesAsBytesReader valuesReader, Mode mode) { + FieldVector vector, int idx, VectorizedValuesReader valuesReader, Mode mode) { vector.getDataBuffer().setFloat(idx, valuesReader.readFloat()); } @@ -339,7 +339,7 @@ protected void nextDictEncodedVal( class IntegerReader extends NumericBaseReader { @Override protected void nextVal( - FieldVector vector, int idx, ValuesAsBytesReader valuesReader, Mode mode) { + FieldVector vector, int idx, VectorizedValuesReader valuesReader, Mode mode) { vector.getDataBuffer().setInt(idx, valuesReader.readInteger()); } @@ -371,7 +371,7 @@ protected void nextRleBatch( FieldVector vector, int typeWidth, NullabilityHolder nullabilityHolder, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int idx, int numValues, byte[] byteArray) { @@ -392,7 +392,7 @@ protected void nextPackedBatch( FieldVector vector, int typeWidth, NullabilityHolder nullabilityHolder, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int idx, int numValues, byte[] byteArray) { @@ -411,7 +411,7 @@ protected void nextPackedBatch( protected abstract void nextVal( FieldVector vector, int idx, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int typeWidth, byte[] byteArray); } @@ -422,7 +422,7 @@ class TimestampMillisReader extends BaseReader { protected void nextVal( FieldVector vector, int idx, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int typeWidth, byte[] byteArray) { vector.getDataBuffer().setLong((long) idx * typeWidth, valuesReader.readLong() * 1000); @@ -455,11 +455,11 @@ class TimestampInt96Reader extends BaseReader { protected void nextVal( FieldVector vector, int idx, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int typeWidth, byte[] byteArray) { // 8 bytes (time of day nanos) + 4 bytes(julianDay) = 12 bytes - ByteBuffer buffer = valuesReader.getBuffer(12).order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer buffer = valuesReader.readBinary(12).toByteBuffer().order(ByteOrder.LITTLE_ENDIAN); long timestampInt96 = ParquetUtil.extractTimestampInt96(buffer); vector.getDataBuffer().setLong((long) idx * typeWidth, timestampInt96); } @@ -500,10 +500,10 @@ class FixedSizeBinaryReader extends BaseReader { protected void nextVal( FieldVector vector, int idx, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int typeWidth, byte[] byteArray) { - valuesReader.getBuffer(typeWidth).get(byteArray, 0, typeWidth); + valuesReader.readBinary(typeWidth).toByteBuffer().get(byteArray, 0, typeWidth); ((FixedSizeBinaryVector) vector).set(idx, byteArray); } @@ -535,11 +535,11 @@ class VarWidthReader extends BaseReader { protected void nextVal( FieldVector vector, int idx, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int typeWidth, byte[] byteArray) { int len = valuesReader.readInteger(); - ByteBuffer buffer = valuesReader.getBuffer(len); + ByteBuffer buffer = valuesReader.readBinary(len).toByteBuffer(); // Calling setValueLengthSafe takes care of allocating a larger buffer if // running out of space. ((BaseVariableWidthVector) vector).setValueLengthSafe(idx, len); @@ -580,10 +580,10 @@ class BooleanReader extends BaseReader { protected void nextVal( FieldVector vector, int idx, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int typeWidth, byte[] byteArray) { - ((BitVector) vector).setSafe(idx, valuesReader.readBooleanAsInt()); + ((BitVector) vector).setSafe(idx, valuesReader.readBoolean() ? 1 : 0); } @Override @@ -606,7 +606,7 @@ class DictionaryIdReader extends BaseReader { protected void nextVal( FieldVector vector, int idx, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int typeWidth, byte[] byteArray) { throw new UnsupportedOperationException(); @@ -651,13 +651,13 @@ private void setNulls( private void setNextNValuesInVector( int typeWidth, NullabilityHolder nullabilityHolder, - ValuesAsBytesReader valuesReader, + VectorizedValuesReader valuesReader, int bufferIdx, FieldVector vector, int numValues) { ArrowBuf validityBuffer = vector.getValidityBuffer(); if (currentValue == maxDefLevel) { - ByteBuffer buffer = valuesReader.getBuffer(numValues * typeWidth); + ByteBuffer buffer = valuesReader.readBinary(numValues * typeWidth).toByteBuffer(); vector.getDataBuffer().setBytes((long) bufferIdx * typeWidth, buffer); nullabilityHolder.setNotNulls(bufferIdx, numValues); if (setArrowValidityVector) { diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java new file mode 100644 index 000000000000..9bee621f7760 --- /dev/null +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.arrow.vectorized.parquet; + +import java.nio.ByteBuffer; +import org.apache.arrow.vector.FieldVector; +import org.apache.iceberg.parquet.ValuesAsBytesReader; +import org.apache.parquet.io.api.Binary; + +class VectorizedPlainValuesReader extends ValuesAsBytesReader implements VectorizedValuesReader { + + VectorizedPlainValuesReader() {} + + @Override + public byte readByte() { + return (byte) readInteger(); + } + + @Override + public short readShort() { + return (short) readInteger(); + } + + @Override + public Binary readBinary(int len) { + ByteBuffer buffer = getBuffer(len); + if (buffer.hasArray()) { + return Binary.fromConstantByteArray( + buffer.array(), buffer.arrayOffset() + buffer.position(), len); + } else { + byte[] bytes = new byte[len]; + buffer.get(bytes); + return Binary.fromConstantByteArray(bytes); + } + } + + private void readValues(int total, FieldVector vec, int rowId, int typeWidth) { + ByteBuffer buffer = getBuffer(total * typeWidth); + vec.getDataBuffer().setBytes((long) rowId * typeWidth, buffer); + } + + @Override + public void readIntegers(int total, FieldVector vec, int rowId) { + readValues(total, vec, rowId, 4); + } + + @Override + public void readLongs(int total, FieldVector vec, int rowId) { + readValues(total, vec, rowId, 8); + } + + @Override + public void readFloats(int total, FieldVector vec, int rowId) { + readValues(total, vec, rowId, 4); + } + + @Override + public void readDoubles(int total, FieldVector vec, int rowId) { + readValues(total, vec, rowId, 8); + } +} \ No newline at end of file diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java new file mode 100644 index 000000000000..e1d65e5e8a3a --- /dev/null +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.arrow.vectorized.parquet; + +import org.apache.arrow.vector.FieldVector; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.io.api.Binary; + +/** Interface for value decoding that supports vectorized (aka batched) decoding. */ +interface VectorizedValuesReader { + boolean readBoolean(); + + byte readByte(); + + short readShort(); + + int readInteger(); + + long readLong(); + + float readFloat(); + + double readDouble(); + + Binary readBinary(int len); + + /* + * Reads `total` values into `vec` start at `vec[rowId]` + */ + void readIntegers(int total, FieldVector vec, int rowId); + + void readLongs(int total, FieldVector vec, int rowId); + + void readFloats(int total, FieldVector vec, int rowId); + + void readDoubles(int total, FieldVector vec, int rowId); + + void initFromPage(int valueCount, ByteBufferInputStream in); +} \ No newline at end of file From 0bba5eff56b46035d3a3755afd88417f486c0f2e Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 10 Jun 2025 10:33:29 -0700 Subject: [PATCH 02/47] lint --- .../parquet/VectorizedPageIterator.java | 1 - ...ectorizedParquetDefinitionLevelReader.java | 1 - .../parquet/VectorizedPlainValuesReader.java | 82 +++++++++---------- .../parquet/VectorizedValuesReader.java | 34 ++++---- 4 files changed, 58 insertions(+), 60 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java index 4f01216f35b3..7551776853b8 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java @@ -24,7 +24,6 @@ import org.apache.iceberg.arrow.vectorized.NullabilityHolder; import org.apache.iceberg.parquet.BasePageIterator; import org.apache.iceberg.parquet.ParquetUtil; -import org.apache.iceberg.parquet.ValuesAsBytesReader; import org.apache.parquet.CorruptDeltaByteArrays; import org.apache.parquet.bytes.ByteBufferInputStream; import org.apache.parquet.bytes.BytesUtils; diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java index b85e350fd063..0f85101a5b79 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java @@ -29,7 +29,6 @@ import org.apache.arrow.vector.IntVector; import org.apache.iceberg.arrow.vectorized.NullabilityHolder; import org.apache.iceberg.parquet.ParquetUtil; -import org.apache.iceberg.parquet.ValuesAsBytesReader; import org.apache.parquet.column.Dictionary; public final class VectorizedParquetDefinitionLevelReader diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java index 9bee621f7760..2c5467c9c57c 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java @@ -25,53 +25,53 @@ class VectorizedPlainValuesReader extends ValuesAsBytesReader implements VectorizedValuesReader { - VectorizedPlainValuesReader() {} + VectorizedPlainValuesReader() {} - @Override - public byte readByte() { - return (byte) readInteger(); - } + @Override + public byte readByte() { + return (byte) readInteger(); + } - @Override - public short readShort() { - return (short) readInteger(); - } + @Override + public short readShort() { + return (short) readInteger(); + } - @Override - public Binary readBinary(int len) { - ByteBuffer buffer = getBuffer(len); - if (buffer.hasArray()) { - return Binary.fromConstantByteArray( - buffer.array(), buffer.arrayOffset() + buffer.position(), len); - } else { - byte[] bytes = new byte[len]; - buffer.get(bytes); - return Binary.fromConstantByteArray(bytes); - } + @Override + public Binary readBinary(int len) { + ByteBuffer buffer = getBuffer(len); + if (buffer.hasArray()) { + return Binary.fromConstantByteArray( + buffer.array(), buffer.arrayOffset() + buffer.position(), len); + } else { + byte[] bytes = new byte[len]; + buffer.get(bytes); + return Binary.fromConstantByteArray(bytes); } + } - private void readValues(int total, FieldVector vec, int rowId, int typeWidth) { - ByteBuffer buffer = getBuffer(total * typeWidth); - vec.getDataBuffer().setBytes((long) rowId * typeWidth, buffer); - } + private void readValues(int total, FieldVector vec, int rowId, int typeWidth) { + ByteBuffer buffer = getBuffer(total * typeWidth); + vec.getDataBuffer().setBytes((long) rowId * typeWidth, buffer); + } - @Override - public void readIntegers(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, 4); - } + @Override + public void readIntegers(int total, FieldVector vec, int rowId) { + readValues(total, vec, rowId, 4); + } - @Override - public void readLongs(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, 8); - } + @Override + public void readLongs(int total, FieldVector vec, int rowId) { + readValues(total, vec, rowId, 8); + } - @Override - public void readFloats(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, 4); - } + @Override + public void readFloats(int total, FieldVector vec, int rowId) { + readValues(total, vec, rowId, 4); + } - @Override - public void readDoubles(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, 8); - } -} \ No newline at end of file + @Override + public void readDoubles(int total, FieldVector vec, int rowId) { + readValues(total, vec, rowId, 8); + } +} diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java index e1d65e5e8a3a..13d87b40b795 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java @@ -24,32 +24,32 @@ /** Interface for value decoding that supports vectorized (aka batched) decoding. */ interface VectorizedValuesReader { - boolean readBoolean(); + boolean readBoolean(); - byte readByte(); + byte readByte(); - short readShort(); + short readShort(); - int readInteger(); + int readInteger(); - long readLong(); + long readLong(); - float readFloat(); + float readFloat(); - double readDouble(); + double readDouble(); - Binary readBinary(int len); + Binary readBinary(int len); - /* - * Reads `total` values into `vec` start at `vec[rowId]` - */ - void readIntegers(int total, FieldVector vec, int rowId); + /* + * Reads `total` values into `vec` start at `vec[rowId]` + */ + void readIntegers(int total, FieldVector vec, int rowId); - void readLongs(int total, FieldVector vec, int rowId); + void readLongs(int total, FieldVector vec, int rowId); - void readFloats(int total, FieldVector vec, int rowId); + void readFloats(int total, FieldVector vec, int rowId); - void readDoubles(int total, FieldVector vec, int rowId); + void readDoubles(int total, FieldVector vec, int rowId); - void initFromPage(int valueCount, ByteBufferInputStream in); -} \ No newline at end of file + void initFromPage(int valueCount, ByteBufferInputStream in); +} From 9ecc2be0e052adefd681cadc67aea406e20a5143 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Wed, 18 Jun 2025 09:20:32 -0700 Subject: [PATCH 03/47] some changes per comments --- .../parquet/VectorizedPlainValuesReader.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java index 2c5467c9c57c..ac98da17f2ab 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java @@ -25,6 +25,11 @@ class VectorizedPlainValuesReader extends ValuesAsBytesReader implements VectorizedValuesReader { + public static final int INT_SIZE = 4; + public static final int LONG_SIZE = 8; + public static final int FLOAT_SIZE = 4; + public static final int DOUBLE_SIZE = 8; + VectorizedPlainValuesReader() {} @Override @@ -57,21 +62,21 @@ private void readValues(int total, FieldVector vec, int rowId, int typeWidth) { @Override public void readIntegers(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, 4); + readValues(total, vec, rowId, INT_SIZE); } @Override public void readLongs(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, 8); + readValues(total, vec, rowId, LONG_SIZE); } @Override public void readFloats(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, 4); + readValues(total, vec, rowId, FLOAT_SIZE); } @Override public void readDoubles(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, 8); + readValues(total, vec, rowId, DOUBLE_SIZE); } } From 8d186fe9f089c9d59524fc090415321bad63f768 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Mon, 23 Jun 2025 10:14:47 -0700 Subject: [PATCH 04/47] javadoc --- .../parquet/VectorizedValuesReader.java | 49 +++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java index 13d87b40b795..d24bafd2942f 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java @@ -20,36 +20,79 @@ import org.apache.arrow.vector.FieldVector; import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.io.api.Binary; -/** Interface for value decoding that supports vectorized (aka batched) decoding. */ +/** + * Interface for value decoding that supports vectorized (aka batched) decoding. + * Implementations are expected to be {@link ValuesReader} instances, and this interface + * "extends" that abstract class by overriding the salient methods. + */ interface VectorizedValuesReader { + + /** + * Read a single boolean + */ boolean readBoolean(); + /** + * Read a single byte + */ byte readByte(); + /** + * Read a single short + */ short readShort(); + /** + * Read a single integer + */ int readInteger(); + /** + * Read a single long + */ long readLong(); + /** + * Read a single float + */ float readFloat(); + /** + * Read a single double + */ double readDouble(); + /** + * Read binary data of some length + * @param len The number of bytes to read + */ Binary readBinary(int len); - /* - * Reads `total` values into `vec` start at `vec[rowId]` + /** + * Read `total` integers into `vec` starting at `vec[rowId]` */ void readIntegers(int total, FieldVector vec, int rowId); + /** + * Read `total` longs into `vec` starting at `vec[rowId]` + */ void readLongs(int total, FieldVector vec, int rowId); + /** + * Read `total` floats into `vec` starting at `vec[rowId]` + */ void readFloats(int total, FieldVector vec, int rowId); + /** + * Read `total` doubles into `vec` starting at `vec[rowId]` + */ void readDoubles(int total, FieldVector vec, int rowId); + /** + * Initialize the reader from a page. See {@link ValuesReader#initFromPage(int, ByteBufferInputStream)}. + */ void initFromPage(int valueCount, ByteBufferInputStream in); } From 5ce8913013000bfd6fd800f7f401e7d2d4e8ba02 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Mon, 23 Jun 2025 10:18:19 -0700 Subject: [PATCH 05/47] lint --- .../parquet/VectorizedValuesReader.java | 54 ++++++------------- 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java index d24bafd2942f..7f02752bd5c9 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java @@ -24,75 +24,55 @@ import org.apache.parquet.io.api.Binary; /** - * Interface for value decoding that supports vectorized (aka batched) decoding. - * Implementations are expected to be {@link ValuesReader} instances, and this interface - * "extends" that abstract class by overriding the salient methods. + * Interface for value decoding that supports vectorized (aka batched) decoding. Implementations are + * expected to be {@link ValuesReader} instances, and this interface "extends" that abstract class + * by overriding the salient methods. */ interface VectorizedValuesReader { - /** - * Read a single boolean - */ + /** Read a single boolean */ boolean readBoolean(); - /** - * Read a single byte - */ + /** Read a single byte */ byte readByte(); - /** - * Read a single short - */ + /** Read a single short */ short readShort(); - /** - * Read a single integer - */ + /** Read a single integer */ int readInteger(); - /** - * Read a single long - */ + /** Read a single long */ long readLong(); - /** - * Read a single float - */ + /** Read a single float */ float readFloat(); - /** - * Read a single double - */ + /** Read a single double */ double readDouble(); /** * Read binary data of some length + * * @param len The number of bytes to read */ Binary readBinary(int len); - /** - * Read `total` integers into `vec` starting at `vec[rowId]` - */ + /** Read `total` integers into `vec` starting at `vec[rowId]` */ void readIntegers(int total, FieldVector vec, int rowId); - /** - * Read `total` longs into `vec` starting at `vec[rowId]` - */ + /** Read `total` longs into `vec` starting at `vec[rowId]` */ void readLongs(int total, FieldVector vec, int rowId); - /** - * Read `total` floats into `vec` starting at `vec[rowId]` - */ + /** Read `total` floats into `vec` starting at `vec[rowId]` */ void readFloats(int total, FieldVector vec, int rowId); - /** - * Read `total` doubles into `vec` starting at `vec[rowId]` - */ + /** Read `total` doubles into `vec` starting at `vec[rowId]` */ void readDoubles(int total, FieldVector vec, int rowId); /** - * Initialize the reader from a page. See {@link ValuesReader#initFromPage(int, ByteBufferInputStream)}. + * Initialize the reader from a page. See {@link ValuesReader#initFromPage(int, + * ByteBufferInputStream)}. */ void initFromPage(int valueCount, ByteBufferInputStream in); } From 9fe0bba8e1f58d277bf83b1bedb4f33906c34256 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Mon, 23 Jun 2025 10:48:08 -0700 Subject: [PATCH 06/47] create class --- .../VectorizedDeltaEncodedValuesReader.java | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java new file mode 100644 index 000000000000..62b44aef3d1d --- /dev/null +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -0,0 +1,69 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.arrow.vectorized.parquet; + +import org.apache.arrow.vector.FieldVector; +import org.apache.parquet.column.values.ValuesReader; +import org.apache.parquet.io.api.Binary; + +public class VectorizedDeltaEncodedValuesReader extends ValuesReader implements VectorizedValuesReader { + + @Override + public void skip() { + + } + + @Override + public byte readByte() { + return 0; + } + + @Override + public short readShort() { + return 0; + } + + @Override + public Binary readBinary(int len) { + return null; + } + + @Override + public void readIntegers(int total, FieldVector vec, int rowId) { + + } + + @Override + public void readLongs(int total, FieldVector vec, int rowId) { + + } + + @Override + public void readFloats(int total, FieldVector vec, int rowId) { + + } + + @Override + public void readDoubles(int total, FieldVector vec, int rowId) { + + } +} From 6cecf96db382b9f37123d26e3545457ddb19d728 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Mon, 23 Jun 2025 10:49:55 -0700 Subject: [PATCH 07/47] remove clash --- .../arrow/vectorized/parquet/VectorizedPageIterator.java | 7 ++++++- .../arrow/vectorized/parquet/VectorizedValuesReader.java | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java index 7551776853b8..99cf2dc45864 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java @@ -104,7 +104,12 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i + dataEncoding + ". Disable vectorized reads to read this table/file"); } - valuesReader.initFromPage(valueCount, in); + try { + valuesReader.initFromPage(valueCount, in); + } catch (IOException e) { + throw new ParquetDecodingException( + "could not read page " + valueCount + " in col " + desc, e); + } dictionaryDecodeMode = DictionaryDecodeMode.NONE; } if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java index 7f02752bd5c9..f8b7c92f5999 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java @@ -18,6 +18,7 @@ */ package org.apache.iceberg.arrow.vectorized.parquet; +import java.io.IOException; import org.apache.arrow.vector.FieldVector; import org.apache.parquet.bytes.ByteBufferInputStream; import org.apache.parquet.column.values.ValuesReader; @@ -74,5 +75,5 @@ interface VectorizedValuesReader { * Initialize the reader from a page. See {@link ValuesReader#initFromPage(int, * ByteBufferInputStream)}. */ - void initFromPage(int valueCount, ByteBufferInputStream in); + void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException; } From 3aed168ac53a1241c33bec4ff0c76343bc0402be Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Mon, 23 Jun 2025 14:57:41 -0700 Subject: [PATCH 08/47] refactoring --- .../VectorizedDeltaEncodedValuesReader.java | 243 +++++++++++++++++- .../parquet/VectorizedPlainValuesReader.java | 5 - .../parquet/VectorizedValuesReader.java | 5 + .../iceberg/parquet/ValuesAsBytesReader.java | 4 +- 4 files changed, 243 insertions(+), 14 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index 62b44aef3d1d..dc10b251f4aa 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -21,15 +21,80 @@ package org.apache.iceberg.arrow.vectorized.parquet; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Arrays; import org.apache.arrow.vector.FieldVector; +import org.apache.parquet.Preconditions; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.bytes.BytesUtils; import org.apache.parquet.column.values.ValuesReader; +import org.apache.parquet.column.values.bitpacking.BytePackerForLong; +import org.apache.parquet.column.values.bitpacking.Packer; +import org.apache.parquet.column.values.plain.PlainValuesReader; +import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.io.api.Binary; -public class VectorizedDeltaEncodedValuesReader extends ValuesReader implements VectorizedValuesReader { +/** + * A {@link VectorizedValuesReader} implementation for the encoding type DELTA_BINARY_PACKED. + * This is adapted from Spark's VectorizedDeltaBinaryPackedReader. + * + * @see + * Parquet format encodings: DELTA_BINARY_PACKED + */ +public class VectorizedDeltaEncodedValuesReader + extends ValuesReader implements VectorizedValuesReader { - @Override - public void skip() { + // header data + private int blockSizeInValues; + private int miniBlockNumInABlock; + private int totalValueCount; + private long firstValue; + + private int miniBlockSizeInValues; + + // values read by the caller + private int valuesRead = 0; + // variables to keep state of the current block and miniblock + private long lastValueRead; // needed to compute the next value + private long minDeltaInCurrentBlock; // needed to compute the next value + // currentMiniBlock keeps track of the mini block within the current block that + // we read and decoded most recently. Only used as an index into + // bitWidths array + private int currentMiniBlock = 0; + private int[] bitWidths; // bit widths for each miniBlock in the current block + private int remainingInBlock = 0; // values in current block still to be read + private int remainingInMiniBlock = 0; // values in current mini block still to be read + private long[] unpackedValuesBuffer; + + private ByteBufferInputStream in; + + // temporary buffers used by readByte, readShort, readInteger, and readLong + private byte byteVal; + private short shortVal; + private int intVal; + private long longVal; + + @Override + public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { + Preconditions.checkArgument(valueCount >= 1, + "Page must have at least one value, but it has " + valueCount); + this.in = in; + // Read the header + this.blockSizeInValues = BytesUtils.readUnsignedVarInt(in); + this.miniBlockNumInABlock = BytesUtils.readUnsignedVarInt(in); + double miniSize = (double) blockSizeInValues / miniBlockNumInABlock; + Preconditions.checkArgument(miniSize % 8 == 0, + "miniBlockSize must be multiple of 8, but it's " + miniSize); + this.miniBlockSizeInValues = (int) miniSize; + // True value count. May be less than valueCount because of nulls + this.totalValueCount = BytesUtils.readUnsignedVarInt(in); + this.bitWidths = new int[miniBlockNumInABlock]; + this.unpackedValuesBuffer = new long[miniBlockSizeInValues]; + // read the first value + firstValue = BytesUtils.readZigZagVarLong(in); } @Override @@ -42,28 +107,192 @@ public short readShort() { return 0; } + @Override + public int readInteger() { + return -1; + } + + @Override + public long readLong() { + return -1; + } + + @Override + public void skip() { + throw new UnsupportedOperationException("skip is not supported"); + } + @Override public Binary readBinary(int len) { - return null; + throw new UnsupportedOperationException("readBinary is not supported"); } @Override public void readIntegers(int total, FieldVector vec, int rowId) { - + readValues( + total, + vec, + rowId, + INT_SIZE, + (b, v) -> b.putInt((int) v)); } @Override public void readLongs(int total, FieldVector vec, int rowId) { - + readValues( + total, + vec, + rowId, + LONG_SIZE, + ByteBuffer::putLong); } @Override public void readFloats(int total, FieldVector vec, int rowId) { - + throw new UnsupportedOperationException("readFloats is not supported"); } @Override public void readDoubles(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readDoubles is not supported"); + } + + private void readValues( + int total, FieldVector vec, int rowId, int typeWidth, IntegerOutputWriter outputWriter) { + if (valuesRead + total > totalValueCount) { + throw new ParquetDecodingException( + "No more values to read. Total values read: " + valuesRead + ", total count: " + + totalValueCount + ", trying to read " + total + " more."); + } + int remaining = total; + // First value + if (valuesRead == 0) { + ByteBuffer firstValueBuffer = getBuffer(typeWidth); + outputWriter.write(firstValueBuffer, firstValue); + vec.getDataBuffer().setBytes((long) rowId * typeWidth, firstValueBuffer); + lastValueRead = firstValue; + rowId++; + remaining--; + } + while (remaining > 0) { + int n; + try { + n = loadMiniBlockToOutput(remaining, vec, rowId, typeWidth, outputWriter); + } catch (IOException e) { + throw new ParquetDecodingException("Error reading mini block.", e); + } + rowId += n; + remaining -= n; + } + valuesRead = total - remaining; + } + + /** + * Read from a mini block. Read at most 'remaining' values into output. + * + * @return the number of values read into output + */ + private int loadMiniBlockToOutput( + int remaining, + FieldVector vec, + int rowId, + int typeWidth, + IntegerOutputWriter outputWriter) throws IOException { + + // new block; read the block header + if (remainingInBlock == 0) { + readBlockHeader(); + } + + // new miniblock, unpack the miniblock + if (remainingInMiniBlock == 0) { + unpackMiniBlock(); + } + + // read values from miniblock + ByteBuffer buffer = getBuffer(remainingInMiniBlock * typeWidth); + int valuesRead = 0; + for (int i = miniBlockSizeInValues - remainingInMiniBlock; + i < miniBlockSizeInValues && valuesRead < remaining; i++) { + // calculate values from deltas unpacked for current block + long outValue = lastValueRead + minDeltaInCurrentBlock + unpackedValuesBuffer[i]; + lastValueRead = outValue; + outputWriter.write(buffer, outValue); + remainingInBlock--; + remainingInMiniBlock--; + valuesRead++; + } + vec.getDataBuffer().setBytes((long) rowId * typeWidth, buffer); + + return valuesRead; + } + + private void readBlockHeader() { + try { + minDeltaInCurrentBlock = BytesUtils.readZigZagVarLong(in); + } catch (IOException e) { + throw new ParquetDecodingException("Can not read min delta in current block", e); + } + readBitWidthsForMiniBlocks(); + remainingInBlock = blockSizeInValues; + currentMiniBlock = 0; + remainingInMiniBlock = 0; + } + + private ByteBuffer getBuffer(int length) { + try { + return this.in.slice(length).order(ByteOrder.LITTLE_ENDIAN); + } catch (IOException e) { + throw new ParquetDecodingException("Failed to read " + length + " bytes", e); + } + } + + /** + * mini block has a size of 8*n, unpack 32 value each time + * + * see org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader#unpackMiniBlock + */ + private void unpackMiniBlock() throws IOException { + Arrays.fill(this.unpackedValuesBuffer, 0); + BytePackerForLong packer = Packer.LITTLE_ENDIAN.newBytePackerForLong( + bitWidths[currentMiniBlock]); + for (int j = 0; j < miniBlockSizeInValues; j += 8) { + ByteBuffer buffer = in.slice(packer.getBitWidth()); + if (buffer.hasArray()) { + packer.unpack8Values(buffer.array(), + buffer.arrayOffset() + buffer.position(), unpackedValuesBuffer, j); + } else { + packer.unpack8Values(buffer, buffer.position(), unpackedValuesBuffer, j); + } + } + remainingInMiniBlock = miniBlockSizeInValues; + currentMiniBlock++; + } + + // From org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader + private void readBitWidthsForMiniBlocks() { + for (int i = 0; i < miniBlockNumInABlock; i++) { + try { + bitWidths[i] = BytesUtils.readIntLittleEndianOnOneByte(in); + } catch (IOException e) { + throw new ParquetDecodingException("Can not decode bitwidth in block header", e); + } + } + } + + /** + * A functional interface to write long values to into a ByteBuffer + */ + @FunctionalInterface + interface IntegerOutputWriter { + /** + * A functional interface that writes a long value to a specified row in a ByteBuffer, + * which will be written into a FieldVector + * + * @param buffer a ByteBuffer to write the value into + * @param val value to write + */ + void write(ByteBuffer buffer, long val); } } diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java index ac98da17f2ab..764b2fc353e3 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java @@ -25,11 +25,6 @@ class VectorizedPlainValuesReader extends ValuesAsBytesReader implements VectorizedValuesReader { - public static final int INT_SIZE = 4; - public static final int LONG_SIZE = 8; - public static final int FLOAT_SIZE = 4; - public static final int DOUBLE_SIZE = 8; - VectorizedPlainValuesReader() {} @Override diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java index f8b7c92f5999..8eb6431a2c85 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java @@ -31,6 +31,11 @@ */ interface VectorizedValuesReader { + public static final int INT_SIZE = 4; + public static final int LONG_SIZE = 8; + public static final int FLOAT_SIZE = 4; + public static final int DOUBLE_SIZE = 8; + /** Read a single boolean */ boolean readBoolean(); diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java b/parquet/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java index 71e10247af37..ca876d16df29 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java @@ -56,12 +56,12 @@ public ByteBuffer getBuffer(int length) { } @Override - public final int readInteger() { + public int readInteger() { return getBuffer(4).getInt(); } @Override - public final long readLong() { + public long readLong() { return getBuffer(8).getLong(); } From 98d1c5c5720bf160021706d074320d03d21951b6 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Mon, 23 Jun 2025 14:59:02 -0700 Subject: [PATCH 09/47] clean up --- .../VectorizedDeltaEncodedValuesReader.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index dc10b251f4aa..3b41a1e6188b 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -71,9 +71,7 @@ public class VectorizedDeltaEncodedValuesReader private ByteBufferInputStream in; - // temporary buffers used by readByte, readShort, readInteger, and readLong - private byte byteVal; - private short shortVal; + // temporary buffers used by readInteger and readLong private int intVal; private long longVal; @@ -99,22 +97,24 @@ public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOExce @Override public byte readByte() { - return 0; + throw new UnsupportedOperationException("readByte is not supported"); } @Override public short readShort() { - return 0; + throw new UnsupportedOperationException("readShort is not supported"); } @Override public int readInteger() { - return -1; + readValues(1, null, 0, INT_SIZE, (b, v) -> intVal = (int) v); + return intVal; } @Override public long readLong() { - return -1; + readValues(1, null, 0, LONG_SIZE, (b, v) -> longVal = (int) v); + return longVal; } @Override From b72e3386607598f8eab0908f69543a961490d7bb Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Mon, 23 Jun 2025 15:28:06 -0700 Subject: [PATCH 10/47] wire up --- .../arrow/vectorized/parquet/VectorizedPageIterator.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java index 99cf2dc45864..ca39f4011513 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java @@ -95,6 +95,8 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i } else { if (dataEncoding == Encoding.PLAIN) { valuesReader = new VectorizedPlainValuesReader(); + } else if (dataEncoding == Encoding.DELTA_BINARY_PACKED) { + valuesReader = new VectorizedDeltaEncodedValuesReader(); } else { throw new UnsupportedOperationException( "Cannot support vectorized reads for column " From b76cc47e832c4085b5defb9db6757b3a134ed0ac Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Wed, 25 Jun 2025 15:22:19 -0700 Subject: [PATCH 11/47] tweak header --- .../VectorizedDeltaEncodedValuesReader.java | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index 3b41a1e6188b..b94b2fc797c4 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.arrow.vectorized.parquet; import java.io.IOException; From ec077750579ebeed715ae9e528ff2b7118e45309 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Wed, 25 Jun 2025 16:59:04 -0700 Subject: [PATCH 12/47] check in --- .../VectorizedDeltaEncodedValuesReader.java | 435 +++++++++--------- .../parquet/VectorizedPageIterator.java | 27 +- 2 files changed, 228 insertions(+), 234 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index b94b2fc797c4..4232dc8796d9 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -29,267 +29,258 @@ import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.column.values.bitpacking.BytePackerForLong; import org.apache.parquet.column.values.bitpacking.Packer; -import org.apache.parquet.column.values.plain.PlainValuesReader; import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.io.api.Binary; /** - * A {@link VectorizedValuesReader} implementation for the encoding type DELTA_BINARY_PACKED. - * This is adapted from Spark's VectorizedDeltaBinaryPackedReader. + * A {@link VectorizedValuesReader} implementation for the encoding type DELTA_BINARY_PACKED. This + * is adapted from Spark's VectorizedDeltaBinaryPackedReader. * - * @see - * Parquet format encodings: DELTA_BINARY_PACKED + * @see + * Parquet format encodings: DELTA_BINARY_PACKED */ -public class VectorizedDeltaEncodedValuesReader - extends ValuesReader implements VectorizedValuesReader { +public class VectorizedDeltaEncodedValuesReader extends ValuesReader + implements VectorizedValuesReader { - // header data - private int blockSizeInValues; - private int miniBlockNumInABlock; - private int totalValueCount; - private long firstValue; + // header data + private int blockSizeInValues; + private int miniBlockNumInABlock; + private int totalValueCount; + private long firstValue; - private int miniBlockSizeInValues; + private int miniBlockSizeInValues; - // values read by the caller - private int valuesRead = 0; + // values read by the caller + private int valuesRead = 0; - // variables to keep state of the current block and miniblock - private long lastValueRead; // needed to compute the next value - private long minDeltaInCurrentBlock; // needed to compute the next value - // currentMiniBlock keeps track of the mini block within the current block that - // we read and decoded most recently. Only used as an index into - // bitWidths array - private int currentMiniBlock = 0; - private int[] bitWidths; // bit widths for each miniBlock in the current block - private int remainingInBlock = 0; // values in current block still to be read - private int remainingInMiniBlock = 0; // values in current mini block still to be read - private long[] unpackedValuesBuffer; + // variables to keep state of the current block and miniblock + private long lastValueRead; // needed to compute the next value + private long minDeltaInCurrentBlock; // needed to compute the next value + // currentMiniBlock keeps track of the mini block within the current block that + // we read and decoded most recently. Only used as an index into + // bitWidths array + private int currentMiniBlock = 0; + private int[] bitWidths; // bit widths for each miniBlock in the current block + private int remainingInBlock = 0; // values in current block still to be read + private int remainingInMiniBlock = 0; // values in current mini block still to be read + private long[] unpackedValuesBuffer; - private ByteBufferInputStream in; + private ByteBufferInputStream in; - // temporary buffers used by readInteger and readLong - private int intVal; - private long longVal; + // temporary buffers used by readInteger and readLong + private int intVal; + private long longVal; - @Override - public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { - Preconditions.checkArgument(valueCount >= 1, - "Page must have at least one value, but it has " + valueCount); - this.in = in; - // Read the header - this.blockSizeInValues = BytesUtils.readUnsignedVarInt(in); - this.miniBlockNumInABlock = BytesUtils.readUnsignedVarInt(in); - double miniSize = (double) blockSizeInValues / miniBlockNumInABlock; - Preconditions.checkArgument(miniSize % 8 == 0, - "miniBlockSize must be multiple of 8, but it's " + miniSize); - this.miniBlockSizeInValues = (int) miniSize; - // True value count. May be less than valueCount because of nulls - this.totalValueCount = BytesUtils.readUnsignedVarInt(in); - this.bitWidths = new int[miniBlockNumInABlock]; - this.unpackedValuesBuffer = new long[miniBlockSizeInValues]; - // read the first value - firstValue = BytesUtils.readZigZagVarLong(in); - } + @Override + public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { + Preconditions.checkArgument( + valueCount >= 1, "Page must have at least one value, but it has " + valueCount); + this.in = in; + // Read the header + this.blockSizeInValues = BytesUtils.readUnsignedVarInt(in); + this.miniBlockNumInABlock = BytesUtils.readUnsignedVarInt(in); + double miniSize = (double) blockSizeInValues / miniBlockNumInABlock; + Preconditions.checkArgument( + miniSize % 8 == 0, "miniBlockSize must be multiple of 8, but it's " + miniSize); + this.miniBlockSizeInValues = (int) miniSize; + // True value count. May be less than valueCount because of nulls + this.totalValueCount = BytesUtils.readUnsignedVarInt(in); + this.bitWidths = new int[miniBlockNumInABlock]; + this.unpackedValuesBuffer = new long[miniBlockSizeInValues]; + // read the first value + firstValue = BytesUtils.readZigZagVarLong(in); + } - @Override - public byte readByte() { - throw new UnsupportedOperationException("readByte is not supported"); - } + @Override + public byte readByte() { + throw new UnsupportedOperationException("readByte is not supported"); + } - @Override - public short readShort() { - throw new UnsupportedOperationException("readShort is not supported"); - } + @Override + public short readShort() { + throw new UnsupportedOperationException("readShort is not supported"); + } - @Override - public int readInteger() { - readValues(1, null, 0, INT_SIZE, (b, v) -> intVal = (int) v); - return intVal; - } + @Override + public int readInteger() { + readValues(1, null, 0, INT_SIZE, (b, v) -> intVal = (int) v); + return intVal; + } - @Override - public long readLong() { - readValues(1, null, 0, LONG_SIZE, (b, v) -> longVal = (int) v); - return longVal; - } + @Override + public long readLong() { + readValues(1, null, 0, LONG_SIZE, (b, v) -> longVal = (int) v); + return longVal; + } - @Override - public void skip() { - throw new UnsupportedOperationException("skip is not supported"); - } + @Override + public void skip() { + throw new UnsupportedOperationException("skip is not supported"); + } - @Override - public Binary readBinary(int len) { - throw new UnsupportedOperationException("readBinary is not supported"); - } + @Override + public Binary readBinary(int len) { + throw new UnsupportedOperationException("readBinary is not supported"); + } - @Override - public void readIntegers(int total, FieldVector vec, int rowId) { - readValues( - total, - vec, - rowId, - INT_SIZE, - (b, v) -> b.putInt((int) v)); - } + @Override + public void readIntegers(int total, FieldVector vec, int rowId) { + readValues(total, vec, rowId, INT_SIZE, (b, v) -> b.putInt((int) v)); + } - @Override - public void readLongs(int total, FieldVector vec, int rowId) { - readValues( - total, - vec, - rowId, - LONG_SIZE, - ByteBuffer::putLong); - } + @Override + public void readLongs(int total, FieldVector vec, int rowId) { + readValues(total, vec, rowId, LONG_SIZE, ByteBuffer::putLong); + } - @Override - public void readFloats(int total, FieldVector vec, int rowId) { - throw new UnsupportedOperationException("readFloats is not supported"); - } + @Override + public void readFloats(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readFloats is not supported"); + } - @Override - public void readDoubles(int total, FieldVector vec, int rowId) { - throw new UnsupportedOperationException("readDoubles is not supported"); - } + @Override + public void readDoubles(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readDoubles is not supported"); + } - private void readValues( - int total, FieldVector vec, int rowId, int typeWidth, IntegerOutputWriter outputWriter) { - if (valuesRead + total > totalValueCount) { - throw new ParquetDecodingException( - "No more values to read. Total values read: " + valuesRead + ", total count: " - + totalValueCount + ", trying to read " + total + " more."); - } - int remaining = total; - // First value - if (valuesRead == 0) { - ByteBuffer firstValueBuffer = getBuffer(typeWidth); - outputWriter.write(firstValueBuffer, firstValue); - vec.getDataBuffer().setBytes((long) rowId * typeWidth, firstValueBuffer); - lastValueRead = firstValue; - rowId++; - remaining--; - } - while (remaining > 0) { - int n; - try { - n = loadMiniBlockToOutput(remaining, vec, rowId, typeWidth, outputWriter); - } catch (IOException e) { - throw new ParquetDecodingException("Error reading mini block.", e); - } - rowId += n; - remaining -= n; - } - valuesRead = total - remaining; + private void readValues( + int total, FieldVector vec, int rowId, int typeWidth, IntegerOutputWriter outputWriter) { + if (valuesRead + total > totalValueCount) { + throw new ParquetDecodingException( + "No more values to read. Total values read: " + + valuesRead + + ", total count: " + + totalValueCount + + ", trying to read " + + total + + " more."); } + int remaining = total; + // First value + if (valuesRead == 0) { + ByteBuffer firstValueBuffer = getBuffer(typeWidth); + outputWriter.write(firstValueBuffer, firstValue); + vec.getDataBuffer().setBytes((long) rowId * typeWidth, firstValueBuffer); + lastValueRead = firstValue; + rowId++; + remaining--; + } + while (remaining > 0) { + int n; + try { + n = loadMiniBlockToOutput(remaining, vec, rowId, typeWidth, outputWriter); + } catch (IOException e) { + throw new ParquetDecodingException("Error reading mini block.", e); + } + rowId += n; + remaining -= n; + } + valuesRead = total - remaining; + } - /** - * Read from a mini block. Read at most 'remaining' values into output. - * - * @return the number of values read into output - */ - private int loadMiniBlockToOutput( - int remaining, - FieldVector vec, - int rowId, - int typeWidth, - IntegerOutputWriter outputWriter) throws IOException { - - // new block; read the block header - if (remainingInBlock == 0) { - readBlockHeader(); - } + /** + * Read from a mini block. Read at most 'remaining' values into output. + * + * @return the number of values read into output + */ + private int loadMiniBlockToOutput( + int remaining, FieldVector vec, int rowId, int typeWidth, IntegerOutputWriter outputWriter) + throws IOException { - // new miniblock, unpack the miniblock - if (remainingInMiniBlock == 0) { - unpackMiniBlock(); - } + // new block; read the block header + if (remainingInBlock == 0) { + readBlockHeader(); + } - // read values from miniblock - ByteBuffer buffer = getBuffer(remainingInMiniBlock * typeWidth); - int valuesRead = 0; - for (int i = miniBlockSizeInValues - remainingInMiniBlock; - i < miniBlockSizeInValues && valuesRead < remaining; i++) { - // calculate values from deltas unpacked for current block - long outValue = lastValueRead + minDeltaInCurrentBlock + unpackedValuesBuffer[i]; - lastValueRead = outValue; - outputWriter.write(buffer, outValue); - remainingInBlock--; - remainingInMiniBlock--; - valuesRead++; - } - vec.getDataBuffer().setBytes((long) rowId * typeWidth, buffer); + // new miniblock, unpack the miniblock + if (remainingInMiniBlock == 0) { + unpackMiniBlock(); + } - return valuesRead; + // read values from miniblock + ByteBuffer buffer = getBuffer(remainingInMiniBlock * typeWidth); + int valuesRead = 0; + for (int i = miniBlockSizeInValues - remainingInMiniBlock; + i < miniBlockSizeInValues && valuesRead < remaining; + i++) { + // calculate values from deltas unpacked for current block + long outValue = lastValueRead + minDeltaInCurrentBlock + unpackedValuesBuffer[i]; + lastValueRead = outValue; + outputWriter.write(buffer, outValue); + remainingInBlock--; + remainingInMiniBlock--; + valuesRead++; } + vec.getDataBuffer().setBytes((long) rowId * typeWidth, buffer); + + return valuesRead; + } - private void readBlockHeader() { - try { - minDeltaInCurrentBlock = BytesUtils.readZigZagVarLong(in); - } catch (IOException e) { - throw new ParquetDecodingException("Can not read min delta in current block", e); - } - readBitWidthsForMiniBlocks(); - remainingInBlock = blockSizeInValues; - currentMiniBlock = 0; - remainingInMiniBlock = 0; + private void readBlockHeader() { + try { + minDeltaInCurrentBlock = BytesUtils.readZigZagVarLong(in); + } catch (IOException e) { + throw new ParquetDecodingException("Can not read min delta in current block", e); } + readBitWidthsForMiniBlocks(); + remainingInBlock = blockSizeInValues; + currentMiniBlock = 0; + remainingInMiniBlock = 0; + } - private ByteBuffer getBuffer(int length) { - try { - return this.in.slice(length).order(ByteOrder.LITTLE_ENDIAN); - } catch (IOException e) { - throw new ParquetDecodingException("Failed to read " + length + " bytes", e); - } + private ByteBuffer getBuffer(int length) { + try { + return this.in.slice(length).order(ByteOrder.LITTLE_ENDIAN); + } catch (IOException e) { + throw new ParquetDecodingException("Failed to read " + length + " bytes", e); } + } - /** - * mini block has a size of 8*n, unpack 32 value each time - * - * see org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader#unpackMiniBlock - */ - private void unpackMiniBlock() throws IOException { - Arrays.fill(this.unpackedValuesBuffer, 0); - BytePackerForLong packer = Packer.LITTLE_ENDIAN.newBytePackerForLong( - bitWidths[currentMiniBlock]); - for (int j = 0; j < miniBlockSizeInValues; j += 8) { - ByteBuffer buffer = in.slice(packer.getBitWidth()); - if (buffer.hasArray()) { - packer.unpack8Values(buffer.array(), - buffer.arrayOffset() + buffer.position(), unpackedValuesBuffer, j); - } else { - packer.unpack8Values(buffer, buffer.position(), unpackedValuesBuffer, j); - } - } - remainingInMiniBlock = miniBlockSizeInValues; - currentMiniBlock++; + /** + * mini block has a size of 8*n, unpack 32 value each time + * + *

see org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader#unpackMiniBlock + */ + private void unpackMiniBlock() throws IOException { + Arrays.fill(this.unpackedValuesBuffer, 0); + BytePackerForLong packer = + Packer.LITTLE_ENDIAN.newBytePackerForLong(bitWidths[currentMiniBlock]); + for (int j = 0; j < miniBlockSizeInValues; j += 8) { + ByteBuffer buffer = in.slice(packer.getBitWidth()); + if (buffer.hasArray()) { + packer.unpack8Values( + buffer.array(), buffer.arrayOffset() + buffer.position(), unpackedValuesBuffer, j); + } else { + packer.unpack8Values(buffer, buffer.position(), unpackedValuesBuffer, j); + } } + remainingInMiniBlock = miniBlockSizeInValues; + currentMiniBlock++; + } - // From org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader - private void readBitWidthsForMiniBlocks() { - for (int i = 0; i < miniBlockNumInABlock; i++) { - try { - bitWidths[i] = BytesUtils.readIntLittleEndianOnOneByte(in); - } catch (IOException e) { - throw new ParquetDecodingException("Can not decode bitwidth in block header", e); - } - } + // From org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader + private void readBitWidthsForMiniBlocks() { + for (int i = 0; i < miniBlockNumInABlock; i++) { + try { + bitWidths[i] = BytesUtils.readIntLittleEndianOnOneByte(in); + } catch (IOException e) { + throw new ParquetDecodingException("Can not decode bitwidth in block header", e); + } } + } + + /** A functional interface to write long values to into a ByteBuffer */ + @FunctionalInterface + interface IntegerOutputWriter { /** - * A functional interface to write long values to into a ByteBuffer + * A functional interface that writes a long value to a specified row in a ByteBuffer, which + * will be written into a FieldVector + * + * @param buffer a ByteBuffer to write the value into + * @param val value to write */ - @FunctionalInterface - interface IntegerOutputWriter { - - /** - * A functional interface that writes a long value to a specified row in a ByteBuffer, - * which will be written into a FieldVector - * - * @param buffer a ByteBuffer to write the value into - * @param val value to write - */ - void write(ByteBuffer buffer, long val); - } + void write(ByteBuffer buffer, long val); + } } diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java index ca39f4011513..be1a3324ae43 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java @@ -93,18 +93,21 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i throw new ParquetDecodingException("could not read page in col " + desc, e); } } else { - if (dataEncoding == Encoding.PLAIN) { - valuesReader = new VectorizedPlainValuesReader(); - } else if (dataEncoding == Encoding.DELTA_BINARY_PACKED) { - valuesReader = new VectorizedDeltaEncodedValuesReader(); - } else { - throw new UnsupportedOperationException( - "Cannot support vectorized reads for column " - + desc - + " with " - + "encoding " - + dataEncoding - + ". Disable vectorized reads to read this table/file"); + switch (dataEncoding) { + case PLAIN: + valuesReader = new VectorizedPlainValuesReader(); + break; + case DELTA_BINARY_PACKED: + valuesReader = new VectorizedDeltaEncodedValuesReader(); + break; + default: + throw new UnsupportedOperationException( + "Cannot support vectorized reads for column " + + desc + + " with " + + "encoding " + + dataEncoding + + ". Disable vectorized reads to read this table/file"); } try { valuesReader.initFromPage(valueCount, in); From 1969466d696f963eb18a6513a515244879f0dff9 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Thu, 26 Jun 2025 22:25:27 -0700 Subject: [PATCH 13/47] debugging --- .../vectorized/parquet/TestParquetVectorizedReads.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index ff9d624ae68f..094c9d6a9053 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -25,6 +25,7 @@ import static org.assertj.core.api.Assumptions.assumeThat; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.util.Iterator; import org.apache.iceberg.Files; @@ -50,6 +51,7 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.Type; +import org.apache.spark.sql.catalyst.plans.logical.Except; import org.apache.spark.sql.vectorized.ColumnarBatch; import org.junit.jupiter.api.Test; @@ -298,9 +300,11 @@ public void testSupportedReadsForParquetV2() throws Exception { // (i.e. decimals > 8 bytes) Schema schema = new Schema( - optional(102, "float_data", Types.FloatType.get()), - optional(103, "double_data", Types.DoubleType.get()), - optional(104, "decimal_data", Types.DecimalType.of(25, 5))); +// optional(102, "float_data", Types.FloatType.get()), +// optional(103, "double_data", Types.DoubleType.get()), +// optional(104, "decimal_data", Types.DecimalType.of(25, 5)), + optional(105, "int_data", Types.IntegerType.get()), + optional(106, "long_data", Types.LongType.get())); File dataFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); From d2b173b468687302aa21817ac830da5f88892a54 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Thu, 26 Jun 2025 22:25:41 -0700 Subject: [PATCH 14/47] debugging --- .../VectorizedDeltaEncodedValuesReader.java | 8 +++- ...ectorizedParquetDefinitionLevelReader.java | 37 +++++++++++-------- .../parquet/VectorizedValuesReader.java | 8 ++-- .../apache/iceberg/parquet/PageIterator.java | 7 ---- 4 files changed, 31 insertions(+), 29 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index 4232dc8796d9..5022b8f82147 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -161,7 +161,9 @@ private void readValues( if (valuesRead == 0) { ByteBuffer firstValueBuffer = getBuffer(typeWidth); outputWriter.write(firstValueBuffer, firstValue); - vec.getDataBuffer().setBytes((long) rowId * typeWidth, firstValueBuffer); + if (vec != null) { + vec.getDataBuffer().setBytes((long) rowId * typeWidth, firstValueBuffer); + } lastValueRead = firstValue; rowId++; remaining--; @@ -212,7 +214,9 @@ private int loadMiniBlockToOutput( remainingInMiniBlock--; valuesRead++; } - vec.getDataBuffer().setBytes((long) rowId * typeWidth, buffer); + if (vec != null) { + vec.getDataBuffer().setBytes((long) rowId * typeWidth, buffer); + } return valuesRead; } diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java index 0f85101a5b79..3a8875c58b07 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java @@ -84,22 +84,27 @@ public void nextBatch( final int numValsToRead, NullabilityHolder nullabilityHolder, VectorizedValuesReader valuesReader) { - nextBatch( - vector, - startOffset, - typeWidth, - numValsToRead, - (mode, idx, numValues, byteArray, validityBuffer) -> { - switch (mode) { - case RLE: - nextRleBatch( - vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); - break; - case PACKED: - nextPackedBatch( - vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); - } - }); + if (valuesReader instanceof VectorizedPlainValuesReader) { + nextBatch( + vector, + startOffset, + typeWidth, + numValsToRead, + (mode, idx, numValues, byteArray, validityBuffer) -> { + switch (mode) { + case RLE: + nextRleBatch( + vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); + break; + case PACKED: + nextPackedBatch( + vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); + } + }); + } else { + // TODO actually call the appropriate methods + valuesReader.readIntegers(numValsToRead, vector, startOffset); + } } public void nextDictEncodedBatch( diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java index 8eb6431a2c85..7c23149b18ab 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java @@ -31,10 +31,10 @@ */ interface VectorizedValuesReader { - public static final int INT_SIZE = 4; - public static final int LONG_SIZE = 8; - public static final int FLOAT_SIZE = 4; - public static final int DOUBLE_SIZE = 8; + int INT_SIZE = 4; + int LONG_SIZE = 8; + int FLOAT_SIZE = 4; + int DOUBLE_SIZE = 8; /** Read a single boolean */ boolean readBoolean(); diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/PageIterator.java b/parquet/src/main/java/org/apache/iceberg/parquet/PageIterator.java index bff13603002f..a68d2f9b82e7 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/PageIterator.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/PageIterator.java @@ -257,13 +257,6 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i } else { this.values = dataEncoding.getValuesReader(desc, ValuesType.VALUES); } - - // if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) { - // bindToDictionary(dictionary); - // } else { - // bind(path.getType()); - // } - try { values.initFromPage(valueCount, in); } catch (IOException e) { From 1f219e589a608a17024a77abdb8ee7c231b95775 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 13:02:36 -0700 Subject: [PATCH 15/47] debugging commit --- .../VectorizedDeltaEncodedValuesReader.java | 48 ++++------- ...ectorizedParquetDefinitionLevelReader.java | 85 ++++++++++++++----- .../iceberg/data/RandomGenericData.java | 5 ++ .../parquet/TestParquetVectorizedReads.java | 12 ++- 4 files changed, 93 insertions(+), 57 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index 5022b8f82147..4ff29141313a 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.ByteOrder; import java.util.Arrays; import org.apache.arrow.vector.FieldVector; import org.apache.parquet.Preconditions; @@ -78,18 +77,18 @@ public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOExce valueCount >= 1, "Page must have at least one value, but it has " + valueCount); this.in = in; // Read the header - this.blockSizeInValues = BytesUtils.readUnsignedVarInt(in); - this.miniBlockNumInABlock = BytesUtils.readUnsignedVarInt(in); + this.blockSizeInValues = BytesUtils.readUnsignedVarInt(this.in); + this.miniBlockNumInABlock = BytesUtils.readUnsignedVarInt(this.in); double miniSize = (double) blockSizeInValues / miniBlockNumInABlock; Preconditions.checkArgument( miniSize % 8 == 0, "miniBlockSize must be multiple of 8, but it's " + miniSize); this.miniBlockSizeInValues = (int) miniSize; // True value count. May be less than valueCount because of nulls - this.totalValueCount = BytesUtils.readUnsignedVarInt(in); + this.totalValueCount = BytesUtils.readUnsignedVarInt(this.in); this.bitWidths = new int[miniBlockNumInABlock]; this.unpackedValuesBuffer = new long[miniBlockSizeInValues]; // read the first value - firstValue = BytesUtils.readZigZagVarLong(in); + firstValue = BytesUtils.readZigZagVarLong(this.in); } @Override @@ -104,13 +103,13 @@ public short readShort() { @Override public int readInteger() { - readValues(1, null, 0, INT_SIZE, (b, v) -> intVal = (int) v); + readValues(1, null, 0, INT_SIZE, (f, i, v) -> intVal = (int) v); return intVal; } @Override public long readLong() { - readValues(1, null, 0, LONG_SIZE, (b, v) -> longVal = (int) v); + readValues(1, null, 0, LONG_SIZE, (f, i, v) -> longVal = v); return longVal; } @@ -126,12 +125,12 @@ public Binary readBinary(int len) { @Override public void readIntegers(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, INT_SIZE, (b, v) -> b.putInt((int) v)); + readValues(total, vec, rowId, INT_SIZE, (f, i, v) -> f.getDataBuffer().setLong(i, v)); } @Override public void readLongs(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, LONG_SIZE, ByteBuffer::putLong); + readValues(total, vec, rowId, LONG_SIZE, (f, i, v) -> f.getDataBuffer().setLong(i, v)); } @Override @@ -159,11 +158,7 @@ private void readValues( int remaining = total; // First value if (valuesRead == 0) { - ByteBuffer firstValueBuffer = getBuffer(typeWidth); - outputWriter.write(firstValueBuffer, firstValue); - if (vec != null) { - vec.getDataBuffer().setBytes((long) rowId * typeWidth, firstValueBuffer); - } + outputWriter.write(vec, (long) rowId * typeWidth, firstValue); lastValueRead = firstValue; rowId++; remaining--; @@ -201,7 +196,6 @@ private int loadMiniBlockToOutput( } // read values from miniblock - ByteBuffer buffer = getBuffer(remainingInMiniBlock * typeWidth); int valuesRead = 0; for (int i = miniBlockSizeInValues - remainingInMiniBlock; i < miniBlockSizeInValues && valuesRead < remaining; @@ -209,14 +203,11 @@ private int loadMiniBlockToOutput( // calculate values from deltas unpacked for current block long outValue = lastValueRead + minDeltaInCurrentBlock + unpackedValuesBuffer[i]; lastValueRead = outValue; - outputWriter.write(buffer, outValue); + outputWriter.write(vec, ((long) (rowId + valuesRead) * typeWidth), outValue); remainingInBlock--; remainingInMiniBlock--; valuesRead++; } - if (vec != null) { - vec.getDataBuffer().setBytes((long) rowId * typeWidth, buffer); - } return valuesRead; } @@ -233,14 +224,6 @@ private void readBlockHeader() { remainingInMiniBlock = 0; } - private ByteBuffer getBuffer(int length) { - try { - return this.in.slice(length).order(ByteOrder.LITTLE_ENDIAN); - } catch (IOException e) { - throw new ParquetDecodingException("Failed to read " + length + " bytes", e); - } - } - /** * mini block has a size of 8*n, unpack 32 value each time * @@ -274,17 +257,18 @@ private void readBitWidthsForMiniBlocks() { } } - /** A functional interface to write long values to into a ByteBuffer */ + /** A functional interface to write long values to into a FieldVector */ @FunctionalInterface interface IntegerOutputWriter { /** - * A functional interface that writes a long value to a specified row in a ByteBuffer, which - * will be written into a FieldVector + * A functional interface that can be used to write a long value to a specified row in a + * FieldVector * - * @param buffer a ByteBuffer to write the value into + * @param vec a FieldVector to write the value into + * @param index The offset to write to * @param val value to write */ - void write(ByteBuffer buffer, long val); + void write(FieldVector vec, long index, long val); } } diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java index 3a8875c58b07..c04e7e0cb1b5 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java @@ -84,27 +84,39 @@ public void nextBatch( final int numValsToRead, NullabilityHolder nullabilityHolder, VectorizedValuesReader valuesReader) { - if (valuesReader instanceof VectorizedPlainValuesReader) { - nextBatch( - vector, - startOffset, - typeWidth, - numValsToRead, - (mode, idx, numValues, byteArray, validityBuffer) -> { - switch (mode) { - case RLE: - nextRleBatch( - vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); - break; - case PACKED: - nextPackedBatch( - vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); - } - }); - } else { - // TODO actually call the appropriate methods - valuesReader.readIntegers(numValsToRead, vector, startOffset); - } + nextBatch( + vector, + startOffset, + typeWidth, + numValsToRead, + (mode, idx, numValues, byteArray, validityBuffer) -> { + if (valuesReader instanceof VectorizedPlainValuesReader) { + switch (mode) { + case RLE: + nextRleBatch( + vector, + typeWidth, + nullabilityHolder, + valuesReader, + idx, + numValues, + byteArray); + break; + case PACKED: + nextPackedBatch( + vector, + typeWidth, + nullabilityHolder, + valuesReader, + idx, + numValues, + byteArray); + } + } else { + nextVectorizedBatch( + vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues); + } + }); } public void nextDictEncodedBatch( @@ -165,6 +177,19 @@ protected abstract void nextPackedBatch( int numValues, byte[] byteArray); + protected void nextVectorizedBatch( + FieldVector vector, + int typeWidth, + NullabilityHolder nullabilityHolder, + VectorizedValuesReader valuesReader, + int idx, + int numValues) { + throw new UnsupportedOperationException( + this.getClass().getName() + + " does not support reader " + + valuesReader.getClass().getName()); + } + protected void nextRleDictEncodedBatch( FieldVector vector, int typeWidth, @@ -284,6 +309,24 @@ protected void nextDictEncodedVal( .setLong((long) idx * typeWidth, dict.decodeToLong(reader.readInteger())); } } + + @Override + protected void nextVectorizedBatch( + FieldVector vector, + int typeWidth, + NullabilityHolder nullabilityHolder, + VectorizedValuesReader valuesReader, + int idx, + int numValues) { + if (currentValue == maxDefLevel) { + valuesReader.readLongs(numValues, vector, idx); + for (int i = 0; i < numValues; i++) { + nullabilityHolder.setNotNull(idx + i); + } + } else { + setNulls(nullabilityHolder, idx + numValues, numValues, vector.getValidityBuffer()); + } + } } class DoubleReader extends NumericBaseReader { diff --git a/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java b/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java index 4963052e0877..eb3795fb8b79 100644 --- a/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java +++ b/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java @@ -175,6 +175,7 @@ public abstract static class RandomDataGenerator private final Random random; private final float nullPercentage; + private int currentInt = 1; protected RandomDataGenerator(long seed) { this(seed, DEFAULT_NULL_PERCENTAGE); @@ -289,6 +290,10 @@ public Object primitive(Type.PrimitiveType primitive) { } else { return EPOCH.plus((long) result, NANOS).toLocalDateTime(); } + case INTEGER: + return currentInt++; + case LONG: + return (long)currentInt++; default: return result; } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 094c9d6a9053..74e0bcecd5c5 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -185,7 +185,11 @@ void assertRecordsMatch( while (batches.hasNext()) { ColumnarBatch batch = batches.next(); numRowsRead += batch.numRows(); - GenericsHelpers.assertEqualsBatch(schema.asStruct(), expectedIter, batch); + if (numRowsRead != batch.numRows()) { + // todo skip the first batch for debugging + GenericsHelpers.assertEqualsBatch(schema.asStruct(), expectedIter, batch); + + } } assertThat(numRowsRead).isEqualTo(expectedSize); } @@ -303,13 +307,13 @@ public void testSupportedReadsForParquetV2() throws Exception { // optional(102, "float_data", Types.FloatType.get()), // optional(103, "double_data", Types.DoubleType.get()), // optional(104, "decimal_data", Types.DecimalType.of(25, 5)), - optional(105, "int_data", Types.IntegerType.get()), +// optional(105, "int_data", Types.IntegerType.get()), optional(106, "long_data", Types.LongType.get())); File dataFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = - generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); + generateData(schema, 30000, 0L, 0, IDENTITY); try (FileAppender writer = getParquetV2Writer(schema, dataFile)) { writer.addAll(data); } @@ -318,7 +322,7 @@ public void testSupportedReadsForParquetV2() throws Exception { @Test public void testUnsupportedReadsForParquetV2() throws Exception { - // Longs, ints, string types etc use delta encoding and which are not supported for vectorized + // Longs, ints, string types etc. use delta encoding and which are not supported for vectorized // reads Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); File dataFile = File.createTempFile("junit", null, temp.toFile()); From 21c11d84b6d070f0616a6f467871eb68e9f4164e Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 13:23:13 -0700 Subject: [PATCH 16/47] move code --- ...ectorizedParquetDefinitionLevelReader.java | 70 +++++-------------- 1 file changed, 18 insertions(+), 52 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java index c04e7e0cb1b5..c9e1647d6904 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java @@ -30,6 +30,7 @@ import org.apache.iceberg.arrow.vectorized.NullabilityHolder; import org.apache.iceberg.parquet.ParquetUtil; import org.apache.parquet.column.Dictionary; +import org.apache.parquet.column.values.plain.PlainValuesReader; public final class VectorizedParquetDefinitionLevelReader extends BaseVectorizedParquetValuesReader { @@ -90,32 +91,27 @@ public void nextBatch( typeWidth, numValsToRead, (mode, idx, numValues, byteArray, validityBuffer) -> { - if (valuesReader instanceof VectorizedPlainValuesReader) { switch (mode) { case RLE: nextRleBatch( - vector, - typeWidth, - nullabilityHolder, - valuesReader, - idx, - numValues, - byteArray); + vector, + typeWidth, + nullabilityHolder, + valuesReader, + idx, + numValues, + byteArray); break; case PACKED: nextPackedBatch( - vector, - typeWidth, - nullabilityHolder, - valuesReader, - idx, - numValues, - byteArray); + vector, + typeWidth, + nullabilityHolder, + valuesReader, + idx, + numValues, + byteArray); } - } else { - nextVectorizedBatch( - vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues); - } }); } @@ -177,19 +173,6 @@ protected abstract void nextPackedBatch( int numValues, byte[] byteArray); - protected void nextVectorizedBatch( - FieldVector vector, - int typeWidth, - NullabilityHolder nullabilityHolder, - VectorizedValuesReader valuesReader, - int idx, - int numValues) { - throw new UnsupportedOperationException( - this.getClass().getName() - + " does not support reader " - + valuesReader.getClass().getName()); - } - protected void nextRleDictEncodedBatch( FieldVector vector, int typeWidth, @@ -309,24 +292,6 @@ protected void nextDictEncodedVal( .setLong((long) idx * typeWidth, dict.decodeToLong(reader.readInteger())); } } - - @Override - protected void nextVectorizedBatch( - FieldVector vector, - int typeWidth, - NullabilityHolder nullabilityHolder, - VectorizedValuesReader valuesReader, - int idx, - int numValues) { - if (currentValue == maxDefLevel) { - valuesReader.readLongs(numValues, vector, idx); - for (int i = 0; i < numValues; i++) { - nullabilityHolder.setNotNull(idx + i); - } - } else { - setNulls(nullabilityHolder, idx + numValues, numValues, vector.getValidityBuffer()); - } - } } class DoubleReader extends NumericBaseReader { @@ -695,6 +660,7 @@ private void setNulls( } } + @SuppressWarnings({"all"}) private void setNextNValuesInVector( int typeWidth, NullabilityHolder nullabilityHolder, @@ -704,8 +670,8 @@ private void setNextNValuesInVector( int numValues) { ArrowBuf validityBuffer = vector.getValidityBuffer(); if (currentValue == maxDefLevel) { - ByteBuffer buffer = valuesReader.readBinary(numValues * typeWidth).toByteBuffer(); - vector.getDataBuffer().setBytes((long) bufferIdx * typeWidth, buffer); + // TODO read the correct type not just hard-coded longs here + valuesReader.readLongs(numValues, vector, bufferIdx); nullabilityHolder.setNotNulls(bufferIdx, numValues); if (setArrowValidityVector) { for (int i = 0; i < numValues; i++) { From e4bc23fb2035def5d35dab1669e8bd1180f1d589 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 13:37:57 -0700 Subject: [PATCH 17/47] switch back to floats --- .../parquet/VectorizedDeltaEncodedValuesReader.java | 5 ++++- .../parquet/VectorizedParquetDefinitionLevelReader.java | 2 +- .../data/vectorized/parquet/TestParquetVectorizedReads.java | 5 +++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index 4ff29141313a..66db9e9319c7 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -158,7 +158,8 @@ private void readValues( int remaining = total; // First value if (valuesRead == 0) { - outputWriter.write(vec, (long) rowId * typeWidth, firstValue); + System.out.println("#### (F) Wrote value " + firstValue + " to " + ((long) (rowId + valuesRead) * typeWidth)); + outputWriter.write(vec, ((long) (rowId + valuesRead) * typeWidth), firstValue); lastValueRead = firstValue; rowId++; remaining--; @@ -203,6 +204,8 @@ private int loadMiniBlockToOutput( // calculate values from deltas unpacked for current block long outValue = lastValueRead + minDeltaInCurrentBlock + unpackedValuesBuffer[i]; lastValueRead = outValue; + System.out.println("#### (O) Wrote value " + outValue + " to " + ((long) (rowId + valuesRead) * typeWidth) + + " vec IS null == " + (vec == null)); outputWriter.write(vec, ((long) (rowId + valuesRead) * typeWidth), outValue); remainingInBlock--; remainingInMiniBlock--; diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java index c9e1647d6904..9edc91ace6be 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java @@ -671,7 +671,7 @@ private void setNextNValuesInVector( ArrowBuf validityBuffer = vector.getValidityBuffer(); if (currentValue == maxDefLevel) { // TODO read the correct type not just hard-coded longs here - valuesReader.readLongs(numValues, vector, bufferIdx); + valuesReader.readFloats(numValues, vector, bufferIdx); nullabilityHolder.setNotNulls(bufferIdx, numValues); if (setArrowValidityVector) { for (int i = 0; i < numValues; i++) { diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 74e0bcecd5c5..0889118e1fd9 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -304,11 +304,12 @@ public void testSupportedReadsForParquetV2() throws Exception { // (i.e. decimals > 8 bytes) Schema schema = new Schema( -// optional(102, "float_data", Types.FloatType.get()), + optional(102, "float_data", Types.FloatType.get()) // optional(103, "double_data", Types.DoubleType.get()), // optional(104, "decimal_data", Types.DecimalType.of(25, 5)), // optional(105, "int_data", Types.IntegerType.get()), - optional(106, "long_data", Types.LongType.get())); +// optional(106, "long_data", Types.LongType.get()) + ); File dataFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); From a88af2e8e7500b693816a0aba16ebfc4520979b0 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 13:41:09 -0700 Subject: [PATCH 18/47] clean a bit --- .../org/apache/iceberg/parquet/ValuesAsBytesReader.java | 4 ++-- .../data/vectorized/parquet/TestParquetVectorizedReads.java | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java b/parquet/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java index ca876d16df29..71e10247af37 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java @@ -56,12 +56,12 @@ public ByteBuffer getBuffer(int length) { } @Override - public int readInteger() { + public final int readInteger() { return getBuffer(4).getInt(); } @Override - public long readLong() { + public final long readLong() { return getBuffer(8).getLong(); } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 0889118e1fd9..7f55843d706a 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -185,11 +185,7 @@ void assertRecordsMatch( while (batches.hasNext()) { ColumnarBatch batch = batches.next(); numRowsRead += batch.numRows(); - if (numRowsRead != batch.numRows()) { - // todo skip the first batch for debugging - GenericsHelpers.assertEqualsBatch(schema.asStruct(), expectedIter, batch); - - } + GenericsHelpers.assertEqualsBatch(schema.asStruct(), expectedIter, batch); } assertThat(numRowsRead).isEqualTo(expectedSize); } From c375e99d0e35b1255d179f51e4449c60a478791c Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 13:54:18 -0700 Subject: [PATCH 19/47] semistable --- ...ectorizedParquetDefinitionLevelReader.java | 33 ++++++++++++++++--- .../iceberg/data/RandomGenericData.java | 5 --- .../parquet/TestParquetVectorizedReads.java | 13 ++++---- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java index 9edc91ace6be..ef6f9c4ba2a0 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java @@ -30,6 +30,7 @@ import org.apache.iceberg.arrow.vectorized.NullabilityHolder; import org.apache.iceberg.parquet.ParquetUtil; import org.apache.parquet.column.Dictionary; +import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.column.values.plain.PlainValuesReader; public final class VectorizedParquetDefinitionLevelReader @@ -236,7 +237,7 @@ protected void nextRleBatch( int idx, int numValues, byte[] byteArray) { - setNextNValuesInVector(typeWidth, nullabilityHolder, valuesReader, idx, vector, numValues); + setNextNValuesInVector(nullabilityHolder, valuesReader, idx, vector, numValues, this); } @Override @@ -265,6 +266,9 @@ protected void nextPackedBatch( protected abstract void nextVal( FieldVector vector, int idx, VectorizedValuesReader valuesReader, Mode mode); + + public abstract void nextVals( + FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total); } class LongReader extends NumericBaseReader { @@ -292,6 +296,11 @@ protected void nextDictEncodedVal( .setLong((long) idx * typeWidth, dict.decodeToLong(reader.readInteger())); } } + + @Override + public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { + valuesReader.readLongs(total, vector, rowId); + } } class DoubleReader extends NumericBaseReader { @@ -319,6 +328,11 @@ protected void nextDictEncodedVal( .setDouble((long) idx * typeWidth, dict.decodeToDouble(reader.readInteger())); } } + + @Override + public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { + valuesReader.readDoubles(total, vector, rowId); + } } class FloatReader extends NumericBaseReader { @@ -346,6 +360,11 @@ protected void nextDictEncodedVal( .setFloat((long) idx * typeWidth, dict.decodeToFloat(reader.readInteger())); } } + + @Override + public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { + valuesReader.readFloats(total, vector, rowId); + } } class IntegerReader extends NumericBaseReader { @@ -375,6 +394,11 @@ protected void nextDictEncodedVal( .setInt((long) idx * typeWidth, dict.decodeToInt(reader.readInteger())); } } + + @Override + public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { + valuesReader.readIntegers(total, vector, rowId); + } } abstract class BaseReader extends CommonReader { @@ -662,16 +686,15 @@ private void setNulls( @SuppressWarnings({"all"}) private void setNextNValuesInVector( - int typeWidth, NullabilityHolder nullabilityHolder, VectorizedValuesReader valuesReader, int bufferIdx, FieldVector vector, - int numValues) { + int numValues, + NumericBaseReader reader) { ArrowBuf validityBuffer = vector.getValidityBuffer(); if (currentValue == maxDefLevel) { - // TODO read the correct type not just hard-coded longs here - valuesReader.readFloats(numValues, vector, bufferIdx); + reader.nextVals(vector, bufferIdx, valuesReader, numValues); nullabilityHolder.setNotNulls(bufferIdx, numValues); if (setArrowValidityVector) { for (int i = 0; i < numValues; i++) { diff --git a/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java b/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java index eb3795fb8b79..4963052e0877 100644 --- a/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java +++ b/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java @@ -175,7 +175,6 @@ public abstract static class RandomDataGenerator private final Random random; private final float nullPercentage; - private int currentInt = 1; protected RandomDataGenerator(long seed) { this(seed, DEFAULT_NULL_PERCENTAGE); @@ -290,10 +289,6 @@ public Object primitive(Type.PrimitiveType primitive) { } else { return EPOCH.plus((long) result, NANOS).toLocalDateTime(); } - case INTEGER: - return currentInt++; - case LONG: - return (long)currentInt++; default: return result; } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 7f55843d706a..054d5c0c6b4a 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -300,11 +300,11 @@ public void testSupportedReadsForParquetV2() throws Exception { // (i.e. decimals > 8 bytes) Schema schema = new Schema( - optional(102, "float_data", Types.FloatType.get()) -// optional(103, "double_data", Types.DoubleType.get()), -// optional(104, "decimal_data", Types.DecimalType.of(25, 5)), -// optional(105, "int_data", Types.IntegerType.get()), -// optional(106, "long_data", Types.LongType.get()) + optional(102, "float_data", Types.FloatType.get()), + optional(103, "double_data", Types.DoubleType.get()), + optional(104, "decimal_data", Types.DecimalType.of(25, 5)), + optional(105, "int_data", Types.IntegerType.get()), + optional(106, "long_data", Types.LongType.get()) ); File dataFile = File.createTempFile("junit", null, temp.toFile()); @@ -319,8 +319,7 @@ public void testSupportedReadsForParquetV2() throws Exception { @Test public void testUnsupportedReadsForParquetV2() throws Exception { - // Longs, ints, string types etc. use delta encoding and which are not supported for vectorized - // reads + // Some types use delta encoding and which are not supported for vectorized reads Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); File dataFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); From f8cfbb28f0a037c22628c10fc9dc16ec13976e4e Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 13:56:04 -0700 Subject: [PATCH 20/47] polish --- ...ectorizedParquetDefinitionLevelReader.java | 31 ++++++------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java index ef6f9c4ba2a0..e64a3b7e5a2d 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java @@ -92,27 +92,15 @@ public void nextBatch( typeWidth, numValsToRead, (mode, idx, numValues, byteArray, validityBuffer) -> { - switch (mode) { - case RLE: - nextRleBatch( - vector, - typeWidth, - nullabilityHolder, - valuesReader, - idx, - numValues, - byteArray); - break; - case PACKED: - nextPackedBatch( - vector, - typeWidth, - nullabilityHolder, - valuesReader, - idx, - numValues, - byteArray); - } + switch (mode) { + case RLE: + nextRleBatch( + vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); + break; + case PACKED: + nextPackedBatch( + vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); + } }); } @@ -684,7 +672,6 @@ private void setNulls( } } - @SuppressWarnings({"all"}) private void setNextNValuesInVector( NullabilityHolder nullabilityHolder, VectorizedValuesReader valuesReader, From 9d27297cc7736e9877b3907065c3cbda27e5eae8 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 13:57:35 -0700 Subject: [PATCH 21/47] stable: --- .../vectorized/parquet/VectorizedDeltaEncodedValuesReader.java | 3 --- .../data/vectorized/parquet/TestParquetVectorizedReads.java | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index 66db9e9319c7..efe4b72ccb65 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -158,7 +158,6 @@ private void readValues( int remaining = total; // First value if (valuesRead == 0) { - System.out.println("#### (F) Wrote value " + firstValue + " to " + ((long) (rowId + valuesRead) * typeWidth)); outputWriter.write(vec, ((long) (rowId + valuesRead) * typeWidth), firstValue); lastValueRead = firstValue; rowId++; @@ -204,8 +203,6 @@ private int loadMiniBlockToOutput( // calculate values from deltas unpacked for current block long outValue = lastValueRead + minDeltaInCurrentBlock + unpackedValuesBuffer[i]; lastValueRead = outValue; - System.out.println("#### (O) Wrote value " + outValue + " to " + ((long) (rowId + valuesRead) * typeWidth) + - " vec IS null == " + (vec == null)); outputWriter.write(vec, ((long) (rowId + valuesRead) * typeWidth), outValue); remainingInBlock--; remainingInMiniBlock--; diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 054d5c0c6b4a..f53a1ba1fc1f 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -310,7 +310,7 @@ public void testSupportedReadsForParquetV2() throws Exception { File dataFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = - generateData(schema, 30000, 0L, 0, IDENTITY); + generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); try (FileAppender writer = getParquetV2Writer(schema, dataFile)) { writer.addAll(data); } From d75f85e1a532fd8bbada723ad4aa0e39ef415b63 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 13:59:46 -0700 Subject: [PATCH 22/47] spotless; polish --- ...ectorizedParquetDefinitionLevelReader.java | 20 ++++++++++--------- .../parquet/TestParquetVectorizedReads.java | 3 +-- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java index e64a3b7e5a2d..26872c686ec3 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java @@ -30,8 +30,6 @@ import org.apache.iceberg.arrow.vectorized.NullabilityHolder; import org.apache.iceberg.parquet.ParquetUtil; import org.apache.parquet.column.Dictionary; -import org.apache.parquet.column.values.ValuesReader; -import org.apache.parquet.column.values.plain.PlainValuesReader; public final class VectorizedParquetDefinitionLevelReader extends BaseVectorizedParquetValuesReader { @@ -95,11 +93,11 @@ public void nextBatch( switch (mode) { case RLE: nextRleBatch( - vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); + vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); break; case PACKED: nextPackedBatch( - vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); + vector, typeWidth, nullabilityHolder, valuesReader, idx, numValues, byteArray); } }); } @@ -256,7 +254,7 @@ protected abstract void nextVal( FieldVector vector, int idx, VectorizedValuesReader valuesReader, Mode mode); public abstract void nextVals( - FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total); + FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total); } class LongReader extends NumericBaseReader { @@ -286,7 +284,8 @@ protected void nextDictEncodedVal( } @Override - public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { + public void nextVals( + FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { valuesReader.readLongs(total, vector, rowId); } } @@ -318,7 +317,8 @@ protected void nextDictEncodedVal( } @Override - public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { + public void nextVals( + FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { valuesReader.readDoubles(total, vector, rowId); } } @@ -350,7 +350,8 @@ protected void nextDictEncodedVal( } @Override - public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { + public void nextVals( + FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { valuesReader.readFloats(total, vector, rowId); } } @@ -384,7 +385,8 @@ protected void nextDictEncodedVal( } @Override - public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { + public void nextVals( + FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) { valuesReader.readIntegers(total, vector, rowId); } } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index f53a1ba1fc1f..b9fba3813123 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -25,7 +25,6 @@ import static org.assertj.core.api.Assumptions.assumeThat; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.util.Iterator; import org.apache.iceberg.Files; @@ -297,7 +296,7 @@ public void testReadsForTypePromotedColumns() throws Exception { public void testSupportedReadsForParquetV2() throws Exception { // Float and double column types are written using plain encoding with Parquet V2, // also Parquet V2 will dictionary encode decimals that use fixed length binary - // (i.e. decimals > 8 bytes) + // (i.e. decimals > 8 bytes). Int and long types use DELTA_BINARY_PACKED. Schema schema = new Schema( optional(102, "float_data", Types.FloatType.get()), From 03f63953850b46318a26e6c100380e49bd3eda56 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 16:34:55 -0700 Subject: [PATCH 23/47] spotless --- .../data/vectorized/parquet/TestParquetVectorizedReads.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index b9fba3813123..d7f7f9c68d21 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -50,7 +50,6 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.Type; -import org.apache.spark.sql.catalyst.plans.logical.Except; import org.apache.spark.sql.vectorized.ColumnarBatch; import org.junit.jupiter.api.Test; @@ -303,8 +302,7 @@ public void testSupportedReadsForParquetV2() throws Exception { optional(103, "double_data", Types.DoubleType.get()), optional(104, "decimal_data", Types.DecimalType.of(25, 5)), optional(105, "int_data", Types.IntegerType.get()), - optional(106, "long_data", Types.LongType.get()) - ); + optional(106, "long_data", Types.LongType.get())); File dataFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); From c39570d6e7c44d721005bf4c2132e8c5ce28a65d Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 17:12:27 -0700 Subject: [PATCH 24/47] fix lints --- .../VectorizedDeltaEncodedValuesReader.java | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index efe4b72ccb65..8f7b6f20057e 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -22,7 +22,7 @@ import java.nio.ByteBuffer; import java.util.Arrays; import org.apache.arrow.vector.FieldVector; -import org.apache.parquet.Preconditions; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.parquet.bytes.ByteBufferInputStream; import org.apache.parquet.bytes.BytesUtils; import org.apache.parquet.column.values.ValuesReader; @@ -65,7 +65,7 @@ public class VectorizedDeltaEncodedValuesReader extends ValuesReader private int remainingInMiniBlock = 0; // values in current mini block still to be read private long[] unpackedValuesBuffer; - private ByteBufferInputStream in; + private ByteBufferInputStream inputStream; // temporary buffers used by readInteger and readLong private int intVal; @@ -75,20 +75,20 @@ public class VectorizedDeltaEncodedValuesReader extends ValuesReader public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { Preconditions.checkArgument( valueCount >= 1, "Page must have at least one value, but it has " + valueCount); - this.in = in; + this.inputStream = in; // Read the header - this.blockSizeInValues = BytesUtils.readUnsignedVarInt(this.in); - this.miniBlockNumInABlock = BytesUtils.readUnsignedVarInt(this.in); + this.blockSizeInValues = BytesUtils.readUnsignedVarInt(this.inputStream); + this.miniBlockNumInABlock = BytesUtils.readUnsignedVarInt(this.inputStream); double miniSize = (double) blockSizeInValues / miniBlockNumInABlock; Preconditions.checkArgument( miniSize % 8 == 0, "miniBlockSize must be multiple of 8, but it's " + miniSize); this.miniBlockSizeInValues = (int) miniSize; // True value count. May be less than valueCount because of nulls - this.totalValueCount = BytesUtils.readUnsignedVarInt(this.in); + this.totalValueCount = BytesUtils.readUnsignedVarInt(this.inputStream); this.bitWidths = new int[miniBlockNumInABlock]; this.unpackedValuesBuffer = new long[miniBlockSizeInValues]; // read the first value - firstValue = BytesUtils.readZigZagVarLong(this.in); + firstValue = BytesUtils.readZigZagVarLong(this.inputStream); } @Override @@ -156,22 +156,23 @@ private void readValues( + " more."); } int remaining = total; + int currentRowId = rowId; // First value if (valuesRead == 0) { - outputWriter.write(vec, ((long) (rowId + valuesRead) * typeWidth), firstValue); + outputWriter.write(vec, ((long) (currentRowId + valuesRead) * typeWidth), firstValue); lastValueRead = firstValue; - rowId++; + currentRowId++; remaining--; } while (remaining > 0) { - int n; + int loadedRows; try { - n = loadMiniBlockToOutput(remaining, vec, rowId, typeWidth, outputWriter); + loadedRows = loadMiniBlockToOutput(remaining, vec, currentRowId, typeWidth, outputWriter); } catch (IOException e) { throw new ParquetDecodingException("Error reading mini block.", e); } - rowId += n; - remaining -= n; + currentRowId += loadedRows; + remaining -= loadedRows; } valuesRead = total - remaining; } @@ -196,25 +197,25 @@ private int loadMiniBlockToOutput( } // read values from miniblock - int valuesRead = 0; + int valuesReadInMiniBlock = 0; for (int i = miniBlockSizeInValues - remainingInMiniBlock; - i < miniBlockSizeInValues && valuesRead < remaining; + i < miniBlockSizeInValues && valuesReadInMiniBlock < remaining; i++) { // calculate values from deltas unpacked for current block long outValue = lastValueRead + minDeltaInCurrentBlock + unpackedValuesBuffer[i]; lastValueRead = outValue; - outputWriter.write(vec, ((long) (rowId + valuesRead) * typeWidth), outValue); + outputWriter.write(vec, ((long) (rowId + valuesReadInMiniBlock) * typeWidth), outValue); remainingInBlock--; remainingInMiniBlock--; - valuesRead++; + valuesReadInMiniBlock++; } - return valuesRead; + return valuesReadInMiniBlock; } private void readBlockHeader() { try { - minDeltaInCurrentBlock = BytesUtils.readZigZagVarLong(in); + minDeltaInCurrentBlock = BytesUtils.readZigZagVarLong(inputStream); } catch (IOException e) { throw new ParquetDecodingException("Can not read min delta in current block", e); } @@ -234,7 +235,7 @@ private void unpackMiniBlock() throws IOException { BytePackerForLong packer = Packer.LITTLE_ENDIAN.newBytePackerForLong(bitWidths[currentMiniBlock]); for (int j = 0; j < miniBlockSizeInValues; j += 8) { - ByteBuffer buffer = in.slice(packer.getBitWidth()); + ByteBuffer buffer = inputStream.slice(packer.getBitWidth()); if (buffer.hasArray()) { packer.unpack8Values( buffer.array(), buffer.arrayOffset() + buffer.position(), unpackedValuesBuffer, j); @@ -250,7 +251,7 @@ private void unpackMiniBlock() throws IOException { private void readBitWidthsForMiniBlocks() { for (int i = 0; i < miniBlockNumInABlock; i++) { try { - bitWidths[i] = BytesUtils.readIntLittleEndianOnOneByte(in); + bitWidths[i] = BytesUtils.readIntLittleEndianOnOneByte(inputStream); } catch (IOException e) { throw new ParquetDecodingException("Can not decode bitwidth in block header", e); } From 1ac89a9bed20af8b6ceca290e7c8c02e401b0dd0 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 22:58:31 -0700 Subject: [PATCH 25/47] initial impl --- .../VectorizedDeltaEncodedValuesReader.java | 10 ++ ...rizedDeltaLengthByteArrayValuesReader.java | 165 ++++++++++++++++++ .../parquet/VectorizedPageIterator.java | 3 + .../parquet/VectorizedValuesReader.java | 5 +- 4 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index 8f7b6f20057e..c1ea987ade0d 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -91,6 +91,11 @@ public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOExce firstValue = BytesUtils.readZigZagVarLong(this.inputStream); } + // True value count. May be less than valueCount because of nulls + int getTotalValueCount() { + return totalValueCount; + } + @Override public byte readByte() { throw new UnsupportedOperationException("readByte is not supported"); @@ -143,6 +148,11 @@ public void readDoubles(int total, FieldVector vec, int rowId) { throw new UnsupportedOperationException("readDoubles is not supported"); } + @Override + public void readBinary(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readBinary is not supported"); + } + private void readValues( int total, FieldVector vec, int rowId, int typeWidth, IntegerOutputWriter outputWriter) { if (valuesRead + total > totalValueCount) { diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java new file mode 100644 index 000000000000..a63d6401ed2d --- /dev/null +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -0,0 +1,165 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.arrow.vectorized.parquet; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.UUID; +import java.util.function.IntUnaryOperator; +import java.util.function.Supplier; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.IntVector; +import org.apache.iceberg.arrow.ArrowAllocation; +import org.apache.iceberg.io.CloseableGroup; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.io.ParquetDecodingException; +import org.apache.parquet.io.api.Binary; + +public class VectorizedDeltaLengthByteArrayValuesReader implements VectorizedValuesReader, AutoCloseable { + + private final VectorizedDeltaEncodedValuesReader lengthReader; + private final CloseableGroup closeables; + + private ByteBufferInputStream in; + private IntVector lengthsVector; + private ByteBuffer byteBuffer; + + VectorizedDeltaLengthByteArrayValuesReader() { + lengthReader = new VectorizedDeltaEncodedValuesReader(); + closeables = new CloseableGroup(); + } + + @Override + public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { + lengthsVector = new IntVector("length-" + UUID.randomUUID(), ArrowAllocation.rootAllocator()); + closeables.addCloseable(lengthsVector); + lengthReader.initFromPage(valueCount, in); + lengthReader.readIntegers(lengthReader.getTotalValueCount(), lengthsVector, 0); + this.in = in.remainingStream(); + } + + @Override + public Binary readBinary(int len) { + readValues(1, null, 0, x -> len, (f, i, v) -> byteBuffer = v); + return Binary.fromReusedByteBuffer(byteBuffer); + } + + @Override + public void readBinary(int total, FieldVector vec, int rowId) { + readValues(total, vec, rowId, x -> lengthsVector.get(x), (f, i, v) -> f.getDataBuffer().setBytes(i, v)); + } + + private void readValues( + int total, + FieldVector vec, + int rowId, + IntUnaryOperator getLength, + BinaryOutputWriter outputWriter) { + ByteBuffer buffer; + int length; + for (int i = 0; i < total; i++) { + length = getLength.applyAsInt(rowId + i); + try { + buffer = in.slice(length); + } catch (EOFException e) { + throw new ParquetDecodingException("Failed to read " + length + " bytes"); + } + outputWriter.write(vec, rowId + i, buffer); + } + } + + @Override + public boolean readBoolean() { + throw new UnsupportedOperationException("readBoolean is not supported"); + } + + @Override + public byte readByte() { + throw new UnsupportedOperationException("readByte is not supported"); + } + + @Override + public short readShort() { + throw new UnsupportedOperationException("readShort is not supported"); + } + + @Override + public int readInteger() { + throw new UnsupportedOperationException("readInteger is not supported"); + } + + @Override + public long readLong() { + throw new UnsupportedOperationException("readLong is not supported"); + } + + @Override + public float readFloat() { + throw new UnsupportedOperationException("readFloat is not supported"); + } + + @Override + public double readDouble() { + throw new UnsupportedOperationException("readDouble is not supported"); + } + + @Override + public void readIntegers(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readIntegers is not supported"); + } + + @Override + public void readLongs(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readLongs is not supported"); + } + + @Override + public void readFloats(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readFloats is not supported"); + } + + @Override + public void readDoubles(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readDoubles is not supported"); + } + + @Override + public void close() throws Exception { + closeables.close(); + } + + /** A functional interface to write binary values into a FieldVector */ + @FunctionalInterface + interface BinaryOutputWriter { + + /** + * A functional interface that can be used to write a binary value to a specified row in a + * FieldVector + * + * @param vec a FieldVector to write the value into + * @param index The offset to write to + * @param val value to write + */ + void write(FieldVector vec, long index, ByteBuffer val); + } +} diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java index be1a3324ae43..578d743314a5 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java @@ -100,6 +100,9 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i case DELTA_BINARY_PACKED: valuesReader = new VectorizedDeltaEncodedValuesReader(); break; + case DELTA_LENGTH_BYTE_ARRAY: + valuesReader = new VectorizedDeltaLengthByteArrayValuesReader(); + break; default: throw new UnsupportedOperationException( "Cannot support vectorized reads for column " diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java index 7c23149b18ab..c07d18aefb45 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java @@ -58,7 +58,7 @@ interface VectorizedValuesReader { double readDouble(); /** - * Read binary data of some length + * Read a single binary value of some length * * @param len The number of bytes to read */ @@ -76,6 +76,9 @@ interface VectorizedValuesReader { /** Read `total` doubles into `vec` starting at `vec[rowId]` */ void readDoubles(int total, FieldVector vec, int rowId); + /** Read `total` binary values into `vec` starting at `vec[rowId]` */ + void readBinary(int total, FieldVector vec, int rowId); + /** * Initialize the reader from a page. See {@link ValuesReader#initFromPage(int, * ByteBufferInputStream)}. From ddeadf7af3513370859c10beefe4490c6e55e508 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 1 Jul 2025 23:35:01 -0700 Subject: [PATCH 26/47] convinced I need to use a golden file --- .../parquet/VectorizedPlainValuesReader.java | 7 +++ .../parquet/TestParquetVectorizedReads.java | 47 ++++++++++++++----- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java index 764b2fc353e3..0a43f65f6f2c 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java @@ -74,4 +74,11 @@ public void readFloats(int total, FieldVector vec, int rowId) { public void readDoubles(int total, FieldVector vec, int rowId) { readValues(total, vec, rowId, DOUBLE_SIZE); } + + @Override + public void readBinary(int total, FieldVector vec, int rowId) { + for (int i = 0; i < total; i++) { + readBinary(1, vec, rowId + i); + } + } } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index d7f7f9c68d21..b7426eaf32ce 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -27,6 +27,7 @@ import java.io.File; import java.io.IOException; import java.util.Iterator; +import java.util.Map; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.data.RandomGenericData; @@ -37,6 +38,7 @@ import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.relocated.com.google.common.base.Function; import org.apache.iceberg.relocated.com.google.common.base.Strings; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; @@ -148,13 +150,17 @@ FileAppender getParquetWriter(Schema schema, File testFile) throws IOExc .build(); } - FileAppender getParquetV2Writer(Schema schema, File testFile) throws IOException { - return Parquet.write(Files.localOutput(testFile)) + FileAppender getParquetV2Writer( + Schema schema, File testFile, Map writerOptions) throws IOException { + Parquet.WriteBuilder writeBuilder = Parquet.write(Files.localOutput(testFile)) .schema(schema) .createWriterFunc(GenericParquetWriter::create) .named("test") - .writerVersion(ParquetProperties.WriterVersion.PARQUET_2_0) - .build(); + .writerVersion(ParquetProperties.WriterVersion.PARQUET_2_0); + for (var entry: writerOptions.entrySet()) { + writeBuilder.set(entry.getKey(), entry.getValue()); + } + return writeBuilder.build(); } void assertRecordsMatch( @@ -291,6 +297,17 @@ public void testReadsForTypePromotedColumns() throws Exception { assertRecordsMatch(readSchema, 30000, data, dataFile, false, BATCH_SIZE); } + private void testReadsWithSchema(Schema schema, Map writerOptions) throws Exception { + File dataFile = File.createTempFile("junit", null, temp.toFile()); + assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); + Iterable data = + generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); + try (FileAppender writer = getParquetV2Writer(schema, dataFile, writerOptions)) { + writer.addAll(data); + } + assertRecordsMatch(schema, 30000, data, dataFile, false, BATCH_SIZE); + } + @Test public void testSupportedReadsForParquetV2() throws Exception { // Float and double column types are written using plain encoding with Parquet V2, @@ -303,15 +320,19 @@ public void testSupportedReadsForParquetV2() throws Exception { optional(104, "decimal_data", Types.DecimalType.of(25, 5)), optional(105, "int_data", Types.IntegerType.get()), optional(106, "long_data", Types.LongType.get())); + testReadsWithSchema(schema, ImmutableMap.of()); + } - File dataFile = File.createTempFile("junit", null, temp.toFile()); - assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); - Iterable data = - generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); - try (FileAppender writer = getParquetV2Writer(schema, dataFile)) { - writer.addAll(data); - } - assertRecordsMatch(schema, 30000, data, dataFile, false, BATCH_SIZE); + @Test + public void testDeltaLengthByteArrayBinaryReadsForParquetV2() throws Exception { + // By default, Parquet seems to want to use DELTA_BYTE_ARRAY for binary columns + // instead of DELTA_LENGTH_BYTE_ARRAY + Schema schema = + new Schema( + optional(106, "binary_data", Types.BinaryType.get())); + testReadsWithSchema(schema, ImmutableMap.of( + "parquet.enable.dictionary", "false" + )); } @Test @@ -322,7 +343,7 @@ public void testUnsupportedReadsForParquetV2() throws Exception { assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); - try (FileAppender writer = getParquetV2Writer(schema, dataFile)) { + try (FileAppender writer = getParquetV2Writer(schema, dataFile, ImmutableMap.of())) { writer.addAll(data); } assertThatThrownBy(() -> assertRecordsMatch(schema, 30000, data, dataFile, false, BATCH_SIZE)) From f86b93ca835d02495735ec8250b51b23a4cec5e2 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Thu, 31 Jul 2025 02:31:02 +0900 Subject: [PATCH 27/47] resolve more conflicts --- .../VectorizedDeltaEncodedValuesReader.java | 11 +++++++ ...rizedDeltaLengthByteArrayValuesReader.java | 31 +++++++++---------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index 115518e1fb50..b4c575318e09 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -91,6 +91,11 @@ public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOExce firstValue = BytesUtils.readZigZagVarLong(this.inputStream); } + // True value count. May be less than valueCount because of nulls + int getTotalValueCount() { + return totalValueCount; + } + /** DELTA_BINARY_PACKED only supports INT32 and INT64 */ @Override public byte readByte() { @@ -149,6 +154,12 @@ public void readDoubles(int total, FieldVector vec, int rowId) { throw new UnsupportedOperationException("readDoubles is not supported"); } + /** DELTA_BINARY_PACKED only supports INT32 and INT64 */ + @Override + public void readBinary(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readBinary is not supported"); + } + private void readValues( int total, FieldVector vec, int rowId, int typeWidth, IntegerOutputWriter outputWriter) { if (valuesRead + total > totalValueCount) { diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java index a63d6401ed2d..1c4e5497eab1 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.arrow.vectorized.parquet; import java.io.EOFException; From 5117f9fd32d0bc59e637141fa0dc3db195e16445 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Thu, 31 Jul 2025 03:01:01 +0900 Subject: [PATCH 28/47] license --- LICENSE | 1 + 1 file changed, 1 insertion(+) diff --git a/LICENSE b/LICENSE index 80cfd3652e69..5b7355e1c349 100644 --- a/LICENSE +++ b/LICENSE @@ -289,6 +289,7 @@ This product includes code from Apache Spark. * implementation of SetAccumulator. * Connector expressions. * implementation of VectorizedDeltaEncodedValuesReader +* implementation of VectorizedDeltaLengthByteArrayValuesReader Copyright: 2011-2018 The Apache Software Foundation Home page: https://spark.apache.org/ From c7e5a68d241c6d29ac45ea66f66095c8f45531a1 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Thu, 31 Jul 2025 03:01:42 +0900 Subject: [PATCH 29/47] revert --- .../main/java/org/apache/iceberg/parquet/PageIterator.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/PageIterator.java b/parquet/src/main/java/org/apache/iceberg/parquet/PageIterator.java index a68d2f9b82e7..bff13603002f 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/PageIterator.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/PageIterator.java @@ -257,6 +257,13 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i } else { this.values = dataEncoding.getValuesReader(desc, ValuesType.VALUES); } + + // if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) { + // bindToDictionary(dictionary); + // } else { + // bind(path.getType()); + // } + try { values.initFromPage(valueCount, in); } catch (IOException e) { From db99901a9072c4b097b1a827ef01cdd44a97ce3c Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Sat, 2 Aug 2025 15:19:57 +0900 Subject: [PATCH 30/47] spotless --- ...rizedDeltaLengthByteArrayValuesReader.java | 261 +++++++++--------- 1 file changed, 133 insertions(+), 128 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java index 1c4e5497eab1..df225f0fab1f 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -23,7 +23,6 @@ import java.nio.ByteBuffer; import java.util.UUID; import java.util.function.IntUnaryOperator; -import java.util.function.Supplier; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.IntVector; import org.apache.iceberg.arrow.ArrowAllocation; @@ -32,131 +31,137 @@ import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.io.api.Binary; -public class VectorizedDeltaLengthByteArrayValuesReader implements VectorizedValuesReader, AutoCloseable { - - private final VectorizedDeltaEncodedValuesReader lengthReader; - private final CloseableGroup closeables; - - private ByteBufferInputStream in; - private IntVector lengthsVector; - private ByteBuffer byteBuffer; - - VectorizedDeltaLengthByteArrayValuesReader() { - lengthReader = new VectorizedDeltaEncodedValuesReader(); - closeables = new CloseableGroup(); - } - - @Override - public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { - lengthsVector = new IntVector("length-" + UUID.randomUUID(), ArrowAllocation.rootAllocator()); - closeables.addCloseable(lengthsVector); - lengthReader.initFromPage(valueCount, in); - lengthReader.readIntegers(lengthReader.getTotalValueCount(), lengthsVector, 0); - this.in = in.remainingStream(); - } - - @Override - public Binary readBinary(int len) { - readValues(1, null, 0, x -> len, (f, i, v) -> byteBuffer = v); - return Binary.fromReusedByteBuffer(byteBuffer); - } - - @Override - public void readBinary(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, x -> lengthsVector.get(x), (f, i, v) -> f.getDataBuffer().setBytes(i, v)); - } - - private void readValues( - int total, - FieldVector vec, - int rowId, - IntUnaryOperator getLength, - BinaryOutputWriter outputWriter) { - ByteBuffer buffer; - int length; - for (int i = 0; i < total; i++) { - length = getLength.applyAsInt(rowId + i); - try { - buffer = in.slice(length); - } catch (EOFException e) { - throw new ParquetDecodingException("Failed to read " + length + " bytes"); - } - outputWriter.write(vec, rowId + i, buffer); - } - } - - @Override - public boolean readBoolean() { - throw new UnsupportedOperationException("readBoolean is not supported"); - } - - @Override - public byte readByte() { - throw new UnsupportedOperationException("readByte is not supported"); - } - - @Override - public short readShort() { - throw new UnsupportedOperationException("readShort is not supported"); - } - - @Override - public int readInteger() { - throw new UnsupportedOperationException("readInteger is not supported"); - } - - @Override - public long readLong() { - throw new UnsupportedOperationException("readLong is not supported"); - } - - @Override - public float readFloat() { - throw new UnsupportedOperationException("readFloat is not supported"); - } - - @Override - public double readDouble() { - throw new UnsupportedOperationException("readDouble is not supported"); - } - - @Override - public void readIntegers(int total, FieldVector vec, int rowId) { - throw new UnsupportedOperationException("readIntegers is not supported"); - } - - @Override - public void readLongs(int total, FieldVector vec, int rowId) { - throw new UnsupportedOperationException("readLongs is not supported"); - } - - @Override - public void readFloats(int total, FieldVector vec, int rowId) { - throw new UnsupportedOperationException("readFloats is not supported"); - } - - @Override - public void readDoubles(int total, FieldVector vec, int rowId) { - throw new UnsupportedOperationException("readDoubles is not supported"); - } - - @Override - public void close() throws Exception { - closeables.close(); - } - - /** A functional interface to write binary values into a FieldVector */ - @FunctionalInterface - interface BinaryOutputWriter { - - /** - * A functional interface that can be used to write a binary value to a specified row in a - * FieldVector - * - * @param vec a FieldVector to write the value into - * @param index The offset to write to - * @param val value to write - */ - void write(FieldVector vec, long index, ByteBuffer val); - } +public class VectorizedDeltaLengthByteArrayValuesReader + implements VectorizedValuesReader, AutoCloseable { + + private final VectorizedDeltaEncodedValuesReader lengthReader; + private final CloseableGroup closeables; + + private ByteBufferInputStream in; + private IntVector lengthsVector; + private ByteBuffer byteBuffer; + + VectorizedDeltaLengthByteArrayValuesReader() { + lengthReader = new VectorizedDeltaEncodedValuesReader(); + closeables = new CloseableGroup(); + } + + @Override + public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { + lengthsVector = new IntVector("length-" + UUID.randomUUID(), ArrowAllocation.rootAllocator()); + closeables.addCloseable(lengthsVector); + lengthReader.initFromPage(valueCount, in); + lengthReader.readIntegers(lengthReader.getTotalValueCount(), lengthsVector, 0); + this.in = in.remainingStream(); + } + + @Override + public Binary readBinary(int len) { + readValues(1, null, 0, x -> len, (f, i, v) -> byteBuffer = v); + return Binary.fromReusedByteBuffer(byteBuffer); + } + + @Override + public void readBinary(int total, FieldVector vec, int rowId) { + readValues( + total, + vec, + rowId, + x -> lengthsVector.get(x), + (f, i, v) -> f.getDataBuffer().setBytes(i, v)); + } + + private void readValues( + int total, + FieldVector vec, + int rowId, + IntUnaryOperator getLength, + BinaryOutputWriter outputWriter) { + ByteBuffer buffer; + int length; + for (int i = 0; i < total; i++) { + length = getLength.applyAsInt(rowId + i); + try { + buffer = in.slice(length); + } catch (EOFException e) { + throw new ParquetDecodingException("Failed to read " + length + " bytes"); + } + outputWriter.write(vec, rowId + i, buffer); + } + } + + @Override + public boolean readBoolean() { + throw new UnsupportedOperationException("readBoolean is not supported"); + } + + @Override + public byte readByte() { + throw new UnsupportedOperationException("readByte is not supported"); + } + + @Override + public short readShort() { + throw new UnsupportedOperationException("readShort is not supported"); + } + + @Override + public int readInteger() { + throw new UnsupportedOperationException("readInteger is not supported"); + } + + @Override + public long readLong() { + throw new UnsupportedOperationException("readLong is not supported"); + } + + @Override + public float readFloat() { + throw new UnsupportedOperationException("readFloat is not supported"); + } + + @Override + public double readDouble() { + throw new UnsupportedOperationException("readDouble is not supported"); + } + + @Override + public void readIntegers(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readIntegers is not supported"); + } + + @Override + public void readLongs(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readLongs is not supported"); + } + + @Override + public void readFloats(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readFloats is not supported"); + } + + @Override + public void readDoubles(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readDoubles is not supported"); + } + + @Override + public void close() throws Exception { + closeables.close(); + } + + /** A functional interface to write binary values into a FieldVector */ + @FunctionalInterface + interface BinaryOutputWriter { + + /** + * A functional interface that can be used to write a binary value to a specified row in a + * FieldVector + * + * @param vec a FieldVector to write the value into + * @param index The offset to write to + * @param val value to write + */ + void write(FieldVector vec, long index, ByteBuffer val); + } } From 45284903f916bf34ce84e754cdae058b0199e7bc Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Mon, 4 Aug 2025 08:55:00 -0700 Subject: [PATCH 31/47] lint --- .../parquet/VectorizedDeltaLengthByteArrayValuesReader.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java index df225f0fab1f..e75a0f511159 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -47,12 +47,12 @@ public class VectorizedDeltaLengthByteArrayValuesReader } @Override - public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { + public void initFromPage(int valueCount, ByteBufferInputStream inputStream) throws IOException { lengthsVector = new IntVector("length-" + UUID.randomUUID(), ArrowAllocation.rootAllocator()); closeables.addCloseable(lengthsVector); - lengthReader.initFromPage(valueCount, in); + lengthReader.initFromPage(valueCount, inputStream); lengthReader.readIntegers(lengthReader.getTotalValueCount(), lengthsVector, 0); - this.in = in.remainingStream(); + this.in = inputStream.remainingStream(); } @Override From 679390e58a9b92d868709cd845e4254083b37c34 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 12 Aug 2025 12:24:35 -0700 Subject: [PATCH 32/47] add golden file --- .../parquet/TestParquetVectorizedReads.java | 2 +- .../DELTA_LENGTH_BYTE_ARRAY/binary.parquet | Bin 0 -> 9732 bytes 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 spark/v4.0/spark/src/test/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/binary.parquet diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 757331dcb5dc..603aedd224a7 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -73,7 +73,7 @@ public class TestParquetVectorizedReads extends AvroDataTestBase { private static final String PLAIN = "PLAIN"; private static final List GOLDEN_FILE_ENCODINGS = - ImmutableList.of("PLAIN_DICTIONARY", "RLE", "RLE_DICTIONARY", "DELTA_BINARY_PACKED"); + ImmutableList.of("PLAIN_DICTIONARY", "RLE", "RLE_DICTIONARY", "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY"); private static final Map GOLDEN_FILE_TYPES = ImmutableMap.of( "string", Types.StringType.get(), diff --git a/spark/v4.0/spark/src/test/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/binary.parquet b/spark/v4.0/spark/src/test/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/binary.parquet new file mode 100644 index 0000000000000000000000000000000000000000..959e99bd6228c983116d1f421627a83222480769 GIT binary patch literal 9732 zcmY+qRZtyF6D^9n!v;1U++8*VciFhRySuvt3-0b7+}+*XEw~djguu!Fo%?WZPj%PS zOx0Rzs^+1qYblASumK(z(jUXeHD%=Vkj{iV!8q6Tg9zmQ7nc)63Ew0ldsS?6_0SO$m zmWH4ylsIK`eBGoT4-CKYmNLD}D`5^)riwHza*h__OoNQ5mP#UQWnzYP-Ih|AaC^F* z@+9zopw#5+^EMnc(VHQT%c@pMW&=YvY##j@C5g8+2-+FdKRockH7%}V(NYr9WtroE z%uU%}%~lpZ=3E)ne4ugseYMG&GlW=$ZV#u(D?Ozx6~ObF6`8~F z1L$5v;ZUeqV?xP}Ni6%F^Ej`f62u`Fa?W`|8`kA7A)gzKl{-4sx=<4|_R4Q>i-w8(CCEIXr8fIfe1@oN3*!4jpYwQU4Z0km0 zjy){Kyeu486=gO=D(s|3EZ^&fVVtl+w9&ligi;H&Y^7YQ)#NEgz+CiyO3O!}zlBzG zx5|cGR*WZb&Xu_?{2nIsVRBJ z!bxoQIC2Svi`2l8XNQ^dIigM15c^LlK6rSyA+**+)w^cm*J2V}Z`rz*3$LqGBQ`h*Xoz2||UlqO&-iK{Axa z%;Z(l{0gm2L>!zWKgpo}ebWCoZqmHp{Xt5FZx!3p60oWqk5Ed2ta6UEcCCF%gBzrV zfkTN-^sYsx@?@xm-Or(Gd^sxNUiIZF{lf%`;io7`Q;I(HTFC}O)oGBP02j~^+)SIt zpfmcjO5?+vl&=%Azib#mOhUL3C|h3K-IGs%%@J#5*k@pexon6mwk1Mg6?HX`Yx^Y` zPg!un3iNG5AZG$Y1eP~jpki2gW2t_#eoFE!1kqTF+!s%#G_$IedNb$dc$2mz**59W zK_r*+i)i>P0iyK2;aR6C-yg`FA(hN^P6fWyMwS$x7l5hp6=A;+^Z(cN;Bgr!GRHmPlouPBqAG8=tP@ z-{r;5e{3ifFpULMKchR^r=eFqBlx*A#kOXHxS?iB@4bSk)NXUa?k05aM+fbgRaJ(> zgN9-Vc+PP%aQFqiIW#k_xDCT7?9c11d8wV2`C)(AkyEEuxWiOx$gR%hf&x~?Z%Q;` z^O;<49Mc#Ro-Ngm!fi31$a3fXFY+sAyrXXZ6c>)46V8-9Gd~3+xySiur%Sy@){^z? zV-3ob*Piz`82^fW3M$mAyP0OGe-z504%7rf>>*mMZk;- z*nQhR=i8!CEi*zitpcHMUtMLrcWn%Ha)SjvStE7xd=L0vW?72AjsNB~bF2}d-g}rg zIa=&vAmWqMMF`L7NQhtE4Z#u8rZ_)Z5vY`_>_*{2hqvSd(Oh)~>+ei4M9(TJZ9q1} zxM<@F>^TL#-VBxw?(hfgWv6)Cqhy&z0Ax;@XJ_|tJtWpt%~Yof2){v;2m&Ph42u+C zzIrm%g|G9xoPOktT7r)gA>n2B{XVJc2(V-5Np$x3CkE|wiVWi7-%*?%COcDpP3C_5 zVR7vm=iz^WRM;jhZT)<}=GKOh|LUe)~kC~91yJfFYW z^#H-|*kk*iaC+2Z2+`^1BwZsJH%Qv@zG1VON9A*%9jS&;`oBuiT=Jw@M%(v&)93jb zMNPPVFpy+RqiVd&|C!slXxI)F&Xys9V3u z>M*Cu6XLAPP?Sg`X_7ZKmBDl4y>iuyU6QYdEST}sL!DZC0;jQdcSqsi*Dh_;&_C2t z_~87`-crvk&Yi*RtEYpx)Y7qMDS>v_8B&v|kk3vzJ`%ki4y}L+lE1%IPXbayQ35KR z>#-lEm0F7TgW{$!c%|+)?Qnlc!SgwEqLdxoWv_Rmdq6IHBoJGm7~Ws&E2C5`>!&z! zE0r(0#x+gY?sBdMHsuz8B3m3F7!Hl+5J=Ht_OE7M+rLTmAPd`^()GARTT~ffifktc2@5at;w1)AZV##@rD7L)OJibK;=Q{@*uWRPhZT7!oh3x^Z8n( z1Cute48A}n#cDm{Esic)Bn1k1C#qK5f??%VFo+q zoY0D4(VW~Y#pvxrqSF-X*G>KPxirhUo~@6@^oIA>kUiu^;>qaZ`sO~X;1r%_4al9 zwRr1~TkLi$x7`#zlW=GSwE}3&9D;;jd(T@cQvjtVbMQU27#L65u?eM0xAHYb_NX^f z-Emw5D0g{}FGXKrhgW!6)7F8^FdN|`e0}$c7_X=^C=niUSb`TfmqkiNNy(-~bqtp2 z{C)|NHf+Ksu_L^J-%DsZ=eFW|YP1OruLhpJ;h|NXFb=>=zSz20ocqlk5>Ff_IuE;T zml;?2*Q5>WhqrmZU)w^b!ID}hqLq6ajvq}d!8-c%idsk0Cm4qID$y>6<*$s81NxUz z9iNe=&!Hl}%4wkPV>-Q^Nl&Qw)rvE_ey(O8T)jBn;LFUMzv_DgRN%{-xUw7|njopA ztkgt2&-eKYLCVoBaJ|W*-~GZoeAJGIZMVIsyjIxV1#j?WDSSx5^*VU`5Ipro zj>QmspZBC#h8NeKqaf6)l{3pti$|5bYV!4?BpQtSS^EQP+8Rfwg!_Rg8DB!MI5>3= zJ+Z}g$)Q;Ky1KtG6HF+GEA~Y37|u4o8b$XF4Ec{BI$f5 zFR)0hJ5R-+nN+0HHEQ7N8pdOujb>1^hZ=OI16z#2{Qjga^H%d{ur(gEEmr0v4O`XK zAoAcjfm6!>YkcrmV%1VYd_zA(I+}g(ic!q+D7~dmPd5 z^`H)$`m0EzZ}C%Ba2`m0fCvpaTh-43FC{8*?R#M$uiVQBMNKhaCcw09-LRmhI2n^_ zFm2DyXLu;Z^4HAC?#xI`ypqOu{!u1UG=_un*i3@~yv<4jITG+hj*SIfMZ=r6 zOLo}OWzfH#u*^4Ws$#p^)wehtn(fr-7snpP=x$wrY4qUiNbI)7+eYNs-PReTOgbbg z<0`Da@E@+nPL*3Gr&1j5wVrTD7W`rvF`4F7>~1AYoSJi9C|Z8*T(0iWvU{v>_TEUk zXR9^1_k_0i^Cd_Z0B+wYtEI&5t zs6&Z!0h1!O$U796%^#!h5CI+98roOk3}fl)SZ0X^#IKcP6GzDUS!Iz=DRR#R7d>uh zkWD9U)h(D%5mlF^B^|b%21sH)3qwxEdTW4z>Pr0Qb~Wy|`0Tx{Mvr=gj+*@8)nPi$ zz0NmkZ?w@IABPYXMBQ0(*cb;YuwIhrH~437)kd{J#%~&fw+LV5!H=(!Xp-g!6#%EB zY6!%OIs37;bvESruFPvk`HEsM+^U=5DtyeISo%c1F2Q!)MxLyEhse{tt$i`3*MrP& zuyc6&H7LXQ$?nPyA`{PywSsat&JM4wlS$+!oWN^TcgE_oe)cMlm9(}eEVbO z&sb!cKQGc8qrp61aV=Gf=!OwyZrQ20KiYW!?M47gzYY z{2c`11Fgi+)-PMhTT$!HP2q?a*W(>d8>JSu6dzPDdTXZPvJk&eWWc7ch7D*Z^8 z1qQhI9}voxcFQ-`;c&HC?C?o7Vmt2fNk-ZQ<@Am{4UzkVJY8AB5p2#KS8LGv$kMqL zG?bim2FxF_>_NHK_L!jdI==Jg`Qo`Mvpvn+D=0`1P&~sc^e|6b8J`+<=YQ_F-l0JB zC|t_x87n`HZa?Ssy=S0))8Pu9H=E(n*HcyNegu#op?e29RVj8nhpLqwD4nc}izoM? zTa}!S54VdT*qEvdtpEyJbKdNYFeUo~jMhS-l?kf&NDBz9ty+I}NeIkoI9sndp|oMP zOgkM_QCH3@B&N0eBK~d6J0vRkVhu+l@w;E_nw7)ZCS^k&;jLDVuCc9gBoKS8R7$wz z&IvEDc2s3Wu=T5n0P!|pa>+lZg3h>~#kyL#^?(7%Dp|(9z(ipVa zjDG3E&sDCtC2T6wSB2r#i5t-@X{%7kbTu?1k1lDLgxKJx6eE#JYNZ-<^T<20m>AB8 zi<=ptz(4>rnx(Mg?%Qk^=jZk|Ql#>jlh4T{R}jtlVpLI)CJDt$A7KT7+O!92fhehc zkoz;mziVE`rI)>Lif4|N6ipzDaQ?m{IDo==6a`IdwF|I6DT8lTSLIepTcc_3X_T;b z1PV6v5?G5AQ^Ig#bJQ>Im{dz_%}JNBOQ5@5GE>HoMy=1;4OM%TG#TSCb%$eOGL28?5(r|&M|7Wy?-x-lpiesdND2u4mOFsPeOjTC1#D;U?@P5_cy)-rew4HEEgHqP?*M5`pe=Q ztcuMP(uzzY@@XoMu2RTw9X{P>3cy5e@_E{EV?W(v!GcxI;bfouU?X>6^w;uWeqj1`QmBx4Ng z$Bxw79uOl92_Nmt`4e$@$q}An+xi{#w&i|VJ>`;Zrwn^7A~Gnml%N~H5PW?5uNrXu5~cizt2v`In(CuFHi`>qw8ZF=7LN3>I&j~(=7jOSaK3)S1UbTF=C+`yKxte0 zbo~C13!b#iRt6tUK?WQ}05>$z)5`yLNt2=-#llR=a8xwgH^f=L1i;Z`=bL=Qsf`GS zBH4DAq;0F+UR~X7o|LEg#P4)}h*>J;aaw0E#F%~F8f_sUi?=+q5Sk1h*dKr`tnD1- zW$8-An#m-2tj5e_s=VS#w^dr$5o4YV5*1G=@TZK6%?1(pITMTi3VJ;sSj|A|ReaRU z5>_g@iW{Fplo@VdIKJs<7Cn_|v-VJ>rH0n?jgPJdv#y=`@80A&l(VqYx31OuDz-aJ z#}N$8_3d2d4z4(h%wb!a@P&ZcFfm!#ZNf^!)YX9P@_II!#(UaQ^2 z-o6ZobMMj7PM-{WM+OONnSJO>QRBV2rMB64#P z2k#gizyHW1^+7*iOsbf&q*hl9wFEY;7kFA&!_t0%VVNAyB z*3sv^$_#|7WajLo8eZKx;AAmKiWJgo@%!CK4gDfS;)k)a3kMdo=MAf)eAG~ItiS@ZIt9qHfYJO=Y`0=kLWnLNaz|n=ir3b4MP3!* zKz6{Ijh$<=X|WhZQ**;yMWP6fX&{3IGc}jC8irz5-Y@Ch8CEmM2P2aXi;kPdCCFd2 zi$2Qnp)o`esK_Q^O24g1Wvav(t?LVZnkFI$v=jL`$Ax=V<|ybvE8n+QpUk0#{Bjns z7Ba7~Ug-GXbzBD>PkYQ5>2gpq7+;3Di{~*$p8JauPZ5P|yc0)e1`9y_5W{agVJPBW zjSTL7qOBGpxnwq60Ne9gGst}MR23c(OMZ;FbBo48f&9}sN9emcSm~zj29PX_Hxf-P zo0u2?%{$H)AAY{X@WvnqX_r%cEEw6@#nmjDJs?4kN?>J9Sr^&j(V~$9I&K8eP%6xN z%Wb$G7zUU{$u7V(y1=}5BL>D#Qx|03l?I11v01MrW>rXS$vpSuNX(=@!2-)-Ouhry z>qadbv|^XfqFUb&6gYx24%j=skROM9oO^akJlFLli7rUHPKAj^d+84a>F5d)Eg;hd zQ`XGY;hChGMnCiZAi8~u`)Y7TKPOY)moM}SGJUm_#O0J|@VERYK8tX(=#zFNl}3ti z3W2rH$nz(IDIrFHPxU6}wU#4ub>!*&YG4ESZSNk9;!aR}Wv)U7Zy^y5diX0h`r+rs0#Vu{Dp>{&F6g4; zyIw9>ET6jQn&zqu z(!nQnRkly01V;Wj6p~7|R$t8J)HS&vslPoj-O)&jLCdQ$n99$Q*0^p^Y>f|rX~ zN=Ks6ue(nrdV>Slm6;e*c_K?C;gN^ZC~jny>`Qnx1&(U&@(hSDx^MF&X4Q zHrc1XqsxWSE9XV7dUQSuGYeaI->luQILAUaVfI;#iKPTj&R4>!=kpk4(GpIaM`{$^kIpu8sovxN>1q9u&IE|AqrNC&Wup4VRkBI*#Zcig0HdJRp2K}^o2<)e zdpe~O#Vb}A8+60;4bkarFd8{3qrQF}shFB`W9Ed%lE*!i82N=9+>~d2p)tgv z7y*!N+vnBmd$Wjv3w+}~u|6F?Grj`1dQfPkR|m)DITLne#qFNXl3`cIn2qD^Iil)h zfS=XuNN&AtxWZzywOmY?Pb5qCLlA;;i>&m21(#@yn~g@Iw4IEyD-(yo;zdFS$ou ziMKG^NZ!tECO{v&4|bzGWkaE?yJZ21PxY7kQE#Uw0VbXZ_x0|Ox6(o8vDmT9m=m79MFF~g#d-8R)#Q9H$4N!oJV{ zy5e!;2b|CA=}Yd9dLv0AllKn&+-i?iIX^1XR5XhDBAd4z*b_?k^NSXY9McV(Lck_k+z;`#A;| z&JtSf^@@)&ITO_1Ga%K&po?K2Ql{OXwK7}FmI{6;A7DQ>94AmDHor6eMJ^i50x*2RfAhvtSxI|YgO-R)P`b^1EeY&vdFP;#=%gOw!nqmPlfMjK2eGT%hfMV&j@LgjOJyD5L2NyM6Hc{0>ROk8kS~~rF(#m|GOAybyx3yBP=Lv^S zB;boZYmhYOqN#5=W2Dt*YT^`q*?9OOi~kMW+%UQe<;hctpHkn7fK<0%fe7&pc9-(& zMEXl8Whhu{I`RE&ppJ}pah#C+^(0D|PA#&_0qw9|-0F9#(5fU)+9m#`Rk%mtS3lN$ z_AUuZl7os4)Z*et1^7$~7B#}5p1G+xcFW^RGa^LmvIS}!;V%B|`8eP=kzdX70GX%d=Q_w}kwVwu7TZ z!h7SLgrMwPZic07;AkcfKnp7mSn(5=8D(8$AYM8fTro|emVYqDN9#CnY*_A{U1HUL zCKGDnA5|-hN)?kZS2Q5|UNK07iG71wSWm)hS=kY|klXu?HMaF_7i#eGWBzBt_g@G- z`cox)(b(jiP2Zris>{up^{ZKKb(&QFh(%BPLCZ;lSw1t(t-;j1SelDYU~mWFqX?q^ z3h(&-cRxqfI7708^xU3UX2DczLU1aYTqo%p8?;bD@~-%6*NTJtS*i~ zLSYPp>L|gZzYDcgCwCH-Q=Bu^9x`0myjF;ax9`;1xgMhT^s~zxJaS?;ktf!=xV?7# z?Ns&%#%_lO+4=YlwR* zYR5X>jL@yAGhSB7@s3WJuqSlRE5rqx{YtcqfKoA!!^SO6EJF08g!{-e56KE;t&a19 zcn1aPfzW6r%f!9X*H{SQ8#1cQX}+xjvjEy$PUi<|~X z>9z#>Vtx!(xvu1f5PA^XPy;0sT7hEpbMi5sU!S|8u%_ntRD!?O7C)Lk!HNuj2QpJ0 zY5v*U)nPL~lbT`#^d`sidfuet5(Sq>0LGPU?%{E&8@;0o0VR_YxuW87rq@qzrJBrC zUcr~_{%X^zl7#Y7waDqo?zwm$NNZcED*wu9rIWS6+=o~!xX48=OLb*_4akQ3NfNRd zMWnxh5q198%rjffHgi}?J{#dS(rik&T<(8|FMjeBL-DS)g37dq7^fg`7I8R~t3=i2gp15d}oL^0w3SSaI=>c6cCAzfe5;@_FhMl(j#%7xehYP%hsl0(f&|IGz z1!QB`=qb-H0UH|10wfP~$inB+Aubz{){W?E_ws%r2mANqvB9H`0XRuo%##KAeV=M& zfl_ow2>9!eSl8!g=i3q(TE|2Bsv#Qe%9(9H``~v=uJ*HPpw$C6_SWo8_9V$jTK$kU zrAR@gUFru|u1jsGh>Crix_(uPd`7oeDFB3V0*vZcM)Q&%xeEsv#Zqo*S zv_5~~R@Xkn#fhb1ljfF15aTIl!C+Em`?0D8&*DtPJfj>KFvMwy=;+TPw9`Ri3uW1= zyRXMI{W&sQUb|2f^l|$aE_T{;#rvw@0vLH64OGja6O~%7(49q2@XYg9CN3FAgAxLX zjRCuEa3dQQ1Wi7`aeIJ>|=|nppyT`iWa7Xva2Fm-581oeb+l^w1G7BS33%`hCmAufQSXA z=y_t%O!7xjO>C5i)KC2CueCIAi%jaV?>G=XNnK>n=TJHvXNQCmBds9Yzx(kYc*7Ov zU~#iNU0q^KhAfjzuamp4iTmq3e7?a(W!)FOdzc9aWs=rhkB9nlNc;?5Vs-hd&by&c zrH{R}AxG-|GgiM*is*wKrUO6OTjCQV7%O(@&LYrg7B5N$&K+C9r&bp>&%27}er?t5 z;ajA(6NzF0)buMgp+(Hj_Rx`!mwPet*wi4JCt0*-fQSUwWE~8LPU?a~Ew?(Ss2XtT{l9nhU)L^L@#fWDj;F z3eh0vM~CwLrSGQ0t9E}p6$)Ch0;Y+&N78N)TdXST6Ykn-a;vW=xRY`qi<2TUXQ@KW zKjsDubR~Cym{34`H0>p>=zda-X3Wy6qNBz$f1py?&MF3=_K;l9`0CB^jARz!S zKt?VId^lHAD|35e02BlOg$fEE#?08w7y@@59Y_QuqJ|&>pa7}hP=NpU9Pqyj-2Z|6 zOKJpuZV1}{kLdqL|BvgRKMUm_B>(@IfN;S76hPo!r9cA6af|*{S&08&=FzDU{;C{g?`EO^ literal 0 HcmV?d00001 From 6f5eeee425de28344753b653abf18b12b5239d2b Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 12 Aug 2025 12:29:17 -0700 Subject: [PATCH 33/47] spotless --- .../vectorized/parquet/TestParquetVectorizedReads.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 603aedd224a7..5cd558f98ac8 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -73,7 +73,12 @@ public class TestParquetVectorizedReads extends AvroDataTestBase { private static final String PLAIN = "PLAIN"; private static final List GOLDEN_FILE_ENCODINGS = - ImmutableList.of("PLAIN_DICTIONARY", "RLE", "RLE_DICTIONARY", "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY"); + ImmutableList.of( + "PLAIN_DICTIONARY", + "RLE", + "RLE_DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY"); private static final Map GOLDEN_FILE_TYPES = ImmutableMap.of( "string", Types.StringType.get(), From 9f279748b453a0e5c9183e908eaacc6c891241c3 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 12 Aug 2025 12:32:31 -0700 Subject: [PATCH 34/47] spotless --- .../VectorizedDeltaLengthByteArrayValuesReader.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java index e75a0f511159..b30d80870a7a 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -90,56 +90,67 @@ private void readValues( } } + /** DELTA_LENGTH_BYTE_ARRAY only supports BINARY */ @Override public boolean readBoolean() { throw new UnsupportedOperationException("readBoolean is not supported"); } + /** DELTA_LENGTH_BYTE_ARRAY only supports BINARY */ @Override public byte readByte() { throw new UnsupportedOperationException("readByte is not supported"); } + /** DELTA_LENGTH_BYTE_ARRAY only supports BINARY */ @Override public short readShort() { throw new UnsupportedOperationException("readShort is not supported"); } + /** DELTA_LENGTH_BYTE_ARRAY only supports BINARY */ @Override public int readInteger() { throw new UnsupportedOperationException("readInteger is not supported"); } + /** DELTA_LENGTH_BYTE_ARRAY only supports BINARY */ @Override public long readLong() { throw new UnsupportedOperationException("readLong is not supported"); } + /** DELTA_LENGTH_BYTE_ARRAY only supports BINARY */ @Override public float readFloat() { throw new UnsupportedOperationException("readFloat is not supported"); } + /** DELTA_LENGTH_BYTE_ARRAY only supports BINARY */ @Override public double readDouble() { throw new UnsupportedOperationException("readDouble is not supported"); } + /** DELTA_LENGTH_BYTE_ARRAY only supports BINARY */ @Override public void readIntegers(int total, FieldVector vec, int rowId) { throw new UnsupportedOperationException("readIntegers is not supported"); } + /** DELTA_LENGTH_BYTE_ARRAY only supports BINARY */ @Override public void readLongs(int total, FieldVector vec, int rowId) { throw new UnsupportedOperationException("readLongs is not supported"); } + /** DELTA_LENGTH_BYTE_ARRAY only supports BINARY */ @Override public void readFloats(int total, FieldVector vec, int rowId) { throw new UnsupportedOperationException("readFloats is not supported"); } + /** DELTA_LENGTH_BYTE_ARRAY only supports BINARY */ @Override public void readDoubles(int total, FieldVector vec, int rowId) { throw new UnsupportedOperationException("readDoubles is not supported"); From b8173d69546212ba04b4f1b2d0728fb303bce524 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Thu, 14 Aug 2025 13:27:05 -0700 Subject: [PATCH 35/47] change value --- .../VectorizedDeltaLengthByteArrayValuesReader.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java index b30d80870a7a..86817a4fb538 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -78,15 +78,19 @@ private void readValues( IntUnaryOperator getLength, BinaryOutputWriter outputWriter) { ByteBuffer buffer; - int length; + long offset = rowId; for (int i = 0; i < total; i++) { - length = getLength.applyAsInt(rowId + i); + int length = getLength.applyAsInt(rowId + i); try { + if (length <= 0) { + throw new IllegalStateException("Invalid length: " + length); + } buffer = in.slice(length); } catch (EOFException e) { throw new ParquetDecodingException("Failed to read " + length + " bytes"); } - outputWriter.write(vec, rowId + i, buffer); + outputWriter.write(vec, offset, buffer); + offset += length; } } From 9954a430fe85c789b60fca407e26c3bff4b72430 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Mon, 25 Aug 2025 14:22:37 -0700 Subject: [PATCH 36/47] spotless --- .../data/vectorized/parquet/TestParquetVectorizedReads.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 5ca35c700ebd..4c7e0eb9e879 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -75,10 +75,7 @@ public class TestParquetVectorizedReads extends AvroDataTestBase { private static final String PLAIN = "PLAIN"; private static final List GOLDEN_FILE_ENCODINGS = ImmutableList.of( - "PLAIN_DICTIONARY", - "RLE_DICTIONARY", - "DELTA_BINARY_PACKED", - "DELTA_LENGTH_BYTE_ARRAY"); + "PLAIN_DICTIONARY", "RLE_DICTIONARY", "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY"); private static final Map GOLDEN_FILE_TYPES = ImmutableMap.of( "string", Types.StringType.get(), From 29e59c5b9cb3d09b780e949ca73811e4b76183b8 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 26 Aug 2025 09:33:22 -0700 Subject: [PATCH 37/47] change readBinary path --- .../VectorizedDeltaEncodedValuesReader.java | 2 +- ...rizedDeltaLengthByteArrayValuesReader.java | 7 +++--- ...ectorizedParquetDefinitionLevelReader.java | 15 +----------- .../parquet/VectorizedPlainValuesReader.java | 23 ++++++++++++++++--- .../parquet/VectorizedValuesReader.java | 2 +- 5 files changed, 26 insertions(+), 23 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index b4c575318e09..efd631557cdf 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -156,7 +156,7 @@ public void readDoubles(int total, FieldVector vec, int rowId) { /** DELTA_BINARY_PACKED only supports INT32 and INT64 */ @Override - public void readBinary(int total, FieldVector vec, int rowId) { + public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowValidityVector) { throw new UnsupportedOperationException("readBinary is not supported"); } diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java index 86817a4fb538..3bfeb10bd7fe 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -62,7 +62,7 @@ public Binary readBinary(int len) { } @Override - public void readBinary(int total, FieldVector vec, int rowId) { + public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowValidityVector) { readValues( total, vec, @@ -71,6 +71,7 @@ public void readBinary(int total, FieldVector vec, int rowId) { (f, i, v) -> f.getDataBuffer().setBytes(i, v)); } + @SuppressWarnings("UnusedVariable") private void readValues( int total, FieldVector vec, @@ -78,7 +79,6 @@ private void readValues( IntUnaryOperator getLength, BinaryOutputWriter outputWriter) { ByteBuffer buffer; - long offset = rowId; for (int i = 0; i < total; i++) { int length = getLength.applyAsInt(rowId + i); try { @@ -89,8 +89,7 @@ private void readValues( } catch (EOFException e) { throw new ParquetDecodingException("Failed to read " + length + " bytes"); } - outputWriter.write(vec, offset, buffer); - offset += length; + outputWriter.write(vec, rowId + i, buffer); } } diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java index c7dbe8de7b92..f02032ac9f98 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java @@ -584,20 +584,7 @@ protected void nextVal( VectorizedValuesReader valuesReader, int typeWidth, byte[] byteArray) { - int len = valuesReader.readInteger(); - ByteBuffer buffer = valuesReader.readBinary(len).toByteBuffer(); - // Calling setValueLengthSafe takes care of allocating a larger buffer if - // running out of space. - ((BaseVariableWidthVector) vector).setValueLengthSafe(idx, len); - int startOffset = ((BaseVariableWidthVector) vector).getStartOffset(idx); - // It is possible that the data buffer was reallocated. So it is important to - // not cache the data buffer reference but instead use vector.getDataBuffer(). - vector.getDataBuffer().setBytes(startOffset, buffer); - // Similarly, we need to get the latest reference to the validity buffer as well - // since reallocation changes reference of the validity buffers as well. - if (setArrowValidityVector) { - BitVectorHelper.setBit(vector.getValidityBuffer(), idx); - } + valuesReader.readBinary(1, vector, idx, setArrowValidityVector); } @Override diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java index 0a43f65f6f2c..e2d69177a544 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java @@ -19,6 +19,8 @@ package org.apache.iceberg.arrow.vectorized.parquet; import java.nio.ByteBuffer; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.FieldVector; import org.apache.iceberg.parquet.ValuesAsBytesReader; import org.apache.parquet.io.api.Binary; @@ -76,9 +78,24 @@ public void readDoubles(int total, FieldVector vec, int rowId) { } @Override - public void readBinary(int total, FieldVector vec, int rowId) { - for (int i = 0; i < total; i++) { - readBinary(1, vec, rowId + i); + public void readBinary( + int total, + FieldVector vec, + int rowId, + boolean setArrowValidityVector) { + int len = readInteger(); + ByteBuffer buffer = readBinary(len).toByteBuffer(); + // Calling setValueLengthSafe takes care of allocating a larger buffer if + // running out of space. + ((BaseVariableWidthVector) vec).setValueLengthSafe(rowId, len); + int startOffset = ((BaseVariableWidthVector) vec).getStartOffset(rowId); + // It is possible that the data buffer was reallocated. So it is important to + // not cache the data buffer reference but instead use vector.getDataBuffer(). + vec.getDataBuffer().setBytes(startOffset, buffer); + // Similarly, we need to get the latest reference to the validity buffer as well + // since reallocation changes reference of the validity buffers as well. + if (setArrowValidityVector) { + BitVectorHelper.setBit(vec.getValidityBuffer(), rowId); } } } diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java index c07d18aefb45..9911409a001a 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java @@ -77,7 +77,7 @@ interface VectorizedValuesReader { void readDoubles(int total, FieldVector vec, int rowId); /** Read `total` binary values into `vec` starting at `vec[rowId]` */ - void readBinary(int total, FieldVector vec, int rowId); + void readBinary(int total, FieldVector vec, int rowId, boolean setArrowValidityVector); /** * Initialize the reader from a page. See {@link ValuesReader#initFromPage(int, From dbfd7bbbb8839093d72e99b5597f01bf53102247 Mon Sep 17 00:00:00 2001 From: Eric Maynard Date: Tue, 26 Aug 2025 10:32:27 -0700 Subject: [PATCH 38/47] lint --- .../vectorized/parquet/VectorizedPlainValuesReader.java | 6 +----- .../arrow/vectorized/parquet/VectorizedValuesReader.java | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java index e2d69177a544..56bcfe3ff8e2 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPlainValuesReader.java @@ -78,11 +78,7 @@ public void readDoubles(int total, FieldVector vec, int rowId) { } @Override - public void readBinary( - int total, - FieldVector vec, - int rowId, - boolean setArrowValidityVector) { + public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowValidityVector) { int len = readInteger(); ByteBuffer buffer = readBinary(len).toByteBuffer(); // Calling setValueLengthSafe takes care of allocating a larger buffer if diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java index 9911409a001a..48f118f387e9 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedValuesReader.java @@ -77,7 +77,7 @@ interface VectorizedValuesReader { void readDoubles(int total, FieldVector vec, int rowId); /** Read `total` binary values into `vec` starting at `vec[rowId]` */ - void readBinary(int total, FieldVector vec, int rowId, boolean setArrowValidityVector); + void readBinary(int total, FieldVector vec, int rowId, boolean setArrowValidityVector); /** * Initialize the reader from a page. See {@link ValuesReader#initFromPage(int, From bfde527c31b8b068e2a158142035116630862d51 Mon Sep 17 00:00:00 2001 From: Becker Ewing Date: Mon, 1 Dec 2025 23:23:03 -0500 Subject: [PATCH 39/47] Finish vectorized support for DELTA_LENGTH_BYTE_ARRAY encoding --- .../VectorizedDeltaEncodedValuesReader.java | 33 +++++++++------- ...rizedDeltaLengthByteArrayValuesReader.java | 37 +++++++++--------- .../DELTA_LENGTH_BYTE_ARRAY/binary.parquet | Bin 9732 -> 9875 bytes .../DELTA_LENGTH_BYTE_ARRAY/string.parquet | Bin 0 -> 8478 bytes 4 files changed, 36 insertions(+), 34 deletions(-) rename {spark/v4.0/spark/src/test => parquet/src/testFixtures}/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/binary.parquet (78%) create mode 100644 parquet/src/testFixtures/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/string.parquet diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java index efd631557cdf..c803b58fb53b 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaEncodedValuesReader.java @@ -110,13 +110,13 @@ public short readShort() { @Override public int readInteger() { - readValues(1, null, 0, INT_SIZE, (f, i, v) -> intVal = (int) v); + readValues(1, 0, (i, v) -> intVal = (int) v); return intVal; } @Override public long readLong() { - readValues(1, null, 0, LONG_SIZE, (f, i, v) -> longVal = v); + readValues(1, 0, (i, v) -> longVal = v); return longVal; } @@ -134,12 +134,18 @@ public Binary readBinary(int len) { @Override public void readIntegers(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, INT_SIZE, (f, i, v) -> f.getDataBuffer().setInt(i, (int) v)); + readValues(total, rowId, (i, v) -> vec.getDataBuffer().setInt(((long) i) * INT_SIZE, (int) v)); + } + + public int[] readIntegers(int total, int rowId) { + int[] outputBuffer = new int[total]; + readValues(total, rowId, (i, v) -> outputBuffer[i] = (int) v); + return outputBuffer; } @Override public void readLongs(int total, FieldVector vec, int rowId) { - readValues(total, vec, rowId, LONG_SIZE, (f, i, v) -> f.getDataBuffer().setLong(i, v)); + readValues(total, rowId, (i, v) -> vec.getDataBuffer().setLong(((long) i) * LONG_SIZE, v)); } /** DELTA_BINARY_PACKED only supports INT32 and INT64 */ @@ -160,8 +166,7 @@ public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowVa throw new UnsupportedOperationException("readBinary is not supported"); } - private void readValues( - int total, FieldVector vec, int rowId, int typeWidth, IntegerOutputWriter outputWriter) { + private void readValues(int total, int rowId, IntegerOutputWriter outputWriter) { if (valuesRead + total > totalValueCount) { throw new ParquetDecodingException( "No more values to read. Total values read: " @@ -177,7 +182,7 @@ private void readValues( int currentRowId = rowId; // First value if (valuesRead == 0) { - outputWriter.write(vec, ((long) (currentRowId + valuesRead) * typeWidth), firstValue); + outputWriter.write(currentRowId + valuesRead, firstValue); lastValueRead = firstValue; currentRowId++; remaining--; @@ -186,7 +191,7 @@ private void readValues( while (remaining > 0) { int loadedRows; try { - loadedRows = loadMiniBlockToOutput(remaining, vec, currentRowId, typeWidth, outputWriter); + loadedRows = loadMiniBlockToOutput(remaining, currentRowId, outputWriter); } catch (IOException e) { throw new ParquetDecodingException("Error reading mini block.", e); } @@ -201,8 +206,7 @@ private void readValues( * * @return the number of values read into output */ - private int loadMiniBlockToOutput( - int remaining, FieldVector vec, int rowId, int typeWidth, IntegerOutputWriter outputWriter) + private int loadMiniBlockToOutput(int remaining, int rowId, IntegerOutputWriter outputWriter) throws IOException { // new block; read the block header @@ -223,7 +227,7 @@ private int loadMiniBlockToOutput( // calculate values from deltas unpacked for current block long outValue = lastValueRead + minDeltaInCurrentBlock + unpackedValuesBuffer[i]; lastValueRead = outValue; - outputWriter.write(vec, ((long) (rowId + valuesReadInMiniBlock) * typeWidth), outValue); + outputWriter.write(rowId + valuesReadInMiniBlock, outValue); remainingInBlock--; remainingInMiniBlock--; valuesReadInMiniBlock++; @@ -277,18 +281,17 @@ private void readBitWidthsForMiniBlocks() { } } - /** A functional interface to write long values to into a FieldVector */ + /** A functional interface to write long values to into a destination buffer */ @FunctionalInterface interface IntegerOutputWriter { /** * A functional interface that can be used to write a long value to a specified row in a - * FieldVector + * destination buffer * - * @param vec a FieldVector to write the value into * @param index The offset to write to * @param val value to write */ - void write(FieldVector vec, long index, long val); + void write(int index, long val); } } diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java index 3bfeb10bd7fe..935df4e6bd09 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -21,37 +21,38 @@ import java.io.EOFException; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.UUID; import java.util.function.IntUnaryOperator; +import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.iceberg.arrow.ArrowAllocation; -import org.apache.iceberg.io.CloseableGroup; import org.apache.parquet.bytes.ByteBufferInputStream; import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.io.api.Binary; -public class VectorizedDeltaLengthByteArrayValuesReader - implements VectorizedValuesReader, AutoCloseable { +/** + * A {@link VectorizedValuesReader} implementation for the encoding type DELTA_LENGTH_BYTE_ARRAY. This + * is adapted from Spark's VectorizedDeltaLengthByteArrayReader. + * + * @see + * Parquet format encodings: DELTA_LENGTH_BYTE_ARRAY + */ +public class VectorizedDeltaLengthByteArrayValuesReader implements VectorizedValuesReader { private final VectorizedDeltaEncodedValuesReader lengthReader; - private final CloseableGroup closeables; private ByteBufferInputStream in; - private IntVector lengthsVector; + private int[] lengths; private ByteBuffer byteBuffer; VectorizedDeltaLengthByteArrayValuesReader() { lengthReader = new VectorizedDeltaEncodedValuesReader(); - closeables = new CloseableGroup(); } @Override public void initFromPage(int valueCount, ByteBufferInputStream inputStream) throws IOException { - lengthsVector = new IntVector("length-" + UUID.randomUUID(), ArrowAllocation.rootAllocator()); - closeables.addCloseable(lengthsVector); lengthReader.initFromPage(valueCount, inputStream); - lengthReader.readIntegers(lengthReader.getTotalValueCount(), lengthsVector, 0); + lengths = lengthReader.readIntegers(valueCount, 0); + this.in = inputStream.remainingStream(); } @@ -67,8 +68,11 @@ public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowVa total, vec, rowId, - x -> lengthsVector.get(x), - (f, i, v) -> f.getDataBuffer().setBytes(i, v)); + x -> lengths[x], + (f, i, v) -> + ((BaseVariableWidthVector) vec) + .setSafe( + (int) i, v.array(), v.position() + v.arrayOffset(), v.limit() - v.position())); } @SuppressWarnings("UnusedVariable") @@ -159,11 +163,6 @@ public void readDoubles(int total, FieldVector vec, int rowId) { throw new UnsupportedOperationException("readDoubles is not supported"); } - @Override - public void close() throws Exception { - closeables.close(); - } - /** A functional interface to write binary values into a FieldVector */ @FunctionalInterface interface BinaryOutputWriter { diff --git a/spark/v4.0/spark/src/test/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/binary.parquet b/parquet/src/testFixtures/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/binary.parquet similarity index 78% rename from spark/v4.0/spark/src/test/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/binary.parquet rename to parquet/src/testFixtures/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/binary.parquet index 959e99bd6228c983116d1f421627a83222480769..926d02835f77ca633adae4438c09a89e4f094ce4 100644 GIT binary patch delta 327 zcmZqine013LR!X*L4)o8M4jwgyCk?7+IGZsXSQT-)Jzo=WY{S2Q)RNds-^OUDU4!3 ztj1x3Q}N`5suJ}KAXQ=)_|-T-3<9vZVA9di z+tIPy(J9^0&CxO4(K#I~>g43;=nG;wJ34xTXdoX(19`3>p->P3WC7`PkbV!x5}-jq z5C~EY0j{oKrA}aRFcTsUvfC*;G9oAg#s_Kv%QHwQ6eJcEmZp~ICKnXwCKeUtmn)Q| f78Pga=P4K&=^5x5NXp1CFfhb3GB5-<1{neXTL51^ delta 179 zcmbR2+u}1pVx#d-6*EaeCP@iyhPEAX-I*=f8#PlUCD{H?)XBcJOO#1cj6qY4;Q~K{ zq>LDc=&UJ>VhjwDVlADF43l+KEm=VVvnFS$$|M67h%$)sh%zzQfTShaB*EGk#NJF{ z6jc&i1SGu}#6apV@T+lv7z`Q;1&Kw4rKu&l$prEC PBg4SJuoZ{{9D@u2@Cz<} diff --git a/parquet/src/testFixtures/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/string.parquet b/parquet/src/testFixtures/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/string.parquet new file mode 100644 index 0000000000000000000000000000000000000000..686bf91ff076a475742757842834c0b53863e005 GIT binary patch literal 8478 zcmZ`umh2F%BNXo8LT$it}D*Vi@)AGYxa z_@Qb6lef{tA@iJ!(*Kj6(VsEruD#bjKs86nJ=gc_v-a9+uX7=gXSu?klZoWNes+Jp zNq)SW98HcSM<%n0sXKie6SsnReDOshk@(d8{do7rFE?)f{@I<+Z@STYGWV#IuQ$v2 z=Rc)ZP8Y*==K1uS;j{B-=IrFbO4z;kYH>K9T3&i|)_(miw>jmeeDC&54 zl%3vq`=nal*cbxjeRF){B3$g=n%y3o-6%$@rRi#=Ts@s%UOJzic~qTkUEEtu4;D5r z@|n5+mfjVrBU82IwZZcHMz((M>lJ|Hr?(%xT`zBq)Jm0w=f7t17pM19o$SQdw^L8w z6l=dkrJ(ly*4)GTdbu*y&79^hl4tFcwZVAy`9fy-tPGINlcl^Pn6A%6XN8sYU~%H& z^xeH}C&+qwyOq88HGi^x5r!wTt&Q=|XR8y{v8m2f?REO%t5@muN7WKQ)@FZr7-i-b zyW^d+^yb=j^3In}i>ukiRFru;{ORKLlg*9wxlDd}Zn-dMzyIn##q!jLQrJ2xe)e?y zFVTY!04dBxL4Im$JhdA1%K4eM#c;DUT+GypgTwXdqi0L2XTfHa9BeF3FZYUP;qY~s zT&^EJo?BTeEk@(Lc`2t@fAXrC&km-uOXsCxzIr+~Xl~7Cdxh+3_113>R*M^z;c$BE zq|~U~UfLRt49X+UCpK#FIF!-kXvo0RJsk+FneqC&GI}zOWOQUi7>d$pv}{x{LLJBP zkk|u^ay%5uO^gdeaDdYTP2na%ZF6uz_Dit4a#>-)T1o=*iVS3Sm;HGBp-8vMo+(Z@ z9w;Eb)G^4Q(?PVSAKGrRX}_?c3c87lf>yVP7up|?!-b7%N)zlOtKPwometrnMRAky zMvIiEUzRSps{+)J#i|n;P+!{SuGpe&mhH;PqspC zKDHb>aPf%6d$$lvxsiqJ@MVhg&FGZN@~i2j<9fbKE@`?z^qBDYSkM%LeCuK#FRSG= zHyJBo0bCWI^vm>_;ed=m$g(YVz}RrbJAw{irS2vOXE0r3T?f9!2?Cx8CFN&lbC;cH zx7Dc%s+>@@og`^0lTlZmiokc1_3IMS35pPqv#$Md6W=G&1jSLVGKnZ!n3{xDFfWMF z5)pQIiR6x2O3D=BJQ;fyV*&=0R%Qv%9NG|+ z2lkg*Lpl{bqE1eGWFnKiU@IePtcke`o7508u{7x{TVEAWaMN%pPf5$NQt&6vx}tlO z1+Z=H3Qryp)9jGv5rwGVB$1CXso@2yhrVGIAchcb6%(Omb%sLG*qkY9zi*DBq|K(j z?#z*PgWW2*o9uZ+X1lv6*v@fJp;QLeZX2!12R^&I!*Nn~v9&Dm@X+4Ipbk2CL5?aC%a-F~#@qLlq^HGc3sf%lT4rI(XCwxmd_PcLKPw9bWcsUV z+@dO>&%KAN*-X1HgvIvu6^iZEjXu33M1!eIM}uv17oD>{_9;nt^-x*7om%OwdYPEmbn|6jOH}EBK z+5FvI8W&9YL1#vJzVgI>Mpp?M&$6UtTgN(bC{!dGn@tz=vdzwh1A%iexnZ)1S6$B~EJT5nUb5*PCMHH!<`~%*hD*&_pn7U-U zhhT3}-0>AsE(<9Y#7#oE-xnT|S=;9(E5VzTI;-%4?3V>W(yKM-NKl1nMn|G~*6MYD zN>a!}DfdCxw9@L(IIUYF8N!X~8zpU>w4W~WSnP-iguFv21l@-x$*RXt`9*eQoI!9@;@TQ2V&as?+ig4=qaSqVj5G<7q#c zp^korhICC|3pMir&{qAwD$F2e+$M9^r3gfrG5B4y7H=*OSR9>6b>wX%XG-J&z7|#i zf1;~>LhGV@dwA(v8j*={VQ5*a4Zb({7)_Tzs+zVNEy}YusgRIgCnt@27 zRHMOeKz2TCusk&@KZ1-ph75(0987s;T3lsYuhZddv2sEgEZr*UA_~1RkG9)nQl_BF zpJfGsl&f)U?Jk>FF)SuLI-&&o`?5=7Nw(*m?>%iSnOB#AH||*|`lrakSq)OJTM1nfB8X zW*1RKE?u(vkVk!Pna3=T4g&1ZMMV>E9O&8%#u+X#nK-LaZ9&r zq;!{8f^haGQfb^o=qNI-d(c4NVbV@L3qAxX1Qvm0K{>$1!BWN`{QTyAl(fHPVjX&t! h??$b|orC@Rj~{>c{deD|CS&dXc_Wc{=Kg2I{{Rh Date: Wed, 3 Dec 2025 19:24:54 -0500 Subject: [PATCH 40/47] Add vectorized reader support for DELTA_BYTE_ARRAY encoding --- LICENSE | 3 +- .../VectorizedDeltaByteArrayValuesReader.java | 171 ++++++++++++++++++ ...rizedDeltaLengthByteArrayValuesReader.java | 12 +- .../parquet/VectorizedPageIterator.java | 3 + .../encodings/DELTA_BYTE_ARRAY/binary.parquet | Bin 0 -> 9978 bytes .../encodings/DELTA_BYTE_ARRAY/string.parquet | Bin 0 -> 8887 bytes .../parquet/TestParquetVectorizedReads.java | 7 +- .../parquet/TestParquetVectorizedReads.java | 6 +- 8 files changed, 197 insertions(+), 5 deletions(-) create mode 100644 arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java create mode 100644 parquet/src/testFixtures/resources/encodings/DELTA_BYTE_ARRAY/binary.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/DELTA_BYTE_ARRAY/string.parquet diff --git a/LICENSE b/LICENSE index 5b7355e1c349..daf8cb9bceb6 100644 --- a/LICENSE +++ b/LICENSE @@ -290,6 +290,7 @@ This product includes code from Apache Spark. * Connector expressions. * implementation of VectorizedDeltaEncodedValuesReader * implementation of VectorizedDeltaLengthByteArrayValuesReader +* implementation of VectorizedDeltaByteArrayReader Copyright: 2011-2018 The Apache Software Foundation Home page: https://spark.apache.org/ @@ -337,4 +338,4 @@ This product includes code from Apache Flink. Copyright: 1999-2022 The Apache Software Foundation. Home page: https://flink.apache.org/ -License: https://www.apache.org/licenses/LICENSE-2.0 \ No newline at end of file +License: https://www.apache.org/licenses/LICENSE-2.0 diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java new file mode 100644 index 000000000000..0a41b6b5bea6 --- /dev/null +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.arrow.vectorized.parquet; + +import java.io.IOException; +import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.FixedWidthVector; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.io.api.Binary; + +/** + * A {@link VectorizedValuesReader} implementation for the encoding type DELTA_BYTE_ARRAY. This is + * adapted from Spark's VectorizedDeltaByteArrayReader. + * + * @see + * Parquet format encodings: DELTA_BYTE_ARRAY + */ +public class VectorizedDeltaByteArrayValuesReader implements VectorizedValuesReader { + + private final VectorizedDeltaEncodedValuesReader prefixLengthReader; + private final VectorizedDeltaLengthByteArrayValuesReader suffixReader; + + private int[] prefixLengths; + private Binary previous; + + public VectorizedDeltaByteArrayValuesReader() { + prefixLengthReader = new VectorizedDeltaEncodedValuesReader(); + suffixReader = new VectorizedDeltaLengthByteArrayValuesReader(); + previous = Binary.EMPTY; + } + + @Override + public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { + prefixLengthReader.initFromPage(valueCount, in); + prefixLengths = prefixLengthReader.readIntegers(valueCount, 0); + suffixReader.initFromPage(valueCount, in); + } + + @Override + public Binary readBinary(int len) { + throw new UnsupportedOperationException(); + } + + @Override + public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowValidityVector) { + if (vec instanceof BaseVariableWidthVector) { + BaseVariableWidthVector vector = (BaseVariableWidthVector) vec; + readValues(total, rowId, vector::setSafe); + } else if (vec instanceof FixedWidthVector) { + BaseFixedWidthVector vector = (BaseFixedWidthVector) vec; + readValues(total, rowId, (index, value) -> vector.setSafe(index, value, 0, value.length)); + } + } + + private void readValues(int total, int rowId, BinaryOutputWriter outputWriter) { + for (int i = 0; i < total; i++) { + int prefixLength = prefixLengths[rowId + i]; + Binary suffix = suffixReader.readBinaryForRow(rowId + i); + int length = prefixLength + suffix.length(); + + if (prefixLength != 0) { + byte[] out = new byte[length]; + System.arraycopy(previous.getBytesUnsafe(), 0, out, 0, prefixLength); + System.arraycopy(suffix.getBytesUnsafe(), 0, out, prefixLength, suffix.length()); + outputWriter.write(rowId + i, out); + previous = Binary.fromConstantByteArray(out); + } else { + outputWriter.write(rowId + i, suffix.getBytesUnsafe()); + previous = suffix; + } + } + } + + /** DELTA_BYTE_ARRAY only supports BINARY */ + @Override + public boolean readBoolean() { + throw new UnsupportedOperationException("readBoolean is not supported"); + } + + /** DELTA_BYTE_ARRAY only supports BINARY */ + @Override + public byte readByte() { + throw new UnsupportedOperationException("readByte is not supported"); + } + + /** DELTA_BYTE_ARRAY only supports BINARY */ + @Override + public short readShort() { + throw new UnsupportedOperationException("readShort is not supported"); + } + + /** DELTA_BYTE_ARRAY only supports BINARY */ + @Override + public int readInteger() { + throw new UnsupportedOperationException("readInteger is not supported"); + } + + /** DELTA_BYTE_ARRAY only supports BINARY */ + @Override + public long readLong() { + throw new UnsupportedOperationException("readLong is not supported"); + } + + /** DELTA_BYTE_ARRAY only supports BINARY */ + @Override + public float readFloat() { + throw new UnsupportedOperationException("readFloat is not supported"); + } + + /** DELTA_BYTE_ARRAY only supports BINARY */ + @Override + public double readDouble() { + throw new UnsupportedOperationException("readDouble is not supported"); + } + + /** DELTA_BYTE_ARRAY only supports BINARY */ + @Override + public void readIntegers(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readIntegers is not supported"); + } + + /** DELTA_BYTE_ARRAY only supports BINARY */ + @Override + public void readLongs(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readLongs is not supported"); + } + + /** DELTA_BYTE_ARRAY only supports BINARY */ + @Override + public void readFloats(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readFloats is not supported"); + } + + /** DELTA_BYTE_ARRAY only supports BINARY */ + @Override + public void readDoubles(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readDoubles is not supported"); + } + + /** A functional interface to write binary values into a FieldVector */ + @FunctionalInterface + interface BinaryOutputWriter { + + /** + * A functional interface that can be used to write a binary value to a specified row + * + * @param index The offset to write to + * @param val value to write + */ + void write(int index, byte[] val); + } +} diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java index 935df4e6bd09..601c64e97201 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -29,8 +29,8 @@ import org.apache.parquet.io.api.Binary; /** - * A {@link VectorizedValuesReader} implementation for the encoding type DELTA_LENGTH_BYTE_ARRAY. This - * is adapted from Spark's VectorizedDeltaLengthByteArrayReader. + * A {@link VectorizedValuesReader} implementation for the encoding type DELTA_LENGTH_BYTE_ARRAY. + * This is adapted from Spark's VectorizedDeltaLengthByteArrayReader. * * @see @@ -62,6 +62,14 @@ public Binary readBinary(int len) { return Binary.fromReusedByteBuffer(byteBuffer); } + Binary readBinaryForRow(int rowId) { + if (lengths[rowId] == 0) { + return Binary.EMPTY; + } + readValues(1, null, rowId, ignored -> lengths[rowId], (f, i, v) -> byteBuffer = v); + return Binary.fromReusedByteBuffer(byteBuffer); + } + @Override public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowValidityVector) { readValues( diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java index 578d743314a5..3c743b9ad0a2 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java @@ -103,6 +103,9 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i case DELTA_LENGTH_BYTE_ARRAY: valuesReader = new VectorizedDeltaLengthByteArrayValuesReader(); break; + case DELTA_BYTE_ARRAY: + valuesReader = new VectorizedDeltaByteArrayValuesReader(); + break; default: throw new UnsupportedOperationException( "Cannot support vectorized reads for column " diff --git a/parquet/src/testFixtures/resources/encodings/DELTA_BYTE_ARRAY/binary.parquet b/parquet/src/testFixtures/resources/encodings/DELTA_BYTE_ARRAY/binary.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6d01839cd4aab07184cd7431b4ea9ea26542a689 GIT binary patch literal 9978 zcmZ{KWl&u~wP?(XjH5-h;Q-5r9vySuvucY=lxm?Yo4shN58R#kUz zUA?>N{OIbll|)on0bqcx6!5=;0WgOOKmou2UQm1gDbBFol^Oj^>PFTh;@`^wA#6JOb~uk|0X0$&At0@3AsP8+eRVDzVFT*#D7U z*PDTxJ@9bMX8`+BwKz`-rHTpe1SGIfn(Komk>Zriadi{BJ<$BdnoIREE``~V8Ou|( z$k>~SG7QqAnkxt~m5J!rbel_{!tH6h%MyYANU6xy=4{xjqc=hvmsG71&H4wgSv~qR zia|HkaN6lr-#l=E)y=M>(NYr9r5R&@j1AczO;#2@=A7wNd^}^gdukKar*N?fT^>%6 zmwHN@DuBPok0-5CAy*wr!M8Q1jLc^LhUZ>L?ogmvZ9>6@P9*!4<0!Yhf`?r$|do5(N%Mq$duwZzT*to zi?-oh)J(?k3g+1bv1vcIiyL`5c^jtE^uh4 zKD5RJGq`&E$3h~dV9T18OM8e^0~J@;VH8xU`nV5+z*@d?EOvTg((o`B8PI(EQIhhG z6Ma9pN)3lDIEv|ECgg++j-^e6O3Oz1AE|rEcQc>T+z7Q~MA{T$WGtCGEef}ikrnrB`WSoy0J35adMXT6iK)id@k36uQcntrG54SEkZ8gs&KW zf|NL^=tHNKWH4Bj3hoJT;W>nvZuJ;&MtxFgxSy5sbwc!)4a1K~2sh%%k{5UPEC859VCryj!;-ZUJ2yfdQQSo796+Y`LZsMJ&q;<&6_1qKBT<9ShrC(Dft=# zZ>&Y;iz8E#QQ1PZk$ru%LDQUMn|R;bjQ5TKQ zi>9J}@v_Y6A=P2Gk?BM5#a_Jj6ZHgt{eS>(zAUP8lrm)iMp1_#@LR~Zx$u*x&K2y{ z*_`y7jTV(tCxKCOq%wDh8hDqDPv_CkvLfeSHWUiz#)2sy(d}(hkjozt{G6I%n=?UN z5Yr`hUO|*n(3EZhGFFPXLZ)RR8C9$ z&_C?Rs8Y(^p(-@wR%UZ}0+z?Fi#1~N7+tR&QyCJTEY%LfZP6b|bLRZd^D3sjqpp7y z6^xw`OqV_}Jq9GY$N6WaNxenZkaq8349Jw#ob}Zk|A>7ID$uLFo?@+K&!-keVZY4JJ0it$K}kD_T(1Udr|*k+u(jWGM%lG*k6qj65aHT zi01^?-TMV`He36D;t- z5~-W(yU+hT!(8-b>?g08W3>R)?){v};X*GxA)ll!TzGbSLj1~32$qmG`Pt#JK!sdI z7ZN8btR){0^<_t}{`Mq&^o*j?I(U7Ii#E2vu2bO4^+3tMHh<7=R!B7uh7+W$T>!VY}Vo5qlbQdU-t9jmYMHZ_{qnz(u>$EvVMOa7@ zvMIO|d*C6Y+$r1cGD;OtnBT-8FS~B)Ad=4E*rH8j! zYh9=w;PdYZL>5SfcUSt#NR>6n~yIrC!Dh<%`er~CirE-U>*B4**{@@%vt#gUiWJ&H9v{bQpMT1Uk zJtF9*^q_FL7hA8RE8<0B=e7F&bfwb%%jC+k^_3u~VxHNkXL59P);YA~BGMtgRgQSo zOY8-u<=!NpWY~rHe z?zlA98p7I&(>Fx4s_SPS>*lAyCy)JknX4eX80Z^Mc1PQM0VY_S4&>pp86H1hB&8b1O#54mlX8dv&Prw-|dw|c)_*+Qm5lURdL%DfH74#yXv z9esL4t)uA@3`2XAXcofqmWRmzeTylMPY6?I5RsqdG?4euonB9+$5s4l#F<<_RFt0SJ~di{mKcXvagh*`C*2 zGti@io_K<6%85HfI2ASc& z8e=fGH=)b4*)$StjRR?mkugEdT6sBuIBSuwI90gka zS`f%9_dHBqT?Cj8Fl}8k%&#s=LZ=)^-L>->8ceqQF@3xZIiJ-xqL{MUw-)YD>>M(H)D=0rB< zI;sgK<3Ip!lTv@S1T3LrLw;xB(1z`z9j0_CZfO?HPSJ9WB+(fd)_ z8yCD(I$%~LX6wRD1LDk1%QQj;Edr%+B}Q-fH`gPlicOOfDR%c7PZ$IXezEkJ4D(7h zw_-*P%~>xbEkAcoS9eI+T^1O7Zv@@bm1^uef^!GFbWXwrqatK^G9t8r*{hs;y=n#f z=d}j&SEeSR$NQfpLKM2-c$w_I$95rp^M$gwV=INHEFm`mruD*tfKW z`gQ)NHi+uZ|k*Gp+q@=2@zYwZF2Oc_mMZSfOc&S?aOfb(KK}o zGmrt%O9kopA)+XJ z4evxP{WLt)rP3zVFb8-J(n#j9&|#>W*K}bjYlvX2Rn-aCiF~l*A=ajLWB}G_m_54b zCJ^-F^4`&e%f7r(hRc2xDT8=&GucbS0QKIwYF?6cgMCl0uKVSGzONO(m6*DK+WbR0 zh&FMrK=Gml^hwK)AsowFg87_^_za%t%8w&kPr1Es>BwJnID_ZRrn&X?RMom3 z0Az=#-hob)itSIKYNh*1$7|x^Nxi66#V2D!Z6a_srs_h=fP$9nS9>FL$-V%i)lf)f z{7OEOd;)8$mhYVs0<#*<)~ilPt?12DPKT9L6>|!p)aD;VKaF_@MJ1oDVW=g3_K97w zusho%ugk-|*2vK|v^ER}Vy>1*2{+$5;RM!?-O=Ws3(Y!jaBbp>_6$%(H2dCvx zB@Gkd>;05sBvMGMRD-S`c!w82;SAW=84(Kf_;^M$E}%F9zFA$aK`tiX^PccCrdCAIf+z9;*4&dIp+u=P%I&(e^f2xJn>-L(e?kUNhc zp=hmi0`?|kaLsBf-AZVxHSIl(5>^j+f(^X{RwKof(A-!Z^~>5P)IcrSX)<;Rv^R@p z%4pKab=fVHg=r?0j-Dl(5N3)N0!xbe@<>@lyNAmH!7#0Nw8pYlwz`xoOvBa#? z=OfAc8(-ihXEA>-6B*M`n8H^2&FmYjipdz#f=Dg$VJeTRQb2zdKGkaqKu2u!dE9nm zJK1H%fL6`qurccpM~LeU@fOFXfI|t<8%9f(O7=o1k+NR(BlAFpkD;{nxcXG?oscll zz2gFgXaQMaY^}EJ6{H2GOVjmMvAl@z^oljq)Wl9?7?#{Q*( z(%4=pVprZgJ9pnxfccxdE+ zd0FUD`cTf~;40zRAX^%;cm>Y_A{vqA1om8z0tp;SR$Av<;WGY$om`S|om~(-lMkP- zI>_-`9{whJY2B7`l@#aB?*hpp5M{@Esc_y0@N=vvu*2*HYh1T^Xv8L*kkzah@rv1fF4C&qTcK-Z(vznhD}sXuQmY& z$x{R}>R1zvJzRWuJaS_J3+D(jt>XYf?8U6CmUD;Oc8yw$Q9d>k8m;K$)a3AK*kFX`4AW$juZN-TRRc_|`6 z)05Y>Uq723LzTb~E#a4{b5#=jv8<}4@Jmmsn4J`G9d?ZF?nbfwV@D{;rV-J~(LOGlsgG9wVCf{CMpa%g39pM%*TF;Ymc+3#lq738xJp(}!U z`@|j=)s8u%h)&UjgR)PL37JI@F5QvpKo48+y#K|UI2?FE=TTQKyt7=98q2lBOXNA^dWz;*wI#X8ZhO)%?>ehf+Jp@(m77v4TVb>jv_)3< zyo@5!Zka(EG8K*K&3@2+yb>I|Kb18mQz(*f92bTA=3ax`F1M|&AsA&>mwPW5{ycuZ zkCWjLe4jl*<@?b*vyafHuqsJHKy1b5_4H!bb%G!-AJ@R;vJh8ZeRUb78wUBwW^?ds~uj3le(UixukuEz0z41kuyLc``vZZJC|rI1do3j z$1q)II}7dPZ9jsA@jA%VvXPM<(6sG*{_f{X1Z&LWAnkI3ivcA&v#^p$y^Dw6trA#~ zUD`>yaJXRPfQlV~XDAhBz3DbohZhEzLCVUx2w7N zr7oMD#0VN*Csz0e7G_wyjn!&;nD6{aceue`d3eJwqI+O$P)La}%;_#4P%knraB~ z>s@c#0ZxiAs0Tk$eq*f^`wfm=&!Y}5iL0_bhypM2*MX2!lC}Cm4u`JEIdR?1vFWx(Vhl=d zrP&OjvrUF=Fv@-CGHVH+wu@}mbHC4*=tK0D->hHV#}6V8qI+ybn@Z(dvt+4ov7#zb z7Lq$%FLqf|V<9)?N8!9&%#zzdMn7&pl;{lhp_iv)Oc5_d;!?AfwzIxg<;>+>NoYdj zD{Fe7GwH@R+ENrIwQI!wlXW65v&%dU zbM+antGsO@m4|{NzNJi$*pGLDC701ZtNDpq@JVQ!*RqQ(pY5?fSNQS$i&3-3 zP9q2Ci-60--Au6ad(z+3X#TJ~w8oJdg?A$}O`NKCxWBqvzNIk&qHC$n3t1Q`zi<|B zP=7L1xClVYud!!$pWPzublRFqZdV(Ehj#Ib6~^SbX8Z#0bUF}?7?oaEw}wze#j!qp z%x%f-9!iAxOa^SsH9yxFWLJy;$hPkB>h-=_#J~i;avfWrjGY=^0$V)DwbH7BV{@Ge zIy2*TPG(3kD`L#Xuy-Aib<%-PYIejo-Zq?Ju~}L!CQQeYCA*>mcBK?|>i%Em*mX#+ zXeS+SzNG%L_Hz>Q`Mu?gIg8=>rG&Cc&&pIgmtO8EK|#8-qBh1*%K|jxeNpLJxPSEg zA}}-`|3%C2UOo3bs?|&GL09563_FsyV~Y{c3+sc~AWu2Ac^=}~}@ zJHmafE9AALz==N4His|Dc01^FS$43jUbIk)-(8hvn^KT?66c*(J1DVZ0=TI zcPw$R#V3nFS;GD|)KZ{Qa+NQ}h&k}EpXT?(>M%dvi^YP3O}33izgWA36A!;G(a2c( zo(nX(ZuDLs&`hL#G%Kb?zqos5)|GQE$WBe7W zr)oB=X2=?bk%)r2@A!NQqfRX8Ixy2KP(JP2MbJr~xn~+NYpXJWx=xKpL$!CJ&1+xA z#&_6sb&}2lJG1?gB3wAU9$=-J?l;P|T5J9`{CHs9_qlzt1Oc&vY9yMRm^2nt&YLzC zNX+$i3f~!w0zsEMEXplt=c*f3!o=``S)nw?5(#_Lxsxe~r@D5N7SR%+*@qRk?#ypQTQ`&vV*pGQ)W zhvyQ+{pM}0l;e5Kt`iCPWX}>L&9PwWTgDJ+^^pRaq$?c@UtspXhM65gb)h(ZEcR3C zT^5k)^2-+?y2k8Oei=`D4y6bMYE6OOUi)iFc^AeA$X<@4glW|xI~`CC+QhAXrUP} zPm0p8^0lueJ?4jR76yWaDWVYUVA~`(8OMH&CZOFl;r9_Dz8-^5p5m(?8YbFFm|z|Y zrU(k<^u`T=Tb>;ML?@#Ukhs(DC$R>5kg8>kMd7G|m0=WjzAo{JcEe1*|t_AJlemv`BbsxRnr;oy|$Nlnoro;09=6co$7VuW6X}na&H_#mtL5#F z^3gc<9~qW;XBAuZoyvro_(#>qB2&gB%og^`zLgIUqGMho7t|5+T2{0N&gb;JVT^8m z*?}0ic%S>8@bw2=xBg`DZZswtN8=aB%&IbTCjBbr8y(eOV$oB6kaE&M=8p_>YakUb zhUS735ZF%eAOi2dJhu1M&rvnbkTfAJr#qI(F}{Cpmk#zG!VgjPp-!;-oQ3U?l&C`v zC4AQTD-^5Ez zgpOqtc$&;k_CNw*G=r)r!NcG4HI&D<5|)!3(^VcaoS3{;@bEWpR9QJ5qIYyNOYGco zV%L$!*1Fg|cKl`Z%V`bm>)ZLLw>$!OHuI8qQW8WKsU+%W=R9_Bkx1$iP2y3`{>D}?}$2pYt;?ojhf(VfttZ?EJB_JX^PoUU@4tdxN9t; z%;oFKRfwQGc1DOBe;%*CU^Cl6QcJ7=|Dj+PNnD&lrnzrl)yi=L2h zAD-eSUZ$wgalRLCCnwn#8mV9&zf<}g3nqL`N|`amx0!DiK$F8$S69O=j3~bP5w3*` zmUkAylKA;NyB=J+HNn1!AB{z>GpRm=j)!%yo`Ml2UorYA=?KTK*IiLqQ*&%G!Cz~W zA4MO3S%$wIkujGf@AUQZpoyPRO)&y;gZ*hOcfxUzoYNx!?NT=9;3&n7&d~**g3*ag zQE@55>$|s7bw&!W;B!`Am1$*hLRpDgUc3G3m4-!tcO{TK_Ai>CGmaSqvqg^>7<$RwZms_uoV3-}#Clcvo9^O0@?W zCc&^4u-KI=Mc&oqit=}+^(wuFg)+wj|KBjt2ErLp0cwGg`m*!tNAb?qya(st2y` zuG*XIN|F+{_#tXak?@pss_$dCF18}WEB0>a`c*3O8Qowc1K`H+(W+h;U}r&oK46p3 zEl?eb=yl4%D$QLXKXHGGx0Ed?*Bg*Jfc)By%Om8^n*<%TQeNZP3FGAwgmko!q*|fM zAr45rmRVabB2VY*33W`X(YW^+MTfAD_3tiIGdZKyfT%RaErEyp>K`larelrZ^$$Mk zmB252eRGg*QwDxCKEGjBR^P?NiKL+uXO~3a<0)oED0v+XWXr*0l^V{_?FA0t z^wVcXP8mmoVtk49e!DJU11km?bsoTRtDl;ZxkYxa!KM>dT}*Z~GFZ_-;$cafFOfhB z*^&N`dws{Vfuz14*Y%=Hgto8fMXu=deUmc3g8$pH7P^G8t0GG6D3S+V=Nr#RJxR)s zHY6qufo4JgAv09r)A)j!W;Yp7us7}aCnu)us0J4qqWAavMI4+tcN zTX<}L@5R6443(b&#m#bcb&1gFGfgtQj&DDM_SU%he1nZjyUu%d(G&JdC9OFh4)o;^ z`02gGYV%Z`cS0Xa9(rm*4%Pjqt$rdE(gizA1%9wK$Acmm%C~7xBT%Rp&Wroc99w`V zR_8WPJBnw1tyOK|n02pVdLgc17`|y(Jb+lF81LCR2!uT1G zgPEg%^{Fqx#dy^{02` zU-BF(72IC|437{ONkm0OQIqdKMd0RxGyes<$X_V`n?*$AL_|DAL@h-mMMNw`#4P_= zMMXtK*FXAyt$#F$zb5s+(;w;2YWX)``Y+EPDE|%q463&6&=T82w!}2%-9~F z3&Fvb3kt*rLm`C_z&50!5^I28#TcS0aserXL~5iaY195`rK(c;Cw2i%MKK@H8rjKab~XP$5V+N9^EyfM=}&nt2k z6;*sG28m8S+8>)YQnm%}u^!#8uK6}dcZ3|}!e=JmkWXs&J-zc$vJfKI11 zS1iV2=hR65=fk|Bq9O`ZUs=B1}2=;>`OURbku)vwPN zY+iM(b>}-fuYa2Rtn0(v$WN|~nt)Dc&5D0~>D$q_?lsi+uQ)k${4eMKHn8cPwcp*l zuJ-DMhWdAhFYZf?4-Otp_MSUCvUmHDqkE5@d3fiaU4I?Eyz}$XE937E-{05vtGBy9 zpJkQktIMWdcs8=+8!f5c!fiFD{poPzdOy5%r2k=G|1Uon9X`@};?ggoudL48U9f-Q z#aEwi9eLwNYnGk(YE#Sj!b1(4&Qvxvx4d=k{{7^vKU|)rk}?k=Yjp5PVkCIbY)l3j zhV)3wweb+~cJavY2+)uyi9|{VA7VZk-PT~+b{bx5NGNlIozF)Y=nVBp3UlMAX$&l| zKO37XmL)8q=8_1yp9f_&m$)_lqe$bVa}pQLBw;%%#~cE z!|X1Y?5F~R%LS$wQD`o9u^fd9v#t~lNFY<9NN~3DP>yqAJVhE%;uJx}Z2s*Cnq3-g zZqk%6AqUTZWmgFed6^oaB!$+d(_ApukLUtPNr~B=20vNAPb82u6hbnIj$|o25b1z; z0%WN}NE0PY0pjx!0K^kl7L|3itC21fINrqO3Z*cMl1V0|XXYmJ<_7yrM#9O2Y)f%r zLKOrccM%;dAMn9Dm}-z1ex{fV2)%BBZkt$y1bt{Z@qooH6mP2qu>>1Xu!mhH3r&R% zx-`FtjyoaR&F$#(+IT>RqIaTrXm@5#jfz!-9-Je zMCmvR1K_*UxG&RWK2$dLeNdlLbCAqzqlyvct{1jW)CLx6Qm8TBuG%7)q#EEPhrI%VQ$xjjayoC*dsCX;f6sni1GhmMBB_5!f9wNZ(>Cp#sC6pm`u zQ5)Khth6gcKIkCo>FCfymCzCrnu519%-kph0{H#JnlJ?v?4(@SShPY~)0|xrT|K;7 zhqeea!Yl1{%mr{jhDaR*PNzUWm?R1rsaAn1lW2J`NKKzo7eo#+L@{t;0aLYLA{td3 zAzrp?6mR*+UreTG`{)T;qwI@3K7M6 z-bfIt4wS~FpQ1`FRr|uahG@}!Wn1~jN|@5|I+AG3levj#lqBWFeq#pE5?T_X_5bWU z5fa6b0dq!V@S|42!NeHBQb-XD3hBfbIDnb-lGc34=gIq!;}t@Wu1P~w1`GrR=BNc^ zOB8`!4rE=KIf+KjO0?e=Ch!Z7*d7!{G{TCd7s!_CV*UUZI0-hpba%sEzW@TB6eleZ zy3p%51H~&a5k#B>U^XRrB?J;q4r~^scA2+TqwPhuI74Ki>&FZlDvp{x#R;jUU?L;N z>YSxsY3Fhv!8dsuslD!0VTt#J&N2qIM(0rpHA*!`DXa^FLgAn>Cgd_N>g7SOfSxY! zq!sK`sZ{*fNvrdrVr2_aNpuK^_O`V_l>`niShlh8jQviXlW06E)oDXAta9N3N<~ju z8^A_R#I=|MXEG@!ni(qZ?&kO!Fr>X|6~XaVQcn-mMifbjwv+VT+$1G4$(^>dN;n8W z@V2*WAl>j(G6j)6P3qC!c7bF&v8>5{f?U*tQzLVyBw}l#ff1uj#BXck2wWB~9LQ7?l1=J(F09DYYfOtR~$eeAp&H5b*!D`t_#eIY89vc3IsgHiUUEfEiQBia-T=&T)7th*)DcqK2iW9I7)wdh;a`!!NpdZwVOG&Y@IycBi-|W=b>B zvXr6SNT_#2_5g1fDSxUt!{9^`gDh3?*lwV$-Ign;^YC|BlBK=5RFhMRifCw2B&vi! zB_qnDdO(o;HU=))tfCPwyLyDvgGcBvH-ruZ6l9Ewmm`Z?DEhZeC z{qh9k@z6ATN`exn>&exQ(rF#Oa3JO>dBjmtT>eO@)9*=p${icZ%s*DnpP1_^bCp#% z%1Vk|<G6VZT4R;l8XM{Ac0ccSce<-W?i#l{ca>TEc?%ql2J GOLDEN_FILE_ENCODINGS = - ImmutableList.of("PLAIN_DICTIONARY", "RLE_DICTIONARY", "DELTA_BINARY_PACKED"); + ImmutableList.of( + "PLAIN_DICTIONARY", + "RLE_DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY"); private static final Map GOLDEN_FILE_TYPES = ImmutableMap.of( "string", Types.StringType.get(), diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index b09d56db9e3d..6b7ffe17880f 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -82,7 +82,11 @@ public class TestParquetVectorizedReads extends AvroDataTestBase { private static final String PLAIN = "PLAIN"; private static final List GOLDEN_FILE_ENCODINGS = ImmutableList.of( - "PLAIN_DICTIONARY", "RLE_DICTIONARY", "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY"); + "PLAIN_DICTIONARY", + "RLE_DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY"); private static final Map GOLDEN_FILE_TYPES = ImmutableMap.of( "string", Types.StringType.get(), From 5d2cab6317c1452379c0b2747792cb4a4f368dc4 Mon Sep 17 00:00:00 2001 From: Becker Ewing Date: Thu, 4 Dec 2025 10:46:55 -0500 Subject: [PATCH 41/47] Add vectorized reader support for `BYTE_STREAM_SPLIT` parquet encoding --- LICENSE | 1 + ...VectorizedByteStreamSplitValuesReader.java | 173 ++++++++++++++++++ .../parquet/VectorizedPageIterator.java | 3 + .../BYTE_STREAM_SPLIT/double.parquet | Bin 0 -> 8459 bytes .../encodings/BYTE_STREAM_SPLIT/float.parquet | Bin 0 -> 4427 bytes .../resources/encodings/PLAIN/double.parquet | Bin 0 -> 8459 bytes .../encodings/PLAIN_DICTIONARY/double.parquet | Bin 0 -> 9744 bytes .../encodings/RLE_DICTIONARY/double.parquet | Bin 0 -> 9744 bytes .../parquet/TestParquetVectorizedReads.java | 6 +- .../parquet/TestParquetVectorizedReads.java | 6 +- 10 files changed, 185 insertions(+), 4 deletions(-) create mode 100644 arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedByteStreamSplitValuesReader.java create mode 100644 parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/double.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/float.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN/double.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/double.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/double.parquet diff --git a/LICENSE b/LICENSE index daf8cb9bceb6..5dfde249ee8d 100644 --- a/LICENSE +++ b/LICENSE @@ -229,6 +229,7 @@ This product includes code from Apache Parquet. * DynConstructors.java * IOUtil.java readFully and tests * ByteBufferInputStream implementations and tests +* ByteStreamSplitValuesReader.java Copyright: 2014-2017 The Apache Software Foundation. Home page: https://parquet.apache.org/ diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedByteStreamSplitValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedByteStreamSplitValuesReader.java new file mode 100644 index 000000000000..d879a098862e --- /dev/null +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedByteStreamSplitValuesReader.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.arrow.vectorized.parquet; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.apache.arrow.vector.FieldVector; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.io.api.Binary; + +/** + * A {@link VectorizedValuesReader} implementation for the encoding type BYTE_STREAM_SPLIT. This is + * adapted from Parquet's ByteStreamSplitValuesReader. + * + * @see + * Parquet format encodings: BYTE_STREAM_SPLIT + */ +public class VectorizedByteStreamSplitValuesReader implements VectorizedValuesReader { + + private int totalBytesInStream; + private ByteBufferInputStream in; + private ByteBuffer decodedDataStream; + + public VectorizedByteStreamSplitValuesReader() {} + + @Override + public void initFromPage(int ignoredValueCount, ByteBufferInputStream inputStream) + throws IOException { + totalBytesInStream = inputStream.available(); + this.in = inputStream; + } + + @Override + public float readFloat() { + ensureDecodedBufferIsInitializedForElementSize(FLOAT_SIZE); + return decodedDataStream.getFloat(); + } + + @Override + public double readDouble() { + ensureDecodedBufferIsInitializedForElementSize(DOUBLE_SIZE); + return decodedDataStream.getDouble(); + } + + @Override + public void readFloats(int total, FieldVector vec, int rowId) { + readValues( + FLOAT_SIZE, + total, + rowId, + offset -> vec.getDataBuffer().setFloat(offset, decodedDataStream.getFloat())); + } + + @Override + public void readDoubles(int total, FieldVector vec, int rowId) { + readValues( + DOUBLE_SIZE, + total, + rowId, + offset -> vec.getDataBuffer().setDouble(offset, decodedDataStream.getDouble())); + } + + private void ensureDecodedBufferIsInitializedForElementSize(int elementSizeInBytes) { + if (decodedDataStream == null) { + decodedDataStream = + decodeDataFromStream(totalBytesInStream / elementSizeInBytes, elementSizeInBytes); + } + } + + private void readValues(int elementSizeInBytes, int total, int rowId, OutputWriter outputWriter) { + ensureDecodedBufferIsInitializedForElementSize(elementSizeInBytes); + decodedDataStream.position(rowId * elementSizeInBytes); + for (int i = 0; i < total; i++) { + int offset = (rowId + i) * elementSizeInBytes; + outputWriter.writeToOutput(offset); + } + } + + @FunctionalInterface + interface OutputWriter { + void writeToOutput(int offset); + } + + private ByteBuffer decodeDataFromStream(int valuesCount, int elementSizeInBytes) { + ByteBuffer encoded; + try { + encoded = in.slice(totalBytesInStream).slice(); + } catch (EOFException e) { + throw new RuntimeException("Failed to read bytes from stream", e); + } + byte[] decoded = new byte[encoded.limit()]; + int destByteIndex = 0; + for (int srcValueIndex = 0; srcValueIndex < valuesCount; ++srcValueIndex) { + for (int stream = 0; stream < elementSizeInBytes; ++stream, ++destByteIndex) { + decoded[destByteIndex] = encoded.get(srcValueIndex + stream * valuesCount); + } + } + return ByteBuffer.wrap(decoded).order(ByteOrder.LITTLE_ENDIAN); + } + + /** BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE */ + @Override + public boolean readBoolean() { + throw new UnsupportedOperationException("readBoolean is not supported"); + } + + /** BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE */ + @Override + public byte readByte() { + throw new UnsupportedOperationException("readByte is not supported"); + } + + /** BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE */ + @Override + public short readShort() { + throw new UnsupportedOperationException("readShort is not supported"); + } + + /** BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE */ + @Override + public int readInteger() { + throw new UnsupportedOperationException("readInteger is not supported"); + } + + /** BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE */ + @Override + public long readLong() { + throw new UnsupportedOperationException("readLong is not supported"); + } + + /** BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE */ + @Override + public Binary readBinary(int len) { + throw new UnsupportedOperationException("readBinary is not supported"); + } + + /** BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE */ + @Override + public void readIntegers(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readIntegers is not supported"); + } + + /** BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE */ + @Override + public void readLongs(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readLongs is not supported"); + } + + /** BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE */ + @Override + public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowValidityVector) { + throw new UnsupportedOperationException("readBinary is not supported"); + } +} diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java index 3c743b9ad0a2..4a72f1b98ebc 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java @@ -106,6 +106,9 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i case DELTA_BYTE_ARRAY: valuesReader = new VectorizedDeltaByteArrayValuesReader(); break; + case BYTE_STREAM_SPLIT: + valuesReader = new VectorizedByteStreamSplitValuesReader(); + break; default: throw new UnsupportedOperationException( "Cannot support vectorized reads for column " diff --git a/parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/double.parquet b/parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/double.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3e0edd2627abf81564851f5f3c4bf5dadb1b07d0 GIT binary patch literal 8459 zcmd6NS5#EZwk|oTBtde{f)bn1K-1lHf^O&>L^6_tfPf-NauCT`0Z~9eMS=<_ARs7` zB#B4{$tp>b3Uct@`#hb0kGuCh_u+n{R?Vt!rkbP18tY*$9E70GPsg7Way(S{`?>ge z`C0f`LMHp(QBby0vKAjS5?J& zr?a3J@f<79s(@a?Cvu20eNxS(I$Z)ugI;W7&I|W5ukpF{)fI`&hN-0(2-_o*?~2OU z2p0-e%BepU4je5@*5%ex+!uTvn$QfRo(X2uzI#h1YOz4K;$x!5qeR~I+ipm|ycczY z>(`^}4ljd3Z>--~4DcVgPCBghb&^QaYi-u0v%GwAt#H08C?x7zLlWaXh;*Y$HO;2A zJ>|34;3?CJ@!4tuDkK*AB)uy)U3`8Ei<`NU0rmvqa9pwa#luc^FsM%f_;L` zD^l#Xbd8Ys@05pzk27VOm5HL-h`spo^3?_RjG3d@@Jq zcAc5_V=&Hct%ypg=@R(9(;N9__{M|&^y0+TMy8ylxZ+sgrPwpecx!F@{``u^z58A7 zeRy4fh(l86%Xa%A+6jtjHy^#|+Yl-0kw$0S*@}ot6lKb0_pMc7 zkPEh7EN(JZ)tBX@i^vQlPy_)wF4e3*=lZiuo#&=BiG-sNQ*Ryp~s zwu+OP>H2P;GG=^A_=w_Mx@uKYHI=iB51i(8l(0OPyh`6!@P4Jsh-*|>Et}fIkRzqj zMO^2j2ei)JeMq!et!5g#tvg;2(a%cGsM)f*NyMN3>X8?|!SGU6 zx|3S&J|!$w{UG`%4lO^H8ws2?=F-0=>^=QLAcvtlKJMyS{kDYq5kCHv2J`Y_<$-5B zuliQfjl7bFMkMx(KIyOGxa>VkTiR7#^s+aTob5wVIRm>^KA}VEJ{5QA#Z~w1AGV@x7@_O@&?3UXZG1Cke3-b?e9{64B zyE7&nqcXc};Y%48cDm_Y^_xN~es?ZzZg7HWk<~aG*67El)0B{<(qpN9M}oHJ>?A&( zWiaM=P(M5YVDtQGm(F;$QbDb8$_Vd=gHorPfi%$Zt;IMwAjv*z4cG@1-94S1fAV83dl%Do=G~j6UrP{Mc<)@8fjcjpDn~;GKgQ9n(-&g)P+~{Lj6m&X}uUmw_ z_>or<;~nh6k$Syui?_4XTAb6vvy=A*4+kJO<4OzM;a2)G?>9H4vKwp@5ZzM*5o~V8 zt%{A_M%}#IuLrpEtyLV&f!Z0P=hhX%bFsRztjHE-=~TDyIFNmO?n|gr1HE>)v1vx3 zhei8qrbYAK*-n0mb`13#D5Na)=uMsH&l@~ipORyHR02;~ex3R5wzRdB6Q5#v{X~ml zL783iLBMMMllr9;byK$Zr%A-ny+v;(odfV#@Iqnh_ly0zTpUY>W>hOZXVo;t-wXUg zlc?Dfx}ofh@iFuzcR%ZE3M1=mK_x~vrfYsvO039^9$Ks=D)_zr?$Y6w@%x13MGWyq#Rz>DIMbK4D0@$b2V zm0c%NDs?;dy6V}<1>vd+&B&6OdnbasREr`%%hxDyHE&jJzt*r`HkP_b8)pNebZ0v3Kh-^sL|$&cRVc%P=?IS;S}hST zOJ=o~ZGJC4;;CW0XHH(=91OdA-SEfmpv3^^uh*NmQz(aUdWllw(P!Jt9?chKpV>8m z%xs`fOF~aL7Ggdvx-o^WqH-KrM6V%N4jL+^c{O^Zv@d2~D)^|^`RtdhA-|Lm>}SWh zz~`x@%5Jd8Ub<{Yt-A}IUUx8w6SuwHyMR_141M?YUtiZ{_Y$(Z9u5q1 z@pz6J_wYHHfHZhzJRbKmWjYAYGhcV7=c-gh^6a33PF*B&dr)JsBUVmdj#vB+D0x3R zwI(}b0H3uy?saNZ(T3U1mHrjDlN!jn-F$-6?VZsS3+1Ct5M^D~ieb9fWS!B$Rp+Gq zAwH#GG_>Fe&y1t#XKxCH z{kB*4Qu1@s%|Mv0ArP6hlzLAsgL!k$6-4tAmILt!))?*4gy>re zRws-sg;(O|)#!=lbqU%BMC`%s#p`-$arh+y@4Yv1X>#WNu-*A@wAvf^&OMT}*=ZBmov*sypZPfgiyyApNxrZ}Uuf z9;Tn(vbYo<(`3g}IH0|q_Q^C0S;_t8`^g_kH8Y;KpJ$8Mwp?x-FmE|>8tUg%b;*r* zQvLHgZC&QK&I5tr&0j9w-&+ssi1rQBjo&vv!Ur2J_J8TWKTxZ1@^|Z0Ktonv%c<7y zGrWnNwzk(36@G9$Sh}%ye$02&OgX2wqK7PGJB@?ccnhX)KA4jBE18r; zyyVdSAXmE4+!PD{uq?((>WRBBb?s0Q5C2WT~FvU0FhHjEFVV8R0uscH z+sirG`ODsP74H7SfRmo(pz^MXC&rociwpGl`40cxWWbbfQ(jDFL|B8&PLIWU-#h-_ zh_M-!>mXWYHmCNN!z_poOfXLgQNQynzoVs$P1xCnF+C=~Z8VZkZTM~NY1mwUfXiK& zZOZ%7j*m?KLFv0!3g11=nyA()T=|h2pz^ghE%@91x5VNssXVls1lqaYgZuW}Q-DR3 z#?=mnl~M)eY5Dsr%Ieg~I0p9sKDX3JFR$r9RK43BSX`Z#jGXG*BeTO2felM*-|cMnjEDmSoL~PA|`tK)In4)1A6>t?r{j z-e)JQtdKAJYfF2A+_HJUgg)HU1KIY`HWyUP34sDpU5#Q4_0C}jC6Q^+All^0S9qtW zM4nL66|~pAD9#_d_E>;E_0_5H$_2;YoagU^-k;)rw`$imA#$rys^qzpizSDi#bgYy zM48XA*6sb#g>X^&rZ0ia*VFXvnRe&0*6-gOslSa7D$8jMk*YQW-UzMOi{p~ZnW1m+ zs;%~)y11W!j*uK`&>MyCkWq`<`A_mF3 zyj{hbkw|j?E@p$GEwSu4OrZPeiV<9d+9x1bE73yDEL&Ze_M$cuR1&XFvEa zAUJ~%dP=`@e-?yYIQm$-9(VcaH+dh)s?siu6{3v)_{nJM+^|8!-{`!vSMitZ&ggeCh8sq6xb`C$uC#hWk2ioabx z+uOib&R=CrQ0@UjELG%c*JjgV$(F2Oc4~ZaLfOz!^qZ>tn|T3Nq41ScgP(SqAAf>D z-HHWq{#e_w&jZfS`zD=B-n2-+_$f(F?S~}i$7TjkjY$12GGx~9h#ci<{bETQfBVk+ zOHZ->?+AzJ)xC`dt6tz0Z5zl5gYNjEujGag?gmAS^{?joXTpD#`f;K>yLgvAuH~*v zaAmO1i|Krbe#EQby-SU4_#QK&VuE~G3w+8yUaMVQbf1`tIh@MXGs3u+fQRBpbJeG zc1s5)3f^qqyp>g8Z6iHde*DqODc5XKcboYR%e^L*m{KL`dseBirKRtTu-RHgq-re> z@s8noaveL?yu{$ex$?!sy9Hai4GPzOmwWiP-3T7QtKJ&0<~aq3+17ZZKTu{F4-NEt zY&k|P&JVtlc9!ih`Re+%ZsookIMiZ2rCNNss-o+Qe}I*#g<8t5&~d-MRsOiL-?w<5 z=3M~ux1Lz!y{80nNl$LC3JQcrUsjEIB zI?B&SUm(tY*OF`CtMr;1rwG$Ri_}CcY3u&pgFsW?s;h!k@=v!6jl~A*1V9G>1@6iY zMu^fW&Y6fyZu_&cNiGsj0N0@NVd6WxY|Q~hSEURC>o&NJ7=P%RGj!`|gialqDa_7k z`vgj~j0@xj&0S0u4@;rwZUEe>WCC?=5Et}*R;pSAF|*+%k_|DQ;3u999n#N75nW)( z?-okMZBmXO%qZM|`*zjn;^maB7BrHd4eqhBGqR^cla2)(NGrc>n^EJmA_dB|GpL>Z{)&^qD@r`qm{q7b?fP4%i4We^ zg%hMtUdst+$rBlcA$xW1>-y4(Y{q#(4dSlllM=chfjf(|v5?>hTf;$vj{8gJy@z*M zG*U(180c>ZFo?|cFv=t`?luizeOuySbz0Em$NDJ|>WkY!RUK;_#%dfE{seGH2$hc@@3KF?nKVW5B+Vspq)^OELA#!t@m;36SY~b?f!R9o)iK~aq57b`wVsPHfA zYg2@eCxsdB(|J7N<5A7zeVGIFKe{g}s6QB;7QI|N*NE;kyEVJdaD(8wYUUs~G6@6D z85x}jFzA-^V-#E)PNb@;o&&une#>k8mN1o1>e^UEYW8#*MtrYv)ASPqQhziajf(L) zEmhupBb2s$)xL9bGq{`w0rcHp+Xy^Ax@OZW;>x$=hlsLf;`mg_f^B>DmTPy+6CwJ3joB!zjxDEIhvNcelj^Sef3$}>hwGn&_y+!&Z0ljiS*P%dWt$lD4SD$TSd`%E%Iw~?{;<` z>g9WIu|)9Sr9T5xD*vg|2_GJFrID2DBl^z zKJ0cI&Y1_Rd{WRG0CHoDnrmCbx!zV;VgD8TK8Oc8&V94;g|;;|4LW?SzC`BhL1Mf1 z;q8b^n!nk;xr?XxGe@kYz7cfntO`A{HWINei(TB`=_qOQe>)Jck`UAilBy5(ajAE5 zYJLO8L$bCO&U^t3vRBn~w3rnAGJJi>EBob_hB-NIivRC4<$d}`k{8OW?WEpS(idns zxXUn2WsOiie6KQBIszNp3SSkdl_N*c$yRE^0VoE18m8I&;=Ug#%7Uf}0 zSw@Z(Ej}UC!6Hg{(-kEM;6<&ERC(K(75LG`0QDt56Ozx`ql7!feS^HAym+Ed3Dht5;acS532tL$x&`UGkH1tqJw`yk%?J$RCIg3SVZ#}BW9E<%z8Qy z_ze$2n+MTV>>a$Xw@zYSe^^uXq+j(yrsEQ&Nh}H7f0Wg6z&}{X=@`c=7Q2yi25Gyc z!yxs=K&d{bX&~Cy&lfG1UqBeiE7-TlA3m|ke0|3?Q*oZ-Czd7RNs8CLNy5nMngRi( z@^v;|d8He|Vduq6+Tp`ZgMGl{@P~HHBD}LQCOM6|Zrf#-i@eWERld$B-A-C+?!D^# zP9;Y9wLO+eUz>oVNWobFCd`^Rw6+;(o`i>}Qo+(PWOEY1lm#*>NK>_$KUm28AFrXYa27@!%&7(g?T=9R+|;X=kVQyO`TfHwu= zAUL{S@TrlT#2 z)uxa|b#Q>QR1A_x(v*S2kpvt~1_mbUY9iqJx>N)i%||2AsAM%1044yX0f`7Wm4?Qj zH>6?EgfnI)WL27^h8a#rfud_>4l^^QYTMAT-`1e0m{xZ~=n3N&>ZP!3KoGB+{C=;1r4rg#DlDuM$Wk?|l93}&bV zr{XB)Aat<-9D_q507wc^P0|DlCJ}JjnvA+gvKS1jiAU+;W(h<&xH{E@PnV1!;b>-5 ziaFkdgr*RXdb(JmDOO+HK;H z0zsyMu;v7+767NJEbv@z9$Ox7pDs3s%>Lo*ucoGAp2Mu0W&7#$)H0Z%fCT3(Z!W2P(;m#vz zU_6dYCL0^!3FMO^SSpPGm%)+|cnUAk5PHrOLsXL>o9j|kka~J+BnnOy0WrhE^~jpW zC>;!jW=O;7WAKF2<|v8*(Hw%s;s`hrmVyHzFa&8sg8Ergnkj{(M?#SZUe?};`lCMg!3>N6pYFNJ$~`P{}V?4lltE= z|0-p z?O4(Yi3*;C5C~W*@E_+9d_=GM2b#{1W8nWx$3R0MS0PYm2m%6eh8X{)fd zF$7}rM;`D0TORM=e<<>w>3DT4o&Wfw{=_*3{*UoL0Ehi$3H^)zYxxiUPim<91scKS zc!B(<|FQfF$0`4UA)TP4x1Fz#zoXwd2XF6lcD}xsuSy0u`d)Fp>?Nt8abE4bnvkgI Pam@krbac4m-yr@AObT5v literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/float.parquet b/parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/float.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8ba32a302f0890b788364c990345b39a2fc75139 GIT binary patch literal 4427 zcmaJ_c_38l8y?$mMU!P@n5-ePB~r4?$dHncm@eWK2rw%;>ZaEk`)O@!qJ?x ztBa$moV0S8gNhumQ~z19u)tt28sM2gP;vG;`RkU_$C&IX6vBQ@Z%P*4-FN+(l7mAF ze7mZfvXqwHo`c1 zG-`o=uK?XFV52g>xc&)#;_*=1(;zHcl!b!e&hKFrcMc}&A5zt`HVS!PUMBnytm5s) z+%>0}9_3HF%XPr8NcGIi=*qhYpY@{aA zq+H>;{_^qVFdWCUQ>XK&6?>&X?se~POXBm~Wt+;&Wq+8juyfVeRf~t+^_Xcj6f>b4 z2@V%q@(Cuq(-982>otSJzu&=_Oakexz2g9RW(Ahi9*i`;fQT#s#*a)K|PGEIFH%D=GPT{+Zu)oKQ#wG)?8Chu70s-Ik{N3 zmag(acTl2qYdIr7MX9svxpTYfQPJ6jkFPZq`p2Z~C+=V!9(GkpdpwzgOCp(mOovf3 z{eHB9)N5J2VWIb#>cWD(?byobQbD|~Ze}_&tcL!`q;gIbKMRMiiMw}TH&$GVc% z7ThW^__GVqTt1pF{W-#pBTv1wc&o|(%!QWAcbI2w4mMD&eO)}2ru;4r_ui@Qx_kH6 zz9}K)mA*+5kQlxu$o4_*>-m6&fn(DPV;!Mhd_7(x=6jJ038HE$LUA00S2SYNetlm| zwv}%)Xyu?6sA8W%_*74-J5&unJ-Y1O{OV;2@|`IfReW-@i7^sUM2q_|XmnO#Roi2B zmuWTc9hFPQ?;kA>SGGOdP!d~G*Zc#CT8Q00mxkvV;J5k55-&==53@0y>?z@@2+v^k zR4cGF2zLI+?nIgz;v-71311P)mE)wBRj?>?u65#RI~%dfgIm~&u+oy;%8z9{?L&anrir<@9P ze6IHN@8-Rnc+E$~5e@I2ljOfAS!JS`&0Nx+?@9cPQe;_W%NJ^4apyjlyYSZ>Un;#- z+M7C&?efi^ZGU|$7(i<${rq_Rx&G&UHAU`meZ;^cT~FO-uUKt|6MAnHT&OUd%?B#x<_nB$~S5TGcgq6BZS*y_x=b zyntR8ua(Hu9n@qj*_hJrb+431=Ad5x(-QB&)ZE+TF9~ho$G7e3;z9yk*I-O&#~AL; z!IJDc?@YaC15)@MeCo&d8dG z9^lM=u9fXoGhDd64%6v2pVY5x=T1tXiuR*39{g*L@C1JZ{kE)W;xS92A-7Oh#U_fN z^(#+J>ZmJ2;MB}lgTH%k&J@*L{AN;|h~M<%6+L!zKF9L95pdcch>toVLy8*|PW1X4)DoPw!)WP^z5h(K z9=axe$x>Q!LbUD7Po@tKdh#7|g50kiYzIdcrwpene(5luts`K5b?-%=u9aPie4hvJ zTVTp_>Y*9*fn}U#_aEjek%?(tbiZzPlg{dnE`5{Si0!8+!NE%J8?YQ5?=JrVcB-lw z3!M+Q+!k)*Qf5Dl??`n&xc74$k7E2FkY6BP-e~+8+p)TG+hz3+TC0t*)UKVd4@acs`es>QSZ$3@!#^_zffBnIsA5k$L`@%U!8sNE~q5X*y0y=%GM>#Fy{qP z0hO#SWK(t|h9-Q~muG0kER2@4Qp%H7@7^hB+m=|^&X$bxBRC>`7DRV5ggdJ`T4mMb zx`(1GvotH)Y3si0wYd569w%kO%Zl!8defcb6*8%9YiCqo#ALL|-g;mt+iEkC?4%hj zV%GZoSb_6mNJC|Hv~J47etCCGp;`u$(G0E69=kI^-<_5VOSFE(THMNnS zB6rQeSQ^8pVU>6LWEx%VR!=r^?&X6BL%||}he4I8by0#67L8-=1$VU%JcQ|qb}5ZG z%U(Qh@!&AtyQa9JpQk&5! zF$xkaSR`vVNIiqO`PS#^Y*mMw6p3kKSj5$tOtZvL{%7@;krk(gt{FDRC(Oldl(k+{ zEOpd;HFHw{r%{eC7fZ6fG1dH{KHcMyS7?lSl0$2y2tSITcPg7XrY|Fd_Lw(eXr8<$b^mzo*1re0Q{`kU_CZv%R?;S zyrrE|+YfV0V)`bb{rupmZlY3FEvf3B8K6hWV_A7L1*&%B1}>Lk7y^YW9IWxQMuz>KR3ji$0`CpWss5HS0k;! zy!pO*Gquz8?(3D-Vf<++6gpP3zORRPZyez&GDK?V_EKo}ACAc!8Smihu8eH=tp2p7dR|Yt zF{Q7%*0z`?s?#Fl$gdsIzHlzRu~lxf^EU{Z$O;(A3WCllQt3;|X&;Ut*F8v&qg%j2*W>r&rgH48{(~ zWJT(`=b%nZF_%%XUn(U$iJOuMo4+{PEhZ>ijWaAPPx#bONkfQh#~IGo_4a#fiKlnB zh}4Al^X9&i9{Mf_Yr?l>RTpX2;ZoRaY892t2f{K->R9sq3363Nxhug$gAM)G_JM#* z;(5%$MNciC_z`idqGq*-r#NToSvR7@-q1PE%nPe=Xdab8FZ_B`LQZXOeoH7GlO^IZ zaB|q*ZbK6jSe3+Rl^@(qy{c!-)WAe*RJM=cff+rtD~eAjQUweP4yqW6g8@8p1dGK2 z9xwm~djEiw4>@26GAzgjkyr{Fz^gz@G8A%w7yyHWKsnF{%0jx33k;A31R{qITLES$ zr9xqYD&X%QgIWb@LQxF4XG#)6{ih6NLx*HwKCA+jfEom~Rlt70hukR8feHwKLZI}a z0EG{cp>qI}|7HOg76^k?At8u%xO)f*o;Y}Zo> z0w2&QErA#c0fImbFi$1`BiJmZVQ5F-1%k*R4{#_skOLaXgQkDq2^gXj0l<_Ch*cuv zptlDx02uUg0Vp^_F-Q-pL#YPVBOml309aC5z^W=y>HrL|D7b=LsQTfJ`BxZ3fw$lv zBi|&53id$Bfomm-5+n&Sz#ym%z9O=bY2*U|gvd!U5EKJoUK z4+pG(s-h?yB@BEIfWcrOa{?jE0L(BHnv)4@XsBnZK(5bOOC<%`Stqc418@NB3KkoP zRdvE@V6jfvOHc~j2UZ76UczE8gEO!`oPm`(7%~P2zyoF{kgo;m0A>J!$)>IbAyuI` zdAqkT1Z&$JfQvgHJ|A VQd&|Pg+>E=cBF#A^nuSP{{tAvM#}&I literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/PLAIN/double.parquet b/parquet/src/testFixtures/resources/encodings/PLAIN/double.parquet new file mode 100644 index 0000000000000000000000000000000000000000..edd614c66af653260bade9a3b7d1300c820ccfc1 GIT binary patch literal 8459 zcmb7qc|2877q$$AN{GyvNhz7v5b`@loW-KWGG{UCKQ#ViHcN|B+;NU zCu0Kj6;e1uIU!RvB zLFbpK)$KD!)PdRoUAhr?=O(s=1k+KMQW4hokqS}gDV}exY<$#sSZ1?P89E($n+v3E zp-|uzxvPBvo+XUDQtmcHubH)BnvElJdleUDnF+wkddF5$zdqJ4xms#Prh;bfO#;~& z!n+ho?H3whRCczhAj%at!i>8-ESRY1J#eM*=>Xa6wxTsjorzGX${eW(8f5eL3>ffH zu#nepC{0=i`A;7VYK?KgIJj|M@GGD%C61Pxrh=B8KL`3as!(3JwBfrB3$^CwBd_YX zq2xjQ_rldo7^muFIFRUQs(Nf#_1pRc!6u%fN@ z!q^@&%zS??SXOERY0u@d31Tj|xxGD5$Ic9~v0J^bnYkkMr5>MHI2*OP)<+pL*09PWYI-bAou{@yzOSGmNWdKG&R2 z#qj`k$u4&dge;zI_H+HnOva#E8>J;rE3kNs?Pjuc|;+EBx>xU=ZF-xhB^AY4n79%ALm!*s54L-7<;VfvKCCH zKRTa$Xb&C(hmbvcov|T3UvbBNGi*nn5P)bKB_goeBnsdGx1$d=(jc*HvGi>f<8uZkeG8;YYss_Ii{L+oIY>JaMMa z5$XAQdLjBNz}Mfr6T6IX_PlaK>@YyFGvf6KafJ%+`q6P-W8CGP|?^U0gjq!o0rW;vJ zMQEBOCu)l!UPMTCdq%Cq;t}N(r;QvqeU;($IP8M5r#73{p0!7(&#$P=lMK8sZjkt` z!^G7$t3zUi^+Deooz>~53WvU%2@!{FvG-wCXv=C_B&(K$eq3h{EB2~hNy4tLJPe6W zc}>O12^)tC$GB+vlu-4zoam?aEab4V4Jsyi9*h3~Ttf03t`q(#cbUJ&v2GJMC>Y4D z6VOMQ)9GG>+MzOg@V+GBuRjJgM&9o*N54nt!G*s~@Y3zX`XLDpMqazT6klNn{zqxv zXH!@RKHv3QE72L8h~w8u77_^F73LrGn~R{9&G|=%2tRtFY3ET>E{rY;;DD^B z)OlkTHYR1qW}aD#AZ^|^1`ZC$f8XLV=52`lk#n!8i)}FKVgAZGLJIBS4;4Bmt>DAn z#rx|F7som!9_k$+{D7>1-Qr^mNCXIOmaDTv-yzwtSPfh32^2JaP5Arm6)#e*_j9r5 zuV2yYC(dwV+b(aNrlE{`Z!&*+4KjpxN&(TBu-7P{p>RfRoVmo=mdtyCGEFzm6Fvc?D)R(%yG?7$ef47~& z#@9ker{fAq-CqHPs!nJV zg=F~UHaCtvrK7`Qr%4FE0$z3~^N4)3gS3DAeYx*kX#L69%uo2e;Hvj;2VE6V^Xi36 z9n}!QU7BIa#QWzpG+x=ow*9N#JXb7?vdKKkO$ zk2g%j^Xavo&!xeA<;f zC;inDi9O=t6**J{?pyvzBbtM$>=UuqJ6FO*i*#{PQWJZ3X8f=WQb*LS8B^<0XWT73 zKvJ+U$MC!?pBI*H=y$Dbx%HKX;@Lc|)CL1cm1a+nZn{Ity>0N1zZtBa(zFe?I$$O# z*G-eK=ac6~=%Sh|oZ8$OT^^}|UsIt{4TQb*EM1()chMYmK5v^SSKGSZ3I{d_f1LJd?v17-)xJ`bxDC)oWCuCxb+*^X;8R_L{ zYi&$;zmyFciE~7)fDOgsXsdL4XKeFc5EInRguU<-S5e6oX=iJ@0}59o;QGQ*!@XLF z?-%(T{E~z%yFzSZqS<(Q|4HMTVFq@6B+JJU`w@%Wqqv;V&p$c;*z`O@ywzdLeE((w zu|sdt1vAv3HsZabb{_*P8)BTJWZ5WEWrv=j*mh`?K+4HxVWp7cSb*dyIvW6piwyME{=9*+ElcUu+Pd`RiR_V4Y3o-`GNR zH2*3T>3PNi_3+y@b5SfTJ{x<&m1Km;2ZgbkAxtcbxN`12pFIL@QF{6lNthOSCu-Es zz>J6ElkE?T5c!VhWAqp=m{Kvmid20(e>nS~E*S7SXx!C#oCTNCGC3)2E{4iB9xfO) zheMKad5jPhg=S>s{@>PU`SPH6eKHq++G#g4h`29TB1(CA3Lj7Ae+aNRLcy9G_8&ob29dF3AMAM_E=TsuKAoXGR&`kkbe4}QxGvYaz-hZ>| z^%xb;OjaGA%3>oStHmzwksBU`EtR23lkjSK`a-_HGyaD>&gUrIovpAyt5NXYtXNwd zy^yNhGUtM#<}+6nyr@X)v~qn%w#P@FpJv5!cDQ?g`5o*c1fQAY4e;e4qc)rXPzsd`!}AN=y0rDtMH&?@wzq4vH# zPBv*+(n&0oF&daBlofEfO~~zSl?|SW%jnPTamUQ{-Sw&4_0hEE<+HLN8lG>DlUq>b zfbC}7Ej$4x5SN)B-Vi`Vkf&%dwe~&P%%fFL`w<;>-UFAM!NBZ?NIqMdI+*HSi+U-x zXw+CdU74$kJGWgje@}97)_!=ZbI=iXPkUZ(SSEqc;hk@zKbgRC*?wCOLl&-bB;{|u zu*3Mxm(!v`OgtYBS|3+I#gOW~zwwn^kS?3AXlihUf&QNX)NvrR`O4qU zxZ(au$4HOgH1IuHDWzrUh<7oP$wd#DFp7H8@?euQQW>Z2D~GbNLN9Bne;yr2e3F!qOM&&QRIz0{9XeD;MKLxD<~bHNQ?EHt)Aanh>zfTk zZY;bcQ$Yt$6WJry%^68?8zj$axgm!oyRm7p5ehbqp31GXqk{DABCS<*6%6ZDqG}}V zv6_cBPF+C)B@dsbCQn)6iqMtzeJc%N%CmuWngiJ0?5*orz`%&%-o{p<{n&Tu!LDXH zcH`|n_xs)jzDlSSZkgW1H%GQ&FJ)&Ec;kPZga7@Z#j$Y|2oElk z{0u6jUhOp-NM@khpzfuaGZhmrc9lzY(oy(wuq&a41DllDm1oEG;i0S2@zRwCf)(H2 z|BPOPEluqi2_!8{$N%;^c$W(M;7du*^<2;{M-3TxYKR+uF9jXlL&F^f&9~~>3^;X) zj|Dtli+ev0L{eq2t|o0HWehZp9w-9EV*uDVUIbP%U?)|ZV<8P&=CD*1wL}<_6O7RF~pmwBxA?I@CxY)867>G z2-{X-V`PXU<>@I(lMEKnGYW~dKJalL~?E3JOi1#>FeLpZoM^En_&JZzQjOYUQ$Ldr# zNjzJZkm`WVPZt|CykH(pSV2`ZfA=O_x!>?#dPEj>U~y91qA0(5856f{FW|jFjC4K z53AlQkE@!3Y$Dj2t-Jz#brV}wSm}cqc4Sz8R1@KWSA~~6bwZqSV+5ZF1D<08^V$>i zFqUHO6MoJRqyV$A8bb#JTzn(SRiGn#B_-bGqNpb4>a@`3#O2lq@g_>im@M7zp z^<1ple6;j$nKKUCF1@g88^H^CPG-eZw9szu@lxX^i`ZW_xseY!VCwlnzMwV&-%TW) z6}avK_C(Ey{VscGDEqG9nQw=Sd17@pw&_8w`c&)BGzoP3wB@%XaUhjy#53M+50>4O zZR%krHa$I}E+RzWz@B$Sd}kHFraN67-sKJvx!hY>+a;mxZlC?JeKooY%A2@qE?Ao_ z8obtzg!sdcb}W6zMMv2;YE^|M!oDt23zKIc{n^jU-BdS7+)aN}Q_q09Nu^0%mJ!|@ z>ns1FIY_*`fX6ah+Exc9?9tDJr&~1rO0YWBocVC@1A+?i?h6<>kZBSnQ5; z?Ybj&F>VmrxQ%vXAHl1web9Kcf`ifGwl=TF29WM?t0Ku7Libx;WKBF3(yDWJ=)nXY z@(I{=+E*7#3tz?+&0mhM{;dJSif!bOzapvO#P}LrI!TQcuE-1z>a$QH@XvTbNt3!U zsRP8E7pXUs?-y zyjN6pIg(6;H|1}@ZVm_DMaG_wiMV3&>Uo|3GiNN&mAcclmWt&4uiRhXU}N^l#NMzn zCO&)qp6`_@4{C5w-l@E`5LXL()b)UbkWJs1%xXHKM($sD7wLliPP6MmyDV_8?3T?) z53zo283L)sG}O-XyXYKCgFth2ol+JD;WNEv3+FpRFyyXwc7+3MwtWjbwM+?*O(!Jw zepmu&%A2Gg1-6i#u=Tv903_7EklZX}iaLer`f+O&xP`6PFkeK>YjA6x3c)L+`F{#a zd*FhL`K=a$4IF%FITvm{%Z0dVoBFX9Dw1ZeMaFzHL)KGG?P@7eEYV!lv3!KEL(X66 z;3RuwH{Mq-b);fvQqTGBAvW%qM%gK9=wWY2wStL_D{eRCW|3;#F@JB1oK&s@_{HjW zo*=8hXZ_-)A%6-Yuil^FmH{H=-)cJQs2ID@H?(&<3$MA<2f{h+BB;9 zys$vw1|Mn5GaPsX_~gA{Gof~Ccb3I|XE+U}9c-wvgX(3?qH{aQh>8B0I;;gE-W0yD zhQft+=lPdmhBjE!KRoy9wj=I-+IQglH7=rJe>|GMor-v^a}|0-oHJY9elDxj28ZUw zOvpdb!z-hrV~>_uVdK;CD2q`#ep)~DG`A&iuhYTUuXDuwWwULi)*FEDRNE+Tlry}x zXe^e~<)Fc1-iv;MFZ)%`_pX`X)2}?Nn&xv{flPjrT2&?&!$Y-~)3>`p%FX*7-=Y7LW*rjL*3QL*>r=#$1oCRSZB4YN4Fg;bUOCxdzi#0;NmK6!F6 zUS-GXS{-KtjUVT$3kbZHJJjRp+`9%3!c?B^j9Sg)N$5SjawQ^mI9oHTv$qKA2QS z#`bUOXPSNx{LT83&#V5>A<_Dk1%el5e6Oumsak~POKM&_c*$XjaPsCZf}b@RwzZfP z;9%|S>oZB?TvWJ@@VQ;2Lz$yc^}5m)m7cu!94k~&Zd%embA|AaftjuQTUp@KY3h^I zvBUWz?)#4uxFTT1t>2?LYz+K0E{XORhwOxTeC9ha z?)1kEJO`^3Ns=bWPNFZ7BkUt(UQ71rQ`)#&Y#~%7Vh_#Jq=7bnQ+)iuQt(VM#RcR ze*fs>3OO<#Cxplo1nU2b}h-cidTJjEgLi{fT@RF#HSd?c_b)_gd#gPk@-hPqMR3{V&$S$F^ zvr&1Lk29aZ(*sjm)VEcdqHNu_CwzrWB(^S`DUl+0NcpsTNm@*}(4^n|Dy5^O;gMSU zIG{?uv9Ur>6YiFOyb{H^Xw2&1KG4v{hrJOK1zKFlEIH{hw2uYGnDk z$HlIE{`*O5xH$FB(|-f8UPSg}mVa`zLeGWuY)>K%V%iVXOW1xVPt-K1D!p<-`Q62x zR>XdE)Q}?}`+|xKx3jc<6Z`4HBIS55BJcL0y>S1o3?`nfcC_f(&w<`l&jR{Ib#zn@ z7{Bo~hFieYxGB`6ImM&NdPz zn|`j#KjQ$;s-Ev=VX8PJR9+Ifh7AVmR@Mo;OHzUb=DR}>6znMjY(G6oM2z+y2=hcALR26Kjc7G<3av= zKNckUWwZ@>r04J0805dzSCW@UQdpf=ntzjTgfGvs)+I9XGV&Tc@{+hpPI{;QY# z*RbqJEOBTETKs>d?PcVNWIqoviiOsE84V(TPbBgGJ*jnxh9FV$$jD1CqR{E?9JBv^ zAL&c;bpDBcAcYXxzY`(O6v|GDRUpNVLJ6c;|EGy!WksR5{%f?RP#pi2#P$D5;>z}) ziuLbA)QB?hUw?;x&mn~N@81b2 literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/double.parquet b/parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/double.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bf9f55dd91e6565e28356c8ba8374fe230f5b414 GIT binary patch literal 9744 zcmb7qX*^X?AGHjTN{Gyvr&3C$j0@L1-OHU)nKCD-P|_%&Qc=bRQ3#bZYm$T{gpkbh zJZGw?_w+pP^X>ie?%z3QPiLQf@7a5=|9`E0$VLuZ1=$5p9VZSO!J0LKEc`4i>e7#2 z=PcO6`jKMyHdc1{++8DAbV?T?XA66`I%tCR@6>)jUk4OL=LdEUlfi2}!}7zHj$wrl zxuiYPP;FM*n<8uqi4^DHBaNJ>Ix|==-KK#K9TSZhk|h#5Bv-}haKPB)&_09iZP>Zy zQI4@885CU?0~lI^`-nu2`ASU;i7r*7gxKO)pjNAs9u@f=XCIb-?lshTC23+HPep*x z;{>4~3Ph8S^{TTOVI`}3e~hpyl0O&sDUUPYcy7-!-+G`k`XnVWMh4Y~fA)4VWFfs_ zZRwmU4TZXQgCD8dA*(oiE_D+XTIW?STNpT?qTr)O!51qCHA=TnIx=xd;KV`wF=NyT zCcIsL%^Cqi>#l8iLj~7Mi^$buc4)TiN*Fy)gLlrMaItn9^koR0G`>l}osGON{iys% zsLhd8tI))eXXIJxq#gwLN-7@@OTzCR)#;$Q7OJaHO_*+z!;7H})smIENWU8Lskq!8 z5{40i&HBdZ3ef&T4mZZh%bjL!Kh&}1YiV=(V;#gcx_2h}F`y^;U1i@kGDPQJ9PJFU zgS+7a0|kk#F#mpr>8s;_%ev+XqKqv_8{VJ9`O_HUb5Zl}UwgQ?-x`@| zvWDnB*`+863T|aBPXs>=LPa)4US5vKD=9sYKA;JCmD42YycO8`>Wpy81w%Zo`g(0c znGQr(3@a;(XdtvO)9lt22AZhr3yKD5$o9^jEjnz7%kGz-RlAd+eS-7mx^ffnTDX^r zw$Y)UELUVZWd^0%!tC*5I#`(d!j+q&4dLDEMbGfr;Q4_@Z&foLoH()1<%y0h&VN;7 z<2y)4;Z~EYjteFr9hi^2F|GkduH8-jNha2%(TXQREpcpFU(@ByGGJTJbLK@8KQvYA z1Xhg8BW>a27JCq(;2dew02A6K zb=^5ZR>+Nas_8dl;@ebh$WjCiG2uKuY0lGzY8k)NQrR7Fzy1Shnwx?c?kuh51&~Z#fUm5OjYTyMGe{*9s+rgZpTB z#hn#aOxM8+S(B@$I4w~Z-!yluZ7U|Eo*k;Kq2s9j%q_|X8qP8}KB+e8&8>fqvH`K3+$hNO`8ddcg{(rrmC79CpM+;@hFf2qIr$@N>re zfnk4%ZJe405R&E+a$otcVX(Wgimna=wMI`zYv*Y=x;L{byMhX;;fX7|iVU!%xboP! zjY4?R?Kx)rgNB34ZY_k~XTVFtW-Lfr8j^Ru$2_pHz?E(N8M_{eVfzNZ#AY@}yuZ#a z&5(CQq4$Yv84r}9Jv(fD^Mg59)Ghpu9k<4=OUaUlPU~R5nWy!YFEp(Bz1(|)k}1Z| ziWhKg*1#?QBmPTsR_N_D=e=x5tk3;J*AGjmBdlz(dt##@j_-K+bf%Muh&Q0Jqt6KO*Fas6VB7AH4Ol%W9c{IT0jo(7R;P5gKAugbLlPRKPvjT>9 zn8TR9u|tqp*AGAVg+@1!adV1fk$sJcijgw~fAWZSa#em8rAf%2X8FiJ57_u6Sv)21 zDRG^r!nHPSSV*Xg?%>#lT&uJW1eoD*eBXOP0!RB!;M~gJ+W2aBV`skr z1A`4VU-^~Hz+M*Payyy^-@C29l_RXd2)h2%Ku-X^M*=;4ely`yy*K%4KY^n+Dh^-W z&V**OPVPG;2F{A^7P_lN!=A|a6S3)A;G@F&P2Iu*$z9bp<1QLV9=uaeUQNP~lWx69 zkPsRVevoLHHij$x2EQRYxvV!xr=GCuNSwSt7mP$JB>HLl~z& zE*(>anaOj-MRq3Ak}Uo{<&Z)ysU>86f*MNR_^&urWQ!|72Ml+05%W=Cm7(@oEv(Zz z|8?sxMMS<6Z4qcUUmZa zd<(jo`fMfew!TiJh^ztMR>eSR;{CH~Xl-bvG7!x5qBV+xj)VKo?56&vq4Agb?&(lt zB)H9(--%HL_tm=B^WUflXH%=Wn@EBEhW;m^33lMiE6uXXV}eI9s`B6tGHSNShvjn( z8OG%4`W@a!#f4P)B;iSYM6~ns=O>WieQN!PLMQ_>@i$I9ZP@@DWrKUuf{HkP`0~8I zk32$NENnN)vBsO!vj!4+x)@j%=US(4hi==))h{L~$XrTd3hh#dP)_`m!E<{E+1K_h zdg{RVGet#Xp9K~o6YUg<^?dWrpaZWW4Y&5Tgysdy;MYumP${wA+Sjg*V7sS_BG;x0 zqeqQ~ZFiO4D4RP%|AC9d>K1!Uui;?jQPaWDVYh#}&GHfV*lCVcK!*UC_fjU2Nv? zdSTSw9btf3p6|Sx-Huprvix+QSQEkDS%yQ$SwR&FbCV=*!yXRuckS;7DwerGNkddlmDBb%$m$ zxpK_sB%dNW76Nz08Z#l*W5i8w*$Sm^{M)s8nXvIc>NI&l2MnI!HjO-a1d1p}`n@9X zT24bJU(*4t!9kg_&W2$5n6)v~-VSHDehO$^v4o6m&h+^5mEBTtrrhYQ@NRY$GRtw9!dTBN`seQa4+i~~NWwY}vi zA;WjA+6b4XF*=hDgi@Bo&~dYqL-INqL90BEUJeIXzgb<;_b`OXlOA20Z8RjU2z1}~ zhl2MNbarqVAeVbuz`w@aFxV zQ}3NM!2T=vM0vJ3lG_Y(FGbNYP!cnLsoxRN4sW7G7PcTmS9neItQ4X)c*%b%wS-P> zYH~)VJv{abcKQ?pQ7IzJhJuNC)ED;B;-dowY1;dGYYD!=lE7CLq6z0{#@NfDZP4Vu zRYEz>2KVGj;e{}5)U2p0Eqrf|n-vQB4hA&jI+jvzNK4>B?Fzf50urkDMYjDtW{-uZ zM@!Bh*oKPDU#oI`DEM;Vq!?#`1rF#i{aL)U!7sA>V5t`wKD&7{$%S2pI!-leDrF8Z zbLqWr4M!~X1hbh^rR_GY0&V9At?U5&I}XJzs~ZmpyJDb&(4$iWc16v`xE|{34;f^N)@HH zP~W!Liy{VAthucxWl2X4YqIo@bUVDiX&LPFn*z2^8-$egE%7}}Fe>8%6`CQRs*63W zao+LPd+7i=l+@zZdL}vGig?Y|hS%n}@X+(M?Z|fAzxDGVNL&hzDHRFmNte>k%4Q|v${KK5I z*abt}7(G=rz04ZZn6o}gwmUBH40lP2d zrl}hVNk@HE{iUt3BJB?A&?gcOVyJmGPaRK`_nS8^Glfdyz^?gJ0>7T*_-m=#LOyMO zD{~J8$0Aui2=1l9{i2OxwX7rhH|SpKWH&)Zq3{m=_HM(sA8#6*Ni>4Vl2E38qXSgQ zmXdsQ8gvu%NHYx#$SLlgJo1ACo@XoXi{v|irNYqZgq<}aPwo=Dt!#$`n&_U2)tX50 z7`nA}N&~aY&pAG+FvZ2f?xPmkoJdz2w+yZZV$Pi44gW<(etu*tv`ElPJ*^;rdJ7Cm zC%KcI&C#A3!FwT68QdxBZuE%TW6br@^H*0mV9t5yYVZ>iJg)S}($cm8E01T%$vh79 z^~`ak@R4Eg{T`)8b|W-OWJ2Bwnqw0S>q&VD0c3snd_HQ%7!OxGY&^9=1KU}4(b5=z z>GO`Fc1}kOY8)@GA?mNq3HP;DI^ZZCy(3pr!SQ0^nS#!CJezy(lHw+XLhkBW54OLC z#`LxHSb}dTo(o;{`OAdo`~KA@P8xx`Z=K*M$PlVOuG1Ukh&J`2uR7LbOw}F96KZil z>es&3GjACnMK5i*J+Tc=TVIE2?IQ_j^M=b4-xYW>D#B zH^abACmFL%4JMe(DHIa!q(Ha%8XvdUdhi7oWRIIrAa=Q=>&Y&Au&xNGV7kdch7_0A zY3~U5>^abBL<5?lKaw~Iy`gzvnq5-~i-o-j*OHl-ZFVhci*-bh2h~Pnml0(5ck}D- zHANmZFVil{7%%uL$u>5Ygx^3V7q2k_=PKT37Wy>UD-Zr0zC=YqOrv0um^{Rb(syuk zFd=pJirMjx_DHRm;Cc0eibM~mu(CV~9@l<1(b_7Avg!>@SFQ<9!J{)#*pGl;MQk6yC^ z?+;_J83r6Eo?VW9)(9yPGa3ezg!4sI)o>$le-=qo16T4cMN3UPLL$h}fzED(3!{2Q zt61q+6zmMT8b!gf=;=41X}X9cJD%**pETUrr2Q)82?K=>xU}3QrEyfwlX{fJ0V)

chE9y1PT?>NTjC&tS$G{yd-JQ-F3RXfg{x4_=dt2ImO91$zFvy*y523~@f zHnuBiqCdlUZJ;#;-1jRRJ5mi$7I29%wAL19FKH~i&M^lkTmSFM3sx|^@v8TrnJF&Y zC#NoEIv}x6ZB*(!!1ee1InyfyZf&LY1q%lu3F!0hDf2&~uuY2EknGz0& z-+*ZgKZ5tPy%KWLwSw(!Bij-iJ=oOMR85G=L3ruOtdH+Dl+?23#J$tRHKUIrPp7t` z>B6;!jd$h@U8Aj9$DUf@P!`{F=Kx)dr{1eMwv&mCd#~pF$+gBs)3w=0_7l3$?&-L2 zBV{z|I(=1mP9yA#hn?XC3(R~u$L3Q@@VlwV+Z<1AK%aU$Xnw>T3es*$EX&PsFNv?{ z*?u+1y}VWPGe!VyuC>Y4kqii(*JPRKHV4gY#`OF}Dm*@4k>^=K@WA%(8Em&DKzFcu zG;qWoJYtD2;tmKx#oj!AxN#F&Q}Qa9ayHl!&+EIz-2mYi%MPvm&O~$W4{|}iA_6B@ z$pwl#;!@Sm2W@0K2)wye_O`?k^4gEJlj1b-?OJEvn3NSfuJ5Z%h%`gvhwG~KGG>^r zea_2wng%D{B&}^lOc>o1TX=oW05oS;M}^h)xTI2a#VpJYEB5TCTscMPYFmoSujVr_ zlv!Kr{81gk?REtQq8ixxqbT@oI2ppSe_uQJ5`4(j>qwg0R;*3^dNO1AdQ5uOcnwI_ z8v6a=Ie(DoU!hesvOM1wu?KxR^`r>?Gm(;2A+Lo3mGFRw`xFE}d|0#oDg`<{kG`io zF@xIMUx!RoDfpD}qnT5Lf%AWvfg+C`QIj*Vwh;E{l9aW%5=DlK(I2m)3w03_a z{EF#EcUio2tiice=ymHBGNMk`+c!L;W9ie>@xWXvMtA>S?i?!)vae6lt)wmBmkTUw zEjECk#}6v?r2|3+-)Dahw!vwur5ypSdU%)nf;8Ap%wJPSj`La+6fSeWXMKVKj>?xs zQgI9%TRXc}L+A=Io+E)V#WuK?T%*TT%D`Clor5MzOz_Lr z%3rG{BXa3UaM%wW#C=v&c`3w;HHxd6*AEiwknu;@H_{yO*PW{?`aO*h{OJ7@GZ)~= z`|-B9h>Y><&i>;EXsA!;&pdXVD0hvS4rfphKq{AWt~gwbOxNfT$AePRLH$J z8mD*K8diNV=Stt2LH2=S#+^fk2n+pren1(7zsa4w*@y|3mb+gAHAvXpJ@B{wl_lPc zoH{%Ago%(7^JU8qkP)tYCtr>5bCzB<-iga0;li@8De+=8)N5v3D_dubJ)iSJ^oAVp z)8xZ$T~mVhTAe#F`Ii{Kc)F?3PIa)|svTktv4*q1!fLUt43s)8tLrB8vR@@^-zy0{ z{o#j#SvE^0#IkD_%2Jsa=r4S5>3|)C>|DOHjcb9ms)RImUllxEg2r#pTjO_mrC;q0 zYn&5I&9KjqLAPX=@WE;(8cRgUMRjxt*xU<_<|q6`##VdM2}M|L(sg}NMd)1y@y+TZ z7BD=X(_=hL!L{t{+RFzlkr%hs`vF52Lx0_#>$X#IqijcY>}e*BzIfIa?xKwy`@Frk z##&?9ugT2*O(uvw)loLHk^z6d;i52KOAM-y$>bBdgxL2NuS!#BID9$#*Z3`6WJiri zaTgK(U8M`nE#}d0&7aK*Kp<;H|)yoeo^gtS&zscF6kDHw? zQB_P^wEs{|__@Hu8^6J0o?#X+7*K8(EZ79yX|u-qF)Mfve3ec#GD2oD`=9g4*3dt< zGksvAH7sJ|^?ggs@O{^Qj-qwI)uYC0r`puuy7Hv|5`n8NMIL8ZlWg!+|4EbDPC~~? z_2E0V#}NwIgYUzF`Ebtd+K70n15Os|>f$!x;V+pP*;v!y zckK1bMLQb4_i^i0gwYUxOY`%3787*7Z#Wm3Yls6s38Ae-Gi2S_s{3K|}p z;_+_Qcb56G$lIRPz3`C0NAK90(={}(saA9fs+!^M75mfI30~o)^y2qW0v)}7w6a1y z`JvUfDu_bpEXsmmJX&;p*oz#nn15>wzBbW)DiWLEVR5x$?2;7%x{CR`tZk6Ba&kbj z#t~n%^S{Oa-GbZ`q2oIfCBeT!EybmjhEcY&6IP3MU^!PHX&|VL_(+E}V#NB0URE8S zc1s0sGWAvz@R&m}&7il|b32B6XcD`lwhy$0emxUx%FI>aRklFd?h?zx1zNaAGcdoAYy(Hn zl-EgbZQ$fTk)cz|#ISl9Y4EH*9&s`anhr9-p1JtyUJx10Ln*0`59;E&*x5_gumYxbzM4jEyB$PKeIU4q2~O|f>@&k_;)yPCSRk2#pUr*T&f-n zgpcffy5I;$A-2Q-Cp#GVwA($qX@oVJfg#W8?eUtWR@rBzJ@i5rEgwyh!S>5@^%RE# zBp*tx_;8erJ^r@QJ7=k2gtWL{^|i;|`PsB`VtfnFNe0C$k@4i>&X~Mj1BkiL?mNd# z>_dIzJb2-k4P>Q#4Mgn;zL5HMbNzC{4xJKg`5LYdE^n2r&W8>Vy1-iE9=jC@c{%;@ z0UKaM*J17@c+ahe8Bx)eOvrR}^9-H0LJEiI8cHJ_k8iUvmJ@uscgA0S|Ksh*-SOiS zTPhV1H7gggga{o{Jmy`bG8Hxy;cvfk9FSF7CU?=k$|I97$$>AGw){Ti73!z+frGndMo6;S>0kx*rTf&3=YvcGO}OADgP$y>BXzn@ISwbsd}0&nP;j+Kbw(>6dz zo=&T(9|>EvWsDPMHezL|ejk4}1CQqqzJ0yl0MivecO<7H!75qEtxR#rIBWr#W6m<&-XxS){B3*li2;TxXO6WB zH*(Ui$CXDKWA7RB$F>{I(B31SbnycNq6)>yUG6jpvIsVOC z6bp0xPYzqhqV)eL<>O;vVPPZAnl;O~PqMH}6|t}jxE3wrl1nRMU8UYx#KudKde1IE zb$!nv?2-0%4?KD^ zogdbKUAlyqB;Z!UXCaqf!f&JARkD^$k}egXQr$`gnI7q->vjcol?u5=NS6tF#JiP= z_++G)iTXe7DiaGVk^U$iRPXjt;z)1$$Mwf%x;{#tVwWkGiV$!wmyVXZT`m)&-d!#m zOOp8{7f*HnB%kbY`;)?rpzcqKX%R9NN*VF)70THew<|W>f7)HKF~3CSv&z$Y_s^RO zdT)Q;{AQ;6vuY8$Y~_{`fjyO5%jND=s#U7@RBo#w$yTY?Q}q(rgXtsnY6< zkgeA4jo(wfeK6xrwa)0%o@(9664@_$GxdAE=r8o%`C_m*)APlUg+s0e?1COOMqKh4 zHO8wndTUI0P33Ay0yK|WQ(@1HS~KzB-dc01vvPG7atR)FmP(l!byh0Rdh4vWl*)az zQE%}0YOCFs@zqXmw)d+&IOOZeBtg%5iiLb;y@QQLU%eyQRK9^qrFk~cn4Xyp^j*Py z4Gh<_@{LT71kc7DKAD+~PX5pO8g~Yk%75Dx)ZqDT_mRHLZ_dYN`@XrH;!tREjS%!| za*LMFYI2X!=x^E+YpU?wBcA5<-80!U>$}&D;QsI4X=fFheKHcfntih~vzq6H!OXkuM@OFxw1-WWDs~*3Y4Gkietec ztnBXdD$fVIFKj7O>WNWr^y#^%-JjiaNpEhj=Q5Tn_r{Xe`S!+HDCG2BvC$mry-GGy z?u)0=eftuaUO9b4 z)MIl)1Gi2s-!PaKvCeNWJz61m@OF&m@ZgaC@S3Ykl?X8Wr%Hvr?@yJ9 zA0D0hC>5nLT`rg8KmAGR?)~Wsl^3JapSOHe`BAC<&HqQ0_Q3rg)q3-zKfZu-(@YIX zXx~h&h2n#mIvcIAnXhE?O|$h>#=hAGruT!{#$AWUX1}>cZJKNHNZL2|-RJIuxn}C%`LX$qQ=FR@IwOPv7P_Jp^A@^ew8j^DV$C=I?2Tsx z{On8i&imPa2f!O)UPXF;`uhsb}n8nr-raxHQ*#cw%Y3GfMT(LT}Rk zKR*T8MN~9cgqI)MR)E{JZ(T!Ibo$JB|NS{-5N3%l~z4ymtrF z!G}ng|KIjR{GUK{`JbTAA}kf?cJM&3XV9iSfq|Rc4j$ZpSSrNx;341r0a9veo40J< TA|ftM{OhyJSXju!w_yJV&3G*z literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/double.parquet b/parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/double.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bf9f55dd91e6565e28356c8ba8374fe230f5b414 GIT binary patch literal 9744 zcmb7qX*^X?AGHjTN{Gyvr&3C$j0@L1-OHU)nKCD-P|_%&Qc=bRQ3#bZYm$T{gpkbh zJZGw?_w+pP^X>ie?%z3QPiLQf@7a5=|9`E0$VLuZ1=$5p9VZSO!J0LKEc`4i>e7#2 z=PcO6`jKMyHdc1{++8DAbV?T?XA66`I%tCR@6>)jUk4OL=LdEUlfi2}!}7zHj$wrl zxuiYPP;FM*n<8uqi4^DHBaNJ>Ix|==-KK#K9TSZhk|h#5Bv-}haKPB)&_09iZP>Zy zQI4@885CU?0~lI^`-nu2`ASU;i7r*7gxKO)pjNAs9u@f=XCIb-?lshTC23+HPep*x z;{>4~3Ph8S^{TTOVI`}3e~hpyl0O&sDUUPYcy7-!-+G`k`XnVWMh4Y~fA)4VWFfs_ zZRwmU4TZXQgCD8dA*(oiE_D+XTIW?STNpT?qTr)O!51qCHA=TnIx=xd;KV`wF=NyT zCcIsL%^Cqi>#l8iLj~7Mi^$buc4)TiN*Fy)gLlrMaItn9^koR0G`>l}osGON{iys% zsLhd8tI))eXXIJxq#gwLN-7@@OTzCR)#;$Q7OJaHO_*+z!;7H})smIENWU8Lskq!8 z5{40i&HBdZ3ef&T4mZZh%bjL!Kh&}1YiV=(V;#gcx_2h}F`y^;U1i@kGDPQJ9PJFU zgS+7a0|kk#F#mpr>8s;_%ev+XqKqv_8{VJ9`O_HUb5Zl}UwgQ?-x`@| zvWDnB*`+863T|aBPXs>=LPa)4US5vKD=9sYKA;JCmD42YycO8`>Wpy81w%Zo`g(0c znGQr(3@a;(XdtvO)9lt22AZhr3yKD5$o9^jEjnz7%kGz-RlAd+eS-7mx^ffnTDX^r zw$Y)UELUVZWd^0%!tC*5I#`(d!j+q&4dLDEMbGfr;Q4_@Z&foLoH()1<%y0h&VN;7 z<2y)4;Z~EYjteFr9hi^2F|GkduH8-jNha2%(TXQREpcpFU(@ByGGJTJbLK@8KQvYA z1Xhg8BW>a27JCq(;2dew02A6K zb=^5ZR>+Nas_8dl;@ebh$WjCiG2uKuY0lGzY8k)NQrR7Fzy1Shnwx?c?kuh51&~Z#fUm5OjYTyMGe{*9s+rgZpTB z#hn#aOxM8+S(B@$I4w~Z-!yluZ7U|Eo*k;Kq2s9j%q_|X8qP8}KB+e8&8>fqvH`K3+$hNO`8ddcg{(rrmC79CpM+;@hFf2qIr$@N>re zfnk4%ZJe405R&E+a$otcVX(Wgimna=wMI`zYv*Y=x;L{byMhX;;fX7|iVU!%xboP! zjY4?R?Kx)rgNB34ZY_k~XTVFtW-Lfr8j^Ru$2_pHz?E(N8M_{eVfzNZ#AY@}yuZ#a z&5(CQq4$Yv84r}9Jv(fD^Mg59)Ghpu9k<4=OUaUlPU~R5nWy!YFEp(Bz1(|)k}1Z| ziWhKg*1#?QBmPTsR_N_D=e=x5tk3;J*AGjmBdlz(dt##@j_-K+bf%Muh&Q0Jqt6KO*Fas6VB7AH4Ol%W9c{IT0jo(7R;P5gKAugbLlPRKPvjT>9 zn8TR9u|tqp*AGAVg+@1!adV1fk$sJcijgw~fAWZSa#em8rAf%2X8FiJ57_u6Sv)21 zDRG^r!nHPSSV*Xg?%>#lT&uJW1eoD*eBXOP0!RB!;M~gJ+W2aBV`skr z1A`4VU-^~Hz+M*Payyy^-@C29l_RXd2)h2%Ku-X^M*=;4ely`yy*K%4KY^n+Dh^-W z&V**OPVPG;2F{A^7P_lN!=A|a6S3)A;G@F&P2Iu*$z9bp<1QLV9=uaeUQNP~lWx69 zkPsRVevoLHHij$x2EQRYxvV!xr=GCuNSwSt7mP$JB>HLl~z& zE*(>anaOj-MRq3Ak}Uo{<&Z)ysU>86f*MNR_^&urWQ!|72Ml+05%W=Cm7(@oEv(Zz z|8?sxMMS<6Z4qcUUmZa zd<(jo`fMfew!TiJh^ztMR>eSR;{CH~Xl-bvG7!x5qBV+xj)VKo?56&vq4Agb?&(lt zB)H9(--%HL_tm=B^WUflXH%=Wn@EBEhW;m^33lMiE6uXXV}eI9s`B6tGHSNShvjn( z8OG%4`W@a!#f4P)B;iSYM6~ns=O>WieQN!PLMQ_>@i$I9ZP@@DWrKUuf{HkP`0~8I zk32$NENnN)vBsO!vj!4+x)@j%=US(4hi==))h{L~$XrTd3hh#dP)_`m!E<{E+1K_h zdg{RVGet#Xp9K~o6YUg<^?dWrpaZWW4Y&5Tgysdy;MYumP${wA+Sjg*V7sS_BG;x0 zqeqQ~ZFiO4D4RP%|AC9d>K1!Uui;?jQPaWDVYh#}&GHfV*lCVcK!*UC_fjU2Nv? zdSTSw9btf3p6|Sx-Huprvix+QSQEkDS%yQ$SwR&FbCV=*!yXRuckS;7DwerGNkddlmDBb%$m$ zxpK_sB%dNW76Nz08Z#l*W5i8w*$Sm^{M)s8nXvIc>NI&l2MnI!HjO-a1d1p}`n@9X zT24bJU(*4t!9kg_&W2$5n6)v~-VSHDehO$^v4o6m&h+^5mEBTtrrhYQ@NRY$GRtw9!dTBN`seQa4+i~~NWwY}vi zA;WjA+6b4XF*=hDgi@Bo&~dYqL-INqL90BEUJeIXzgb<;_b`OXlOA20Z8RjU2z1}~ zhl2MNbarqVAeVbuz`w@aFxV zQ}3NM!2T=vM0vJ3lG_Y(FGbNYP!cnLsoxRN4sW7G7PcTmS9neItQ4X)c*%b%wS-P> zYH~)VJv{abcKQ?pQ7IzJhJuNC)ED;B;-dowY1;dGYYD!=lE7CLq6z0{#@NfDZP4Vu zRYEz>2KVGj;e{}5)U2p0Eqrf|n-vQB4hA&jI+jvzNK4>B?Fzf50urkDMYjDtW{-uZ zM@!Bh*oKPDU#oI`DEM;Vq!?#`1rF#i{aL)U!7sA>V5t`wKD&7{$%S2pI!-leDrF8Z zbLqWr4M!~X1hbh^rR_GY0&V9At?U5&I}XJzs~ZmpyJDb&(4$iWc16v`xE|{34;f^N)@HH zP~W!Liy{VAthucxWl2X4YqIo@bUVDiX&LPFn*z2^8-$egE%7}}Fe>8%6`CQRs*63W zao+LPd+7i=l+@zZdL}vGig?Y|hS%n}@X+(M?Z|fAzxDGVNL&hzDHRFmNte>k%4Q|v${KK5I z*abt}7(G=rz04ZZn6o}gwmUBH40lP2d zrl}hVNk@HE{iUt3BJB?A&?gcOVyJmGPaRK`_nS8^Glfdyz^?gJ0>7T*_-m=#LOyMO zD{~J8$0Aui2=1l9{i2OxwX7rhH|SpKWH&)Zq3{m=_HM(sA8#6*Ni>4Vl2E38qXSgQ zmXdsQ8gvu%NHYx#$SLlgJo1ACo@XoXi{v|irNYqZgq<}aPwo=Dt!#$`n&_U2)tX50 z7`nA}N&~aY&pAG+FvZ2f?xPmkoJdz2w+yZZV$Pi44gW<(etu*tv`ElPJ*^;rdJ7Cm zC%KcI&C#A3!FwT68QdxBZuE%TW6br@^H*0mV9t5yYVZ>iJg)S}($cm8E01T%$vh79 z^~`ak@R4Eg{T`)8b|W-OWJ2Bwnqw0S>q&VD0c3snd_HQ%7!OxGY&^9=1KU}4(b5=z z>GO`Fc1}kOY8)@GA?mNq3HP;DI^ZZCy(3pr!SQ0^nS#!CJezy(lHw+XLhkBW54OLC z#`LxHSb}dTo(o;{`OAdo`~KA@P8xx`Z=K*M$PlVOuG1Ukh&J`2uR7LbOw}F96KZil z>es&3GjACnMK5i*J+Tc=TVIE2?IQ_j^M=b4-xYW>D#B zH^abACmFL%4JMe(DHIa!q(Ha%8XvdUdhi7oWRIIrAa=Q=>&Y&Au&xNGV7kdch7_0A zY3~U5>^abBL<5?lKaw~Iy`gzvnq5-~i-o-j*OHl-ZFVhci*-bh2h~Pnml0(5ck}D- zHANmZFVil{7%%uL$u>5Ygx^3V7q2k_=PKT37Wy>UD-Zr0zC=YqOrv0um^{Rb(syuk zFd=pJirMjx_DHRm;Cc0eibM~mu(CV~9@l<1(b_7Avg!>@SFQ<9!J{)#*pGl;MQk6yC^ z?+;_J83r6Eo?VW9)(9yPGa3ezg!4sI)o>$le-=qo16T4cMN3UPLL$h}fzED(3!{2Q zt61q+6zmMT8b!gf=;=41X}X9cJD%**pETUrr2Q)82?K=>xU}3QrEyfwlX{fJ0V)

chE9y1PT?>NTjC&tS$G{yd-JQ-F3RXfg{x4_=dt2ImO91$zFvy*y523~@f zHnuBiqCdlUZJ;#;-1jRRJ5mi$7I29%wAL19FKH~i&M^lkTmSFM3sx|^@v8TrnJF&Y zC#NoEIv}x6ZB*(!!1ee1InyfyZf&LY1q%lu3F!0hDf2&~uuY2EknGz0& z-+*ZgKZ5tPy%KWLwSw(!Bij-iJ=oOMR85G=L3ruOtdH+Dl+?23#J$tRHKUIrPp7t` z>B6;!jd$h@U8Aj9$DUf@P!`{F=Kx)dr{1eMwv&mCd#~pF$+gBs)3w=0_7l3$?&-L2 zBV{z|I(=1mP9yA#hn?XC3(R~u$L3Q@@VlwV+Z<1AK%aU$Xnw>T3es*$EX&PsFNv?{ z*?u+1y}VWPGe!VyuC>Y4kqii(*JPRKHV4gY#`OF}Dm*@4k>^=K@WA%(8Em&DKzFcu zG;qWoJYtD2;tmKx#oj!AxN#F&Q}Qa9ayHl!&+EIz-2mYi%MPvm&O~$W4{|}iA_6B@ z$pwl#;!@Sm2W@0K2)wye_O`?k^4gEJlj1b-?OJEvn3NSfuJ5Z%h%`gvhwG~KGG>^r zea_2wng%D{B&}^lOc>o1TX=oW05oS;M}^h)xTI2a#VpJYEB5TCTscMPYFmoSujVr_ zlv!Kr{81gk?REtQq8ixxqbT@oI2ppSe_uQJ5`4(j>qwg0R;*3^dNO1AdQ5uOcnwI_ z8v6a=Ie(DoU!hesvOM1wu?KxR^`r>?Gm(;2A+Lo3mGFRw`xFE}d|0#oDg`<{kG`io zF@xIMUx!RoDfpD}qnT5Lf%AWvfg+C`QIj*Vwh;E{l9aW%5=DlK(I2m)3w03_a z{EF#EcUio2tiice=ymHBGNMk`+c!L;W9ie>@xWXvMtA>S?i?!)vae6lt)wmBmkTUw zEjECk#}6v?r2|3+-)Dahw!vwur5ypSdU%)nf;8Ap%wJPSj`La+6fSeWXMKVKj>?xs zQgI9%TRXc}L+A=Io+E)V#WuK?T%*TT%D`Clor5MzOz_Lr z%3rG{BXa3UaM%wW#C=v&c`3w;HHxd6*AEiwknu;@H_{yO*PW{?`aO*h{OJ7@GZ)~= z`|-B9h>Y><&i>;EXsA!;&pdXVD0hvS4rfphKq{AWt~gwbOxNfT$AePRLH$J z8mD*K8diNV=Stt2LH2=S#+^fk2n+pren1(7zsa4w*@y|3mb+gAHAvXpJ@B{wl_lPc zoH{%Ago%(7^JU8qkP)tYCtr>5bCzB<-iga0;li@8De+=8)N5v3D_dubJ)iSJ^oAVp z)8xZ$T~mVhTAe#F`Ii{Kc)F?3PIa)|svTktv4*q1!fLUt43s)8tLrB8vR@@^-zy0{ z{o#j#SvE^0#IkD_%2Jsa=r4S5>3|)C>|DOHjcb9ms)RImUllxEg2r#pTjO_mrC;q0 zYn&5I&9KjqLAPX=@WE;(8cRgUMRjxt*xU<_<|q6`##VdM2}M|L(sg}NMd)1y@y+TZ z7BD=X(_=hL!L{t{+RFzlkr%hs`vF52Lx0_#>$X#IqijcY>}e*BzIfIa?xKwy`@Frk z##&?9ugT2*O(uvw)loLHk^z6d;i52KOAM-y$>bBdgxL2NuS!#BID9$#*Z3`6WJiri zaTgK(U8M`nE#}d0&7aK*Kp<;H|)yoeo^gtS&zscF6kDHw? zQB_P^wEs{|__@Hu8^6J0o?#X+7*K8(EZ79yX|u-qF)Mfve3ec#GD2oD`=9g4*3dt< zGksvAH7sJ|^?ggs@O{^Qj-qwI)uYC0r`puuy7Hv|5`n8NMIL8ZlWg!+|4EbDPC~~? z_2E0V#}NwIgYUzF`Ebtd+K70n15Os|>f$!x;V+pP*;v!y zckK1bMLQb4_i^i0gwYUxOY`%3787*7Z#Wm3Yls6s38Ae-Gi2S_s{3K|}p z;_+_Qcb56G$lIRPz3`C0NAK90(={}(saA9fs+!^M75mfI30~o)^y2qW0v)}7w6a1y z`JvUfDu_bpEXsmmJX&;p*oz#nn15>wzBbW)DiWLEVR5x$?2;7%x{CR`tZk6Ba&kbj z#t~n%^S{Oa-GbZ`q2oIfCBeT!EybmjhEcY&6IP3MU^!PHX&|VL_(+E}V#NB0URE8S zc1s0sGWAvz@R&m}&7il|b32B6XcD`lwhy$0emxUx%FI>aRklFd?h?zx1zNaAGcdoAYy(Hn zl-EgbZQ$fTk)cz|#ISl9Y4EH*9&s`anhr9-p1JtyUJx10Ln*0`59;E&*x5_gumYxbzM4jEyB$PKeIU4q2~O|f>@&k_;)yPCSRk2#pUr*T&f-n zgpcffy5I;$A-2Q-Cp#GVwA($qX@oVJfg#W8?eUtWR@rBzJ@i5rEgwyh!S>5@^%RE# zBp*tx_;8erJ^r@QJ7=k2gtWL{^|i;|`PsB`VtfnFNe0C$k@4i>&X~Mj1BkiL?mNd# z>_dIzJb2-k4P>Q#4Mgn;zL5HMbNzC{4xJKg`5LYdE^n2r&W8>Vy1-iE9=jC@c{%;@ z0UKaM*J17@c+ahe8Bx)eOvrR}^9-H0LJEiI8cHJ_k8iUvmJ@uscgA0S|Ksh*-SOiS zTPhV1H7gggga{o{Jmy`bG8Hxy;cvfk9FSF7CU?=k$|I97$$>AGw){Ti73!z+frGndMo6;S>0kx*rTf&3=YvcGO}OADgP$y>BXzn@ISwbsd}0&nP;j+Kbw(>6dz zo=&T(9|>EvWsDPMHezL|ejk4}1CQqqzJ0yl0MivecO<7H!75qEtxR#rIBWr#W6m<&-XxS){B3*li2;TxXO6WB zH*(Ui$CXDKWA7RB$F>{I(B31SbnycNq6)>yUG6jpvIsVOC z6bp0xPYzqhqV)eL<>O;vVPPZAnl;O~PqMH}6|t}jxE3wrl1nRMU8UYx#KudKde1IE zb$!nv?2-0%4?KD^ zogdbKUAlyqB;Z!UXCaqf!f&JARkD^$k}egXQr$`gnI7q->vjcol?u5=NS6tF#JiP= z_++G)iTXe7DiaGVk^U$iRPXjt;z)1$$Mwf%x;{#tVwWkGiV$!wmyVXZT`m)&-d!#m zOOp8{7f*HnB%kbY`;)?rpzcqKX%R9NN*VF)70THew<|W>f7)HKF~3CSv&z$Y_s^RO zdT)Q;{AQ;6vuY8$Y~_{`fjyO5%jND=s#U7@RBo#w$yTY?Q}q(rgXtsnY6< zkgeA4jo(wfeK6xrwa)0%o@(9664@_$GxdAE=r8o%`C_m*)APlUg+s0e?1COOMqKh4 zHO8wndTUI0P33Ay0yK|WQ(@1HS~KzB-dc01vvPG7atR)FmP(l!byh0Rdh4vWl*)az zQE%}0YOCFs@zqXmw)d+&IOOZeBtg%5iiLb;y@QQLU%eyQRK9^qrFk~cn4Xyp^j*Py z4Gh<_@{LT71kc7DKAD+~PX5pO8g~Yk%75Dx)ZqDT_mRHLZ_dYN`@XrH;!tREjS%!| za*LMFYI2X!=x^E+YpU?wBcA5<-80!U>$}&D;QsI4X=fFheKHcfntih~vzq6H!OXkuM@OFxw1-WWDs~*3Y4Gkietec ztnBXdD$fVIFKj7O>WNWr^y#^%-JjiaNpEhj=Q5Tn_r{Xe`S!+HDCG2BvC$mry-GGy z?u)0=eftuaUO9b4 z)MIl)1Gi2s-!PaKvCeNWJz61m@OF&m@ZgaC@S3Ykl?X8Wr%Hvr?@yJ9 zA0D0hC>5nLT`rg8KmAGR?)~Wsl^3JapSOHe`BAC<&HqQ0_Q3rg)q3-zKfZu-(@YIX zXx~h&h2n#mIvcIAnXhE?O|$h>#=hAGruT!{#$AWUX1}>cZJKNHNZL2|-RJIuxn}C%`LX$qQ=FR@IwOPv7P_Jp^A@^ew8j^DV$C=I?2Tsx z{On8i&imPa2f!O)UPXF;`uhsb}n8nr-raxHQ*#cw%Y3GfMT(LT}Rk zKR*T8MN~9cgqI)MR)E{JZ(T!Ibo$JB|NS{-5N3%l~z4ymtrF z!G}ng|KIjR{GUK{`JbTAA}kf?cJM&3XV9iSfq|Rc4j$ZpSSrNx;341r0a9veo40J< TA|ftM{OhyJSXju!w_yJV&3G*z literal 0 HcmV?d00001 diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index a00647fae6ba..e4609891e07e 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -86,7 +86,8 @@ public class TestParquetVectorizedReads extends AvroDataTestBase { "RLE_DICTIONARY", "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY", - "DELTA_BYTE_ARRAY"); + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT"); private static final Map GOLDEN_FILE_TYPES = ImmutableMap.of( "string", Types.StringType.get(), @@ -94,7 +95,8 @@ public class TestParquetVectorizedReads extends AvroDataTestBase { "int32", Types.IntegerType.get(), "int64", Types.LongType.get(), "binary", Types.BinaryType.get(), - "boolean", Types.BooleanType.get()); + "boolean", Types.BooleanType.get(), + "double", Types.DoubleType.get()); static final Function IDENTITY = record -> record; diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 6b7ffe17880f..9747e993c6a6 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -86,7 +86,8 @@ public class TestParquetVectorizedReads extends AvroDataTestBase { "RLE_DICTIONARY", "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY", - "DELTA_BYTE_ARRAY"); + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT"); private static final Map GOLDEN_FILE_TYPES = ImmutableMap.of( "string", Types.StringType.get(), @@ -94,7 +95,8 @@ public class TestParquetVectorizedReads extends AvroDataTestBase { "int32", Types.IntegerType.get(), "int64", Types.LongType.get(), "binary", Types.BinaryType.get(), - "boolean", Types.BooleanType.get()); + "boolean", Types.BooleanType.get(), + "double", Types.DoubleType.get()); static final Function IDENTITY = record -> record; From 57fa667ca3d67ed2b0706f7dc4b2715dfad07e50 Mon Sep 17 00:00:00 2001 From: Becker Ewing Date: Fri, 5 Dec 2025 14:07:48 -0500 Subject: [PATCH 42/47] Fix bugs related to null values in parquet v2 encodings readers --- .../VectorizedDeltaByteArrayValuesReader.java | 11 ++++++++--- ...torizedDeltaLengthByteArrayValuesReader.java | 15 ++++++++++----- .../BYTE_STREAM_SPLIT/double_with_nulls.parquet | Bin 0 -> 6986 bytes .../BYTE_STREAM_SPLIT/float_with_nulls.parquet | Bin 0 -> 3754 bytes .../int32_with_nulls.parquet | Bin 0 -> 2069 bytes .../int64_with_nulls.parquet | Bin 0 -> 4913 bytes .../DELTA_BYTE_ARRAY/binary_with_nulls.parquet | Bin 0 -> 8197 bytes .../DELTA_BYTE_ARRAY/string_with_nulls.parquet | Bin 0 -> 7317 bytes .../binary_with_nulls.parquet | Bin 0 -> 8075 bytes .../string_with_nulls.parquet | Bin 0 -> 6999 bytes .../encodings/PLAIN/binary_with_nulls.parquet | Bin 0 -> 10740 bytes .../encodings/PLAIN/boolean_with_nulls.parquet | Bin 0 -> 618 bytes .../encodings/PLAIN/double_with_nulls.parquet | Bin 0 -> 6986 bytes .../encodings/PLAIN/float_with_nulls.parquet | Bin 0 -> 3754 bytes .../encodings/PLAIN/int32_with_nulls.parquet | Bin 0 -> 3766 bytes .../encodings/PLAIN/int64_with_nulls.parquet | Bin 0 -> 6998 bytes .../encodings/PLAIN/string_with_nulls.parquet | Bin 0 -> 9764 bytes .../PLAIN_DICTIONARY/binary_with_nulls.parquet | Bin 0 -> 11772 bytes .../PLAIN_DICTIONARY/double_with_nulls.parquet | Bin 0 -> 8017 bytes .../PLAIN_DICTIONARY/float_with_nulls.parquet | Bin 0 -> 4785 bytes .../PLAIN_DICTIONARY/int32_with_nulls.parquet | Bin 0 -> 4653 bytes .../PLAIN_DICTIONARY/int64_with_nulls.parquet | Bin 0 -> 8029 bytes .../PLAIN_DICTIONARY/string_with_nulls.parquet | Bin 0 -> 3166 bytes .../RLE_DICTIONARY/binary_with_nulls.parquet | Bin 0 -> 11772 bytes .../RLE_DICTIONARY/double_with_nulls.parquet | Bin 0 -> 8017 bytes .../RLE_DICTIONARY/float_with_nulls.parquet | Bin 0 -> 4785 bytes .../RLE_DICTIONARY/int32_with_nulls.parquet | Bin 0 -> 4653 bytes .../RLE_DICTIONARY/int64_with_nulls.parquet | Bin 0 -> 8029 bytes .../RLE_DICTIONARY/string_with_nulls.parquet | Bin 0 -> 3166 bytes .../parquet/TestParquetVectorizedReads.java | 8 +++++--- .../parquet/TestParquetVectorizedReads.java | 8 +++++--- 31 files changed, 28 insertions(+), 14 deletions(-) create mode 100644 parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/double_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/float_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/DELTA_BINARY_PACKED/int32_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/DELTA_BINARY_PACKED/int64_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/DELTA_BYTE_ARRAY/binary_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/DELTA_BYTE_ARRAY/string_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/binary_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/string_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN/binary_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN/boolean_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN/double_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN/float_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN/int32_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN/int64_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN/string_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/binary_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/double_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/float_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/int32_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/int64_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/string_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/binary_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/double_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/float_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/int32_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/int64_with_nulls.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/string_with_nulls.parquet diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java index 0a41b6b5bea6..1c8834fee693 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java @@ -41,18 +41,21 @@ public class VectorizedDeltaByteArrayValuesReader implements VectorizedValuesRea private int[] prefixLengths; private Binary previous; + private int currentIndex; public VectorizedDeltaByteArrayValuesReader() { prefixLengthReader = new VectorizedDeltaEncodedValuesReader(); suffixReader = new VectorizedDeltaLengthByteArrayValuesReader(); - previous = Binary.EMPTY; } @Override public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { prefixLengthReader.initFromPage(valueCount, in); - prefixLengths = prefixLengthReader.readIntegers(valueCount, 0); + // actual number of elements in the page may be less than the passed valueCount here due to nulls + prefixLengths = prefixLengthReader.readIntegers(prefixLengthReader.getTotalValueCount(), 0); suffixReader.initFromPage(valueCount, in); + previous = Binary.EMPTY; + currentIndex = 0; } @Override @@ -73,7 +76,7 @@ public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowVa private void readValues(int total, int rowId, BinaryOutputWriter outputWriter) { for (int i = 0; i < total; i++) { - int prefixLength = prefixLengths[rowId + i]; + int prefixLength = prefixLengths[currentIndex]; Binary suffix = suffixReader.readBinaryForRow(rowId + i); int length = prefixLength + suffix.length(); @@ -87,6 +90,8 @@ private void readValues(int total, int rowId, BinaryOutputWriter outputWriter) { outputWriter.write(rowId + i, suffix.getBytesUnsafe()); previous = suffix; } + + currentIndex++; } } diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java index 601c64e97201..b99ac73303d6 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -43,6 +43,7 @@ public class VectorizedDeltaLengthByteArrayValuesReader implements VectorizedVal private ByteBufferInputStream in; private int[] lengths; private ByteBuffer byteBuffer; + private int currentIndex; VectorizedDeltaLengthByteArrayValuesReader() { lengthReader = new VectorizedDeltaEncodedValuesReader(); @@ -51,9 +52,11 @@ public class VectorizedDeltaLengthByteArrayValuesReader implements VectorizedVal @Override public void initFromPage(int valueCount, ByteBufferInputStream inputStream) throws IOException { lengthReader.initFromPage(valueCount, inputStream); - lengths = lengthReader.readIntegers(valueCount, 0); + // actual number of elements in the page may be less than the passed valueCount here due to nulls + lengths = lengthReader.readIntegers(lengthReader.getTotalValueCount(), 0); - this.in = inputStream.remainingStream(); + in = inputStream.remainingStream(); + currentIndex = 0; } @Override @@ -63,10 +66,11 @@ public Binary readBinary(int len) { } Binary readBinaryForRow(int rowId) { - if (lengths[rowId] == 0) { + if (lengths[currentIndex] == 0) { + currentIndex++; return Binary.EMPTY; } - readValues(1, null, rowId, ignored -> lengths[rowId], (f, i, v) -> byteBuffer = v); + readValues(1, null, rowId, ignored -> lengths[currentIndex], (f, i, v) -> byteBuffer = v); return Binary.fromReusedByteBuffer(byteBuffer); } @@ -76,7 +80,7 @@ public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowVa total, vec, rowId, - x -> lengths[x], + x -> lengths[currentIndex], (f, i, v) -> ((BaseVariableWidthVector) vec) .setSafe( @@ -102,6 +106,7 @@ private void readValues( throw new ParquetDecodingException("Failed to read " + length + " bytes"); } outputWriter.write(vec, rowId + i, buffer); + currentIndex++; } } diff --git a/parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/double_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/double_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d23c1e6b7387132eb6af9acb510b62131897f9e6 GIT binary patch literal 6986 zcmch6WmuJ6w=P{u3ewUgNaq3;ti>u8u{uC$=@bD$x)qRar4$fQ5kUnM5D*k8DUmK| zB&7tAgYUP`k9~db+21~ApS|yEK6A`*-=m)20}a9|aZzx^IGi0hu0B>S4lX(_IsqOA zXMZnyr(?1{4?|c@clO(kV!eyW)Y5-Lv$IoBP=x;ZrUEw`5B}e;;QvZ;KBh~U`gd7TL3Ohm=%+~Be^$=OLg z+RiC^W!O4|b0#Rrv?6&&)4<`NC=gcBmbxAjEbq9()feT|m}Fr)Hpt`L=uG83oWX=* zXX%TNa@-B}Ygj7b<_6~#98FE??yTG_p6(a&geOF}b@|hcu!|Sdd-YD>7O29JPcZy$Q-zSvv@sxr1vkE%+Qw$fhI=v$9d60N?=DGT+| zgH*v^lrdLt%aNv#uE`EeAEN1#G8q!|X7Jt?8$gF36Jws-5$l_>_X>u0?TC`%liY+A zB8!}2#F!VS#MkRP<{$!N(h3g`(}gX`_+Kwfo0U=T}&3Qh$|4QK#-0BOOG0pM(2o4G$ zj`gh$wH?W|iCBZ-YqwL>#ewGN4e`1-;2l5r$&-vIe=*cR)0RPY3^*Qt~<9bYsb|MbFS2zmdp0P zV((o^(@Pp07SbElUS+j)FKv^1)6;}^v_)k1@81X+RH`VXymV*_qV+OJ-!2|0z!`T>@ zDR)ijfzW}Y!=+4}G5flq2{rd4o3zW7Qi}OgVd=MO^C?ftQC*K*cP~7C#(L}KTxnm4 z71P5x|CZG}h4^K$%irvli@!YVxBPz2gBEi1u+^v5e3birc|{ChnDgA>S2q@3-;WGx zj3bt6YDI+?^3H#)>13I|^WhshJjH9TM!l?iyya-JP5W}c>jHt|PBvpFj^m~L4PFa# zD+$Rr@TH5S*J969So1yw`Y8)MPZBfl%CdR={QGO54C;2FzD!MF%)O?MCupfxouSw1 znv|>ZA^Fzy-7KSR6z?fstL@kBGZ9-BhExadC{;x`^Zy?I`Fv_9BO(E4UOtw@2yq%~ z8oQ2X2cA!8j_&NYw^(!UmOHYq7B6n))zOpos`TNygB}q_fTuaT__>~XBvPXmPL(v9 zy&O00I6RqrIB)`rzfiGO~iQ`_M%TwB6?AY@aQ5(k0aqqvaCJe)UCJku}7jcHtT0 z_}rRIJ*BzDH@}0swjV|E9pfvB)3gtF`vfrU zx+%e|>)E40@WuwU;^8a14tGDZ$vRIYYqlSBG7|E`rJCR+56=a47DaxOlwxh#-hHoZ zX&`j-h2V)sbajwzYv|7lj}<GQC=vVkdKfhDx?w(kCbdH?bI?fb+NC}pX^$SYd6CbL&IjX*2#%aU{U1;&dm zG$Dv=dpdvk%CCxP4rLMbt6BM9wK|T)b-8#UN9_TxQ%YSRkv&=Vs)ZdM4~!@7d$|HE z(y1P4zxP*V;A60bzN_Q4xg8$r}D()%&X;~vb?R@@T+e(PQww^wC+N`G1<3QRAc*C&-{?2+GyM8^AFOG&SLFYI+Y z#}$F?eDxA|9P+sWH8V)v1Cs5$w+BP%*!t!znT?ljg>wzQ)Bg3)@G{q@ls@~@4SL|I zhjMw?Cv({*=SM4^<(?GAJbB!P7U%S?t=yg4hzSpkdU5l*{S#5Du4~i5Jew`O7ZxuD zH!qxs?jJuzKjnzFyis@Sn{u~kQ*N6_irvz@HNLVN&o*3?x;Z+FQ2x499&kPWr!?!c zF%KTkM7^0EoTk11Oi&N=)x_>ZwZ&}ZI0D0V>Wg`8z}GjRzM7A@NcOY~JLY#|8f^;t z)ptjcnQ*obKh7ss&$z$N;R%*En;K)G%Ysy;ov2!kz#DvNTU7zdb=Y1VLNcCiq)aY9S?xw}MH?{Z1Nm1@;%e+N zw%VWCtN)k1$@reo9=&s;9921Bj*r^yTmsLBUt>OblQb5|^X<$ek$l6c-GT1q_OvZQ zQC;t!m9_l`ici`_zvft;-iHgc&WX=^QdGG5s-1T)FYlaqo-x0;fSGUiO;Vfk&W*{4 zs2AJo-t7Iv1sj`@1Clec)YncjZ$Nl*VRd`2+MCN59IdF+@>)kC6^B07K<4`V3L)0X zrR`s70@L?6CIiqEhD^Mmn|&dZ)-}s1G6~RLJqWm*5vI4IcxxTRD&Q@UFffp zNN^x|@;kIcAeT4srpLo5mVH~48tEN#_=5d!mIon^rarCOwDRAp6nQP;bWO{AGNwe9 z)86ItshI#};~MSlRBf95xy(oL!*wv;vg}}y7bamL6$h*m*)#R-?CU)3x_Fhg9)9l< z<)`QOl6Oa-0(-xu=XHF!V|yc%fj)hlNdqid~z~IE?v02L)7pIL(JN?KFhEr=8H~Va7<&B+; zX5$hx`W2TMj0L78^ZO`2)a+}`=AD=8k~gYK?_hlzLf_!O9gxh_XJm&-hsXKf?=JMY zEiqAAZ?JKj^z89|nRs1ZmNH2Fa9O%9n*v(=qBh<6CR+T1W@R^<#vu;V(O>u0Khz;+ zrqpBf=9rhUg9QaDm@W%nW(IlE43OiDvEY)-^3sv;l^c_I3=`zP)w#V`vLl|Aa<{l_ z@HD#W(RQvy2y|s&bgyX?0(Rj+`=Z9a^*i=XI+iqxzBwYK^nvnXGlGUieizXyyG63M ztX&)b_{pogSo^nK^y)#qMGrvT3UsdPZqfJpFRnV&b?@e8!jDT?5bm8zUpI1uSkoB= zHNHf1NO|qY*8hl+Glai|0N><)bHCq3f8v_yCu7Kb$F|VN-5V%h+O|k$VI@f%zWXX} zN!~WMY_YFw6C0dzovd-*Rq|Lk8%S_U>`A zD%xQDh?zg+%a^|Fx^Uie^+8UFzb@Upra3Gy!bO>XiSZh1|Mv|S7XF21kqP|Hqk{mX zcNI^S@}YbC3{6G1M09Jn*z|sCno?;g zhfJAB&CcBj5NhVm37kv16q>BLrRE|Qpb@s9byR5{NXsCUq-(7I+`V4(HSS9mCi`tl zui$7)M`GfgPsL7%`JVd2ChUXLxC;7qw4Bj&_3?Os`D?a_PJLl>nc`OaFD68_M>a2t zmy>sl;9LxDD|^ElWUgM0NL_Y+-jIh%H3(mQiR}0{z8ax@jS{hiQ+*SXIoFdzzRv?9 z9}fP!`*}Xip{<@v{qZ)R=@lNP8uH8xP41+csc|bJiz)bx4w8ZYL7REX{xdNz2#~Fn z7a~kd3s}gx-KD=6*^Zx@UU}R7OBm{#F0H~k?A=HzxqQQqVM@>Ka?4_)Ess~6%;7o+k3x5Cl80>T$6kDD97mb5^C4@*QJq6;Qt$_T6br#m8jppLIt5T8MyKJ`}WG#<8sTQ0Sp~y~o z>G8_x#rD;-j`X|nd_yy~>8>jnI?vLWqr|AzoW-J;YipdsU_ML#DeQSZ>PK!*IoUHf z-e&urJ`&I#h)!KDo@?kZxi?D{hIKZv;~9nk=Jd|_>2&!}^K2xL6cm7;aqA81aZ~ut ztyPuo4&5KsE-Hcm($CWoL5`)$_z$Vew`?c3gUVq5@58MC`4Ouge&;10SQHJ@XeFxQ z`3RAIvTtO^JLT|J6kG;y&?)=lsmOC`iD$v#pQZQp1ZDM>W>l73sAv=C9|m{VyZPOF zn5STS?~2)%+)9zSNm`GwV@~NPo#RDmjUoiI&rVTpv*gC7y}c)U0yUH3m>`;r=Gkaa zLH(hEn5FwvXXj}b&Fa1hIvRf*knlC5(u`p%BVgRATu1lXYkPMo-z!hxQ3@dt$5XPbVP?p@v=lMNA7wV|i4Yae|3ghwTKz1-&eB~QUR4UlN zu0-rtLYw;ii0dj0>#mo4X(Kj1@Yr{RoLcHxmc=gawU@Mh?Dty+M>-1RK#eRrZil*sT18$frt5RXbO)smx*Gk1A7{?M}^J zjd07D)Edo7F)qPf68&yBz=rxTR6o|JanmHALAfjFetTOCUR0bW%kXVv>oOogP-Rej45WIVsv+TQzvd)aVN5|XKBUvVRtG(Ze*ya`f! zQzKH|dJ%eh)o;ni5Wn#>?m_PWN66ah4N|yO@)hUfoPu{(?IOp?hk@&$Y}t#Go>_Lj z`9&sKQPFvV5lz&_BdnIw0RRlp$dsbu;8&exBJGpvs^ zRnY1t_<0OSfg~zMFvTLtV7w{8m_uDhNL(8QRt1pIN?1iO$_PVHXVe3b2zqED44?@x zAsUbkMLAG7C@&dFCSWm0fFu+~0vIY0by24BNF4InDT#pRCXhtTMx10kiJ*vpaD&M>7?fm$k=Hdg z!d^5aD3Fy+&>BQd6H^l;NmauDt&0_ekpQpNFWNowok|aeY10i|X(mqn# z2qQxv7{g(lBsg9W#dQTpmcrrGkuU-oh{BRo)fAL2^_%EwU|<;1*&t9OB3?idYpf%U zAsHfAq54{6yfFz4V8fz`$|eX=kUl{lqk}hs>5(CZWFv?&7==|ef?}k}8VW{6WFlM& zficu3YZ@6C5(wH5k|AE#gp5FfjEwY^F~&G{EXDwhCo>zG5MW3c7A+4aW6%VGfgT2X zo*zZRLd8%p43R?@dU7X W@{00`yaEDe|LjjmL4iKoqWv#d8v2R= literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/float_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/BYTE_STREAM_SPLIT/float_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c16e10e6804e8d5b3155e5814218afdcde9c1d17 GIT binary patch literal 3754 zcmbtXc|25WA0OLrr9?)C37KqV&r%{YleMz%mn_-$oya=K&SWOJQS_r3R@_mB5|KIc5oZ~1<|zu)sb=bvMQHIYZa5Z4reM+)(X6Tyvu zBj6}L=GDbf93OMk^rSj3fl1i;44Z1uW@7_cTlNYY#@~r8sDjeOsn_ZI^ z)jWSeK*G-RL-75ju?e}39CTCI@%8!2H| z%!qHK1x0@?jVbfO?@#aP;d!Ur+(+$sL<)ZoSdy9(SU&YJwC8wVpMI_HkB5?d@(SBU#AXK5A_ zw4x7{?KiggqGFsCK{k|6dt<&xV>o}wX?vYxo$M0H^sK6H!(`!y&bvBOt=cDxc8ioZ z*{=--r9Up`kkmW79=GETNG`m4rqe$r>wFdKa;HYl_dZ+(!7&xh$Z>ZCVYq5zYsheV z!FgwCZzomt0uOJsB7H|Kdyl_wGRxfDNM|&q+`Skos@uhDWwFc~iiam}0=;Ev#CCSA)@fMYreQcIL-Kw{L#&HMP*4I^S zdDP>^OWamKfBkc6QTcNb>0#4Dua!Q9HVzzI80&}gsvlEsoV929KOyt#?*7 zXh`XgrXgWu&aFR|=Gfp@#?sEnL_1ne+~lu{;r7!kHjZ$A=SH0zB1<0<6D?HYQ&hFB zsCN^j#cm1WKgQbYek3Kad&_#K_M0)s1Zs0?j8N07^v0HW(dIESTb`o_VouC|OL>Qv zXjhx%$6kN;?pKGg^^{^V&PA}$h*pWHs&n6 zbWmi;ZvFlwD!oxs;9yFy4%#khY5B%ZWr4Ix2RPwGT3q`G3rLa5~CjYpBWY&UL*Tf}Q?8bWbcrmMi zZW>2-v$;%DzyFPLg`EbYC83?k7a1Q?V-9aPB}ayO!8lN^0-b|5DFL~L1CzlkN4o2W zw{9j2d^a;Y!m$`)qKnkUUR7M?2~uvEqcXXZ59oZoJIjV&)_UWO!3u16jG?#BjM<8s zW!rEmSee_wMp(FnJ~-s$B@FQ8>*o8{m2A9)bz4suwF{)DCIqvX|2a#%zYq1I;?X^> zGy&1BDkM?&r>5)yudvDKPhWa3P1l|IY>`G-4;DU@nsVx<6C(JXItSm|NlyLrH{XxQ z#vsELHP0OXJwqzzK3bX$?)Wt?A}hOS6qrKg7+;Hx%Dsw!g5Z84D;ViY+LlPy2N5{z}l>I`Cswq>Hr&< zen3|U4@1qg{%cR~EBMTr@lDz88$ZSg?5{;gQk6~S*pAf|kJaCp*T%BCCSGhF*Ta#` zJ&;Sq7s|)HIu?xQB0e8ejI{k>Zg(QK#$cMHV|MXRl+YzF-&VDh(x6=T)~eQw&?ZgAP-3;iWg$1C)hCEoklRs+b&_;%yVf zwvh}s9dWC+uf^_*k=3;c26y^Zd_?QnEOK<;`%Vky9Vg%3S5X-5ak$#ibCQ&)WNI#l z5w1`8qsYrNr76mPB+tgls?YyrTD><_XZ$}U+8zcnOmGujDCzgx6 zW7t;}#a4AoQj_WtE79Cm9u6O`JnU^VUB!+j+RD_gQ3nf24wTEdVKwsggW(iEj10 zdaCj-qe^eQFP8%bvzcbbDtC0kdB@VSBq1lls40}%DSoP9gsmah6xm6BKFU#3wBKJt z<7&g~kPx>xTp}mCuTFLU`m6P-Tz69&b^-riqWXe$ORZ0d+b#G^~WLSPd zRAYaG>X}L=gYvfA*4T2ps)C1T;;T@`A^X$gyiu0rB>fkKYgYX=Y^bcx{9|2dDa4)+ zi5W8)?&=*^-e)(A3??b$#TgYKk4#oD@SG#wPo9jPJ2QG5typVPQkmLV%|vX+o6Pn0 z2k2&Zw~EK~3l+)@eML7DXnCbN6#OB_dNu2T=v)dHDMYEptT2LXyw=t}5K2CU+40j2 zOp&rLZHaw=cc1YlOAjgg<*p{91PA>IZ&M5Edt0Lj7;%q*W6n-%I$<^G?4*cpMniLs zMrN~$?}9M1JEbY9r8pc83%t7vX4k`Ffhau=&OjK4JG%!29UK;f(sRI!#cBM7rc)sb z1c9tw7h(d)Ju(gh(J+7%XaFzd@70DvfTIDHqC-Ii2=te`$D}6!DahE913IJY(mTLv z0F@vFh}s)K01Sd%?S+69D2&DY1H88jaPwzzfbxGD{Ra-z1LZIv0hFYl)~-0zA|wTD z3aYo;G-N|~AP#|nG+^yw=^-2zs0{7_Iu}SDs{M}!pu+(aARI&oCP;A?2IhcrU}_fz zm=F-+0JjOHpoJd(TXVa+rfbjzG2jQVD+=5fJ-Am3x`W+HP+1@WitZi|s0>h{!vZaJ zhzCUI323LF^xygblR*5h2*7~KLyTP}P90PQaKJ5sXvo&srO;h44jmEw8v#F+U0sL) zf)LC|DJ2*mJjlu2#m5oB2t%-mGV(DyI|e(#cpq^hQAm^+4257rio&^(^f&@c@cyX? zJYsBW|NS2ei2{DhxB*!;NjOrBz|_wHgTX-VBTg|kV1*%3d>mL46GKZ?dIx;<46@)j z=>~Rg3{JqffW?Mkac)>`EY=Ns4oU&dW3l>R@*Ebc2d;p>w*sCP7%~SZzyek`kgp5s z0K*sr(}$KOgv3E{$fU=?(l{?`D-(CX(Anh!D>U}l>M%ZGf5*Up5SL(SCx3rw$G||p hFyT;_z#tDlUtt9W895m_Bnk!W*_{CfGXnln{|!|UOV$7Y literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/DELTA_BINARY_PACKED/int32_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/DELTA_BINARY_PACKED/int32_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..358a9792a4a98abffbe7d8545485f40a52f7f417 GIT binary patch literal 2069 zcmbtWc~nyC7C(q_odS(8Fj`Kfa-Dfvj_E;h%2Fs(2sIQ`tnb!u>x|!P5w9*Uu-dgXk_s4s`^_{)oH6;0Od9dMiXZYu>FVp`}#aML*%F@n7h&rk5J8zHff`K@_pB{c?dP(}|%1 zxOR439Pb+Nv2pw~K`AXLo~os3JW@0(F4Wzea@B70FYQh_Y7s39-mF<#=f2cEWdl-6 z2L3wqJKGioBrO{o@XI2pv9Yy{gT(??TfLjFNVt>ZSDt;WOJPSfc--q#kHdKDwMpl; zpxtMh2bGE+H*jA9FPK;FIMoz3<@NOEpZ)Oj=kJ_mQaQ`R-cGN6StU`-O-dM^jI5JZ z=FgEf+qc4Qui(FNG^WEASV090$P0`8`^E1MkFnz~{Uk1BOy)ym-NFO3^VO%;sT$Q; z?1!8?YGZjLqZ_A6JU5~g^dnf{;o=;S7xgeQV|$3Gwl9)M(cS2Cb=zIZ7TcHE6SYK4 zW=Q2o=)g~bf;6MJS(_~;cGS=Y2eJeiA9J|*NYwgKpPG+SLf_H@^en^vkoHpkj?S9* zduJU_wePden!_#PbB;w+T9#i}>f%E09Jb$a_`2GgZ?Ab`K0B=VxAZpwC0lUW+D#H&;?hj$s+z;ysNO5b50tj1@ZQs73a0yW+*j&O zm$b&b!n^))X1=<;KD=co-iC6hSF>DrQ+E`~CA&YZMV+WO-&)B~ zc>h7?DTQnc==2Cl5<}Whqr&luAGfShEYE1i>3LUkgAt7aoTukwyAA49t`duJ3I8#d z;pSxq4@SNxSUWMNdh6RWNUYp-*VUeV`72iP`35SQ$lhX_ULQ-ZGVQX* z!Pvn!`!0XnaJm|Bw*IB#y6pk+&@DmWb`2zVB|PAmA?3WT<6{dVzOnkt4J2#UfS`2AMD&FwvyXN-% zYG>+mMa1^>iL<^v;&<&K!0Q=CWU_x{21MSfK$pp!Kuy`?T1xJ)zYGaqv?mvu*3Q%oEj4YU>xlrp( zt~&G&ah=Xn!4$E6u2#8q$?perM&d|~)RUVL)BNZ{ohLnvb19p$FvzAlt=imO+MYRD z<_#61JyVfi{!#hanV$KHSzS4s@76uLfi^YID+u%6rK(O6Osr2no813pBGoEAclA3y zB1rU_5~}zEoKun+EO<#O_v2EhwR+V;PW`HFRyDocw6iq+%;ZSzj&>i15Ut8zB8}de9b>yrZ|2-F~cI6xy zUN~LW^*;TrQZ_HP>AdT1i?J$G!f)FByCV~!0Ma(l!7Dop8?E|w$lWq!Tw284DJ@j? z67qs-oNyYM>1p_esCrF_L6e|TwG=dpLM=1a( zg>4FGMFu^d4rodd7%T>B0ANuHm~Ai>%$kmpztC*bk}U%zhyOmpFj(2tQ<0T*z`-yE zJm`Hj`J|KON)ZN1G7n&|Xf=>b_VKq{y8-$Z#9R(5P!7_n1;Je)m;@5SK{pT#2VKHf zRRRG7cgrIe5cHIL8UMGJF```ZlN(ti^WpM*4|yFKNb=2VAiAvXlogR-HC)r>M-$kT z0CL0{4+mXW>xatE`xl67H(6zljsRLGhR%siV8)w<#>AM?Ih^PuokS)ljuri@4jyl2 TVP=8BVrBiuLjb^6_CxSD-7S(| literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/DELTA_BINARY_PACKED/int64_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/DELTA_BINARY_PACKED/int64_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..23270e0bf0f5649d79f00699e4471e0b16206920 GIT binary patch literal 4913 zcmdT|dt6N0+ke*Hd#0J`GSg*JblF8WMKzL2mpzp(q9zrIn22&hNmNApB$ZOeB|36y zTq1>XiB895le=_67q`Q>lzs`5B98Ezc;EN0_n-HD|9ZclwV&r%&sytQm%aD*vp$bR zJi}24!n0FU#ZK757xIK`A=}7+1AW^cdbGC*I3TmLt1ZEaOs=J4}m?Ynb> z|9i3S|1$X8AdL}nHl6*n<(0XkbyQ*Q0A!c7gAH zEI_FAor!`kn4N{}0Q)s;(kA&WuJB!(t|PZp2zbmHrZ|%RP#U}*sBDWx;NX!Z#+_Ui zrQk|s?h_(Ox-Qd@Zk_GQr@ZQHWRnwufBYG|jypA!zTIb~C5w%T+KeHJ9&zTx> z-(T)oQ*|vz&0$iN#twL|VY>mDr)P~sm$%7e7`?r*K~oxd%UoUIzUGE5bFwd9LWjOw z5Dd}2ggKh&V;O{GMMwQuNeSjB)X3FqV|C~v(KIdSNo0)anq{GlEsI>Ggz34xil6B;%Ql-5p3m-eRnzX zOUC=1AgsLGjJDd?Uk+iBbEI&3_uDNC#v$$3m1)R3!}=WP#@OG-L%(z3 zG$?Y3c0`Md>)bG=^do_j=h$~ylC}tS1Lok=X>2eYF)t8ytTLGcL5HtxN4!yH9x(Jd z(2?a(?6#RL&7Rq*t+4$weH8U1YL*koeERF4!`ILYt`qN;>g={gx# z2!mhFB|y>Z*H&Piz3vRSoVr|vxv-tp2HwyF9?2b0x_i-smcJ`X^e$$oNJlNS{8^8!ApzGyuEhS`0>3iO%Cp zg%Gg?!Tb1nOh8_eA?>+jo;hUv(jN~uU-@&;N6YGo*tC_!d|blY)2vA;1_U^hPL$P`+UGy^F1S~EL zA+)~g??164&&HMD0zHk-T%{-?Tua_RHB+5&%kd3?2BF=2u=9303i|J+mSW+K{A~m$ zvFDVUBp}(AC+{iM8^bIaCDwqK@4S1y*X%75hfMM zKVBJB=kV#iD@8Z?lNX8tF2gqQLMC0YckGYx?u_eeU#t zwQ(jqO#RCdy&qSej#>_dq!aW5cMG=UYszfCd`U*pXz1}-=M0zB8s@>HMX}y!&H9ES zl=k(NK9N@X#+CrLM^*y4ZQHZ)u=i812VH8bo&}rl^!0*7P4o!KPIyEUC12V$v83C& z0yP=F@W5#5tam~vnB~<@hx79?{gG5sIu>!R=M=EwgQhjJW#?-u)udAZjbP#oJc0p5 zjn05I8(sB~w|qHp=QD;^NTtPDCRIp6gL9>73CW zxPtw>#FiPz4GsnurkMvZA2;ixe_qp4T*iL*1<58_MzCr(>}}A5pl=&UW@lSiC>=7i zY%U!CjZRa?uL2=TDHd(c9A9$MW0kAuE#t^rA z=GTu$-Y$@{Tgr}@3aP6jMhmIae`Q#}%z~4tAewz~0SaDowH0-jNAa=v(?bSa@JV~F zPpaD#lJxsX+8v_a-k1P|ow-(MY%ebyZ-JPsGj`>&9QEz%| z=?OpB72YF4WgJc?>g6TOCma@vebrj}UEgU?RUdu$jDVgfq05r+0?2%mnGJTI>)xXJ z^?&OSF8A_}X^=M$8uO(dV?L_5_xM^nXms)U1^U`+IB4@XW*5r#+x3Bv6zdGIC0(^% zLd7MtN1M91w0Z`pd!&X!!o-)KVZo|>w%8GuHEbI>f?@L7yw$)C3GxG(QPM*^ zrOw+439?+4s|&W8-N5Aq{qf`J+HScooL{OF2j)Ivf3*0*#r<&oP)ZZaAjEw$4vB-a zHAvQlNC&7|+dK`dJRX_C`=JpwsC};81*GSG_#Zs4#A2zMvgG4pK6U%yJxl6R@?Ar^ zsUr9!Olm9kK`*uH=R-+nsxw<5dG{AqyJ@gpn{0o(!j|IXraIG}(*i0$t8DNv*d01g zU<36-ukp%~7dX-K%J__cjDB&{obqt#ccLe~zI)(%|Am8j9zU4U^{ekpIFM0)6A$qJ zEeh4Y@ySx7Eu9zWC<1#n*})dMu^s(*q*Ww@{kr8YnA_ycMRunjb+R}Qv`*ql>~CX@ znU#wVn@}-rXL0?c^?)9;~X>0z>0X%4%zj;r(%-J z$BfxO2$`vmxMxDUKANjfy)1VQq@i$U0NmxrSHdrDFGk{dRcYpU$paq3mCp4SN61~v zZtGA*QK2H(l)mUMaMqh5KtnSe62Uim_Bob(dDk6w(v`4kUFqk(-8{x^znhe9{lFaq z+wYrjQObw4{#etA z+K+!Kh_vP@cD5|xD5x#hM$&b!uKPoAL1Q7PKhSeSmp?9YL=I2oi?MiL-Mt!Q47s02 z8i@!qaMZgw3vvz{$HV5P#p|J~k6(Z?3{DLZf;;B7+0yxaS9oMdww@i;bEZ8Q;`}Cg zLz?CaFSK0JB*EN|P>C3)hHb%>RQr_)sMIVUE`8}z@nYJ+8M(u(ZI4n>QtQR#pwYSF zI+0ho)Sbn+njh6>%qzN#A#m`-eEJ}7&2;#j^QR2uTz=MvdiOOuVU^taEFRM$qcs^P z|1y1Oi$6<(Ci!azbU*8i1IV34;)*9)h1e(k;7(kwGx40hqBK!q#iWrrPGGL4y4^YT z-Y`HV)nb1v#Zb451-CEmVFUMQ96<)}vbCquH}u&+-l@OBfamNUjdmy%fHJ%upJh=* z+X@zGnm;g-Y1%XegGQzC9J<#3ml}xv>}3QEx6*U5mxoTTCZM@x9*^0!Cx#=9WLquJ@$DC(pv~s?U7#J^}{8 zoh8W1U&|-&Zr(APe%JPHBHeLGy#`LtIX)en$I4iEc&z<$%zD7nOGA1ln+43w>E@Oo zGP~&y)lunLaK_y6C7d14F+!)V)poI^Ur!FQ$jm#&I@FTFZ=)cxM?4ZHCK?`vM|IU) zl>E8L5RFNE=S4_NCe*7-w41N8z_g@)EcM58&oJ=FZ&?Vkou!i?ADugaGjaNskmMTg z!&8#8^wgyn9dk#6sLCV+;FRTZxOe38cW^mr`x`c<;|`5vqqbYj!qM5NmN*KI= zmg@ZDSb%jDO^}zCXdubc_5>rEhfT)md!;Yp=?1539^dbkDepPi3{%dB7al87)T(xK z6j^R|6*oAds0Z?~fDn+l$Yd1#bos7GI_pab2Bl-Ra*_EfX&*+M`reEMFz}8|mDpMH zNV;k``(SF}1(S$%xuAS|QVK;c7y|ng_5L8L^eB6#{)61$?abdT=?{y(xq!9iI5+V3 zb-xDE^~bxh>_d@31Q5+Gn1ko90|I#GKY~qPeeCV@bKL&}DI^T-CV~Nb<-+(y%VLBW z2nh?!fQXGri2?l0asQvCa}14C86lo(8h0DE zp`{FGG*o9a9IJ(I$x&%AG&0Z-&zKQ1%kAfi4C;~nFr3802>l_&;vlg&RqP!n_7jWa z#6EF9yx!ho@$_NKM=TB;)~fn{YE^B@uq$F%s4SI^8y+7p97l!F;m$uX#pg#)RS*?^ zm_Pl)Q+p@Oj+_zyvmPh*{SklR@O=LP)pGs|9v+~gw3y@-s}`kP{TtN;Gywnr literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/DELTA_BYTE_ARRAY/binary_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/DELTA_BYTE_ARRAY/binary_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b252f642ed6ef68106b2d6280bad4788c39f4868 GIT binary patch literal 8197 zcmbt)Wl&sA6eaEs0}Sr&4uQbH;O-vW-Q5F0CcxnC9)e47cNrjPAXsn+5`u=1CHZ!% zw)V&V*gdb`y|?e}b6$7V`|+y#X@InN0dRmX0WXaMFoz4k0iXa-D1?#`Sy28!Ih?+9 zl48Qee0r1M@Ybo3)dn9Q4h}Bt@4Xpx(R26zZbJTl$US%A@JK}{aPaU5@bG_LpsTB! zuJdC4#((iLK{&X-Mfm@-^1oFP;1T{~y^Q$_#{c+#L&cZtAN?iw5C2sEr}-!TZvq4) z$S)ue7cGjn1%Y9n4O087#YpVd9MWzhz8R4*1Qg~uGrYVcK}?!UCUHo!-daApjjZO5 z&<0x=zxKF@xpZs)5+rB>K_MyVNCn=w=3qm%Oa|~*bhg|b-ex=H^D>1~^--4Lc}oTv z^Z+0<;gH-fo$Z06Fl_Az&qSJ$i&d-zG~%Mw-;R)PEIl%W#mO@%odBJb1R=xwkP^)C z@~i;+MyqI}f1>EGb@zDj8ws(M2|EU;JP!~s3&dpyewfowZ!|$B`aA|nj zfTn0FyH+t*!csi(hK+;Cz@9#{K`0%R1|2gm=IOT2QL*jq--X5_D2!NJ4_*>PB{3pf zm_vF{4aI6y{YpGMvx#7i|3uYFep)IeWsl9}9Ot8b) zSYdXoCg;G{)acRUurVZPr}i*erVOb9@RjW)_6c6c_J|w>#`QBz$uxnhR%8E(PNiqo zLeUiE%uB3$ewu5?&!?pra|t17$DO_>HhXm`rDbZ*Gk-m6L;^x_^>0|z)K}u`@gocL zq)nqzRfs1+mwnIqjw6dlgE3lMQ?{Ip9vAuLRZtt=1!Llf6p?upVP}nMA`q96!as$G zXcRwIZ79ehYxDm^(0I+G>{jR8)~7MJMr{^01a+suF7CdKS`o|BiuP}))aoB5Q;$Bx zN|{m*WJ8&0_Z{@Wz13>EhRFxHV}>ZbB}+(-HkaUaBUl;^9QevrF+>AtjZs_1UJeu7 zxJ%8Is2bK>``qxQVM^{X65YapE{H_2JiDfi=}X@E{;n%1Uw~FRdX@}bVcYDe(@N|Y zeBO{&_UajVzdMHv3QugQR$Qq9r$Zq%a(#KSq6e8^W68BnXjZN(PLxTYLQZyy`_By5 zt!A!2ukT&J^|Z{B;*En6qD4x$nsJ(pp#-I!uT{Pu&nY}P8Zfzck(;;1Y6^Giz;`;7zJ5 z;ZP&&+Xy_~kVa^okz{U@htDd#Z^19bDOKfNZc?i5k)?KhFB-RpGnC=e$unm3=5Ys?s)<(U$ncptDiQ zo5Z9iEcffl&a@ws`Om+&f70m|%w&2rY*`h?X_3l^Q-l)K3@D1%WD)9R#=Gp05BG|_ zYMpT%M&@CH8D^)S9j(B`Js4pJ+FgzEn47@-em1tthwM&=d>A(6A4pZv`ri zIaRWyr&2Jl<{P(%}e=?VbLh(B>G6m zYZF(NMDrID|M~?nSHIcNt>$ zmiw&>d>za^yV-%Imyg}bOMF6}p|*^R{L`r{MrHD#eGr-+g%w)u(Li`Ttt}py|Q%$UzMbk7lA&trfuf*mFr%i$7u;%Gg%2A#l^Md(x{YOLeLn=pjv74U^?A}B@sLy6_sItseHlEj zW!4(lN=32wMeV=ce%I>wW%=E%{gFJpdY(JhHK|>hYQ^{c8R;|aL*{RvV_<5Z9r}WF zCemOB%pIp>%D#miSXaFKroPQB{Kf#3ADOnQ-Qcyf*z>gQ#hCc;;L<$HNlLWhAMjSz z-OY(>dB_MqZGCsiE@SL=vJWZXtVM`Hxbw`$WLQD0->!>q-ATE^a5`|Tx34?6HM|YL zZ0#B@b>Dm3TAc=~>W z-$a7fC+DXn8YL;IRwa-T(XjVr%$>0Hq@G$saR$hj_MRv#8z?Iza#DbhlfK>0^Z0SIT)H3tPVPz)x1tNTbW z?LZ`1)@nt`L9k3)BjK}Q1!!(|Emb)9k;3S8uJ$PvjXFvbDOE4!_-jE}iB5Nc)|(b; zP^VYiz^fI!p9PNk;qksYh+OL}qY(}yh)x9Algzv|XM>o7$7JrU13XC)uPC+4$w|$E z(LY}~lCh{47p@tADixFs$I;F1cj^h5UV)l|N^f!^3Pe-~C~z>LEANZLM3wJG=xa*> zGoe=Pt7b*DrK$LggBe@Sfx|;-c0XngHfKf>k~GM?^z8lw&Zx1O|D>|STX$*gS*C_O zex`dnP9gi8HBbxw^~FH`w&Gk+e=<*S4cC&8b1+o2MPndO7L~%Sspzvap+W`Xj~fD; zFSRw0u1~t_{H`s|x@-$$*JG>~p1=$?mE2gu_JxZk%-PMh8H{XJ%R_m7pE^Gj3|n!T ztb}Zv8b0qbP62(GKbAqTkD!+iqS6)*ic0{7F)&MzV!^x=Ta}IyZxHr9|JtNh&E;;j z$>x!(Me^qQp!w`)ZnrJxtS_irAYp<=fNpvJR)tHyAR3 z^F!_yCM$q}57nf%pLB#ClM08Ihgk)-Iv<$=a7Ob2T_d&7jc4hQ6I>ZpOj4xZq22~) zH|Y#=ywh{v`v8Z20&^K_U**M|>&pJ_rdn0%Ph4|8TtkZA17Ta=mf^hL$$p0nBQdST z8YWHk(R2k(+;Vuh-nUJr&>aW>zhnDwd^qaotMOgRXuly)Zd*6*q)8zUY<>!9V92(# z2>m6Pj!S+~(p7ufcDtklJk-fLiq7z?am+BwS2=;|8F74=*Z1VAY>MHn*EWhaiv8&< zLS@M8#-Y-nXMN*soJ_9t9#n~N>utH6NdWD;^wK}0>w){0Ron0@?_X!i#F*@b_HGbE1u)-z|Vs9myb9U4mZla|>Klm0&4eAPSf+c;?>+%Qf zt2c(_6Dy?D4}z)q>rR1xG(z|lu`RRYfeztm+gwBzR)FoggRkshhoz>HPak+;Nvtcx zRU#?O#tMaUVKvG1ut|P?UHw`MueWv@B?q0BJ_|XsP2*#x5-`*=WQSa-yj!)Yo=C9W z_N$nD6QSFdm|U!LcwX<=7ufy>J(fTvR_~(5vZKz>xn1rp1h2Xt%ODajP2QWCD>XG) zGWuIowH~f%dkeKwRTB%%o6(Kig1)CL>~}_j5p&iv!lowLI^913bbGk!9k)?B6+0RS zt6*?yAFh4b;rQ^Qi#fhrf2jFN6rv_sjTm(ixr2S%w=P);n4X8jO7)x?G^6zg<%5Oj zkhI*L0}8Y3L%+;-9)1tUv^7<<$2w)!ruL?hFv69#SC$f#8-U5rA$e7-7X4JW0eFn7 zvo7s`L1EvNrtmKGG39{+b_S)g+ zKSf6tpwS$}#Mv=w>|{XmS$b!ozAvA^;*7p9)x2yvHR(KB^Qx*0IRsI*7<)ME<}GAf zbUDMF{BLO?U2}YWQ^GJ7Y8;6i^0}*yh){ZuQ7jyTfQA!k zGJ=;TpXQHIF7L1sOy?VroIKdG=?RBvi&$;pptIy3r8!IxfvKTrfgH?-(fcVhK>;2$ zK1kpj`r>CdRQe5vnAq^_a>ko(c6LSIo`C3NKE_Nw^s%%%SsjrOrWKlM>SG|O(vC>; zVTj%amEzOocb5V+&K?A5Qjx!^#icRP;Vu+~Ik(XAIZ(H>vb!+6AsO^&pzK0JP-yYyaCF`x!)&s^{8S@@n;)3PYYA#*Fl+2C?zcVEcU_PB+aMn0Md6`0>;)ZqH`=oqxwvI%`5e$039HZi1kUXDAG~EA zX$QbUTdXY82g3nz=r&)vRCOp9dC-!&p;>*MaSBorQ63>OJQKRAHW_CT5ERiiZE2l~|IESNi1&Rgf zLft+VlC9%2c0R{Gj(;C}ZlcqC`#vAB;&WeC?$p4^OnL!vaJi7otp5MD74hrw_P}@K zYM-Miq?jE?RkqRks&H^gSCLN|yfqQ~Wr2ufA2G9Y7eVIUy0TtyQ`l*hS%y;;l7oy_ zdU$E6aRh(A0@k)=!eS9=Im5*An-F34p+oRPTEhsD^KOrG@G6$3QaY161Ie=V7BVGs z#LEU}7=15~__L&Kp!Ul|zKov+;N#8k^~a`UTwp!BrrtKLpiPNIt|_Dn2KF^>Ip9bo z={7PIJ>!0p5-Vxa8vM}2gm@=O;e}z-F}Y2|v}wZ$GAf;P)eN)^CuQ5$9_;0dm=8I9 z0;9nfckOohPS)UNl5bd|&HcJTu()g9)9afm(CQ4Lc+rZ9c}2p_+S-O70Vl zdG_k`-4UXSa^9Oth1Sxpu zgzG6*H)pj5i#}-TO{FuWlDK`A>YsZI_dfe-e~!TUJV8f7{tb4BEB2#xb_b(hZOv1L z4ZI6cn%}WyIc{7Y6=Op1E~O@i>k_7OqZFjSZ(CIt)shP11FSeYd$m}V%Fs8rfQYQZ z*loF(1PyiY)Vm6PC~VH~Si@hNTXxuXoHu=j|G}~lsF@U%KoNrt=5S~z`ep@1llVjt z?2#DlSDmM3f~a}*>B9j`)NNH{f7*A(+?bz%-Qx7E54eCM_BceE@`*AwJ3<2CqYP=& z0egwia%@ER4a)~fs#7krc@-B?2X@6j!ZjixnF1qhT^&5EQK{^WsTS!Q!&pYW zG^sQ)hrp5BKkQcIc%gxn57&{(2U7!goJ-3q1w|E}>s?eBdj5y@WV^Q3M53uzq!Ipw zz>$0DZ8$z8tds=qEKfRRQ=}b$?ewR*$RL5w1>hAsJGyAQa(G3hRe8f(5?;C&vk-4?PQ;i#KE1HOM~9p!uupjr{zr1sKufieJN zf9*=jr#k2^*Eo=`Ph99VtTOG&^fsIi`<@Nmp_iV!pORLZ>@2z(d6hu`*__r1@= zZtQM+uak63g=%}Q5)&y=Tn)}bT9?=97HpI{kh8>j9%8DM_ z_2(IZtNp0sc_%lN{oG+`r%l)6ht!PjQ5RZ=6j5%^c^2;4V^S|wrxYe%HFYvO#a@{o z&*Y1N=LpvJ^ON=PQ^<~wm8TnlU&miQq$YMw&r-H>;7+b-elz2Rpo*|s#Ojq?jn1|R zYG0B5>S_C!$#hb}!^!wgu+!vXBZvX(;p&5D00sE z4!x8~U~T3=*iP6diW2jVPNliP=0tCZUp)q()V?ih()VbafD-m7bl?yHDexfg%1PWj zoTVYGPOu&)-g3h>%2K)2ai+Qma1?wC$-UAIc{j&zMDv|>%I)G^#xI9pcgeut8yiPsT16$~9z-f(F$6wXsh#Nb6DM2ew+N!L5{ZT%BO>v*q1U)?-`R;gektOr6BVM}W} zq(m|Bgj=DavNyo;dvaw&9C&ZLAnTatXgeU&A?q$8ZcI5kp0#Zpzum85t=-Q*IDeGf z=3-L1mo1R2`;-N*`xdbj`8sXd=hh&*t>SYLs($1ufjLM`H)vu$oznL$p+MBfwriD|n1=rQYUgqtsh*(VygHu zWr#U8{qQ77*BhZ}*mPzqa6rBMyVPY<2^&S3Mu#91Yc54i9jj>}b#)=oGhFy7z(FJ5 z?||PZ7C`I56RsezU=>u!!B#OIy}%uEjshFT^<+4>DGS!>{ZuePQ@ zcc_?WB9iJ7PmR;FKM+T6l~RFwtBymyN$i;HZUK28PtLk!DIO?El{PKbI6OkRnLNoo z9>EYEB^W>&iMBK~LCdCS3Xr{m`zc?~3&9cifU3v|_BbyOjQ1u?CDIgWRJ6#>A9wM* zmFe!5RUI;Pb+b)Yg5_t~DTR$@3j+)Y3V>DLh`F#a2;e8O|=;sz>FyXuax`TX7|g0b~?n+Sua&r@Yv@q}~&&F>I% zK2+LpnSS8DFw*`d6F(h{sH~vE{U_VTL4`?leEWT{n|7iZO>$;_4}{AtX<%-P4fPr! z7*qRa!|R?C9=;nI%1&jRXXD?*IM>IVlKUUq?>{KzZT774w~AOT+BZ ztx}zxCKGy~rl%KsuS0lyo((L$%o?!aNIT`<+V7?iqxmG!L}1r_fMIWBy0#^1$rqC; zS}x146Tk2Eb}*5$UGAI3`ZW-ga;A^bxrBqxlGDW>MlOZ-=0n`;z2Bnt=;#+uOu6~K z(pPM=i~PdMGUu@e4#vd2&f=BqOPNg_YhR0StGpKcsPeh#2VzD|R>9&a^-_UOP zm*DvQc3w26Zr0lVY*7xJHx6!6*a{SA?;KI>(0{^WQ|2Q``x3A~0?hsHr+I31oQEgF zekLAhV$d)G9J3@dfVRS2*hbzCv6S)QpfY-B3D?=i*{C*DX;C8iZjMqlKOC$eGgeuV zO@c-6uD@{ItGQss2RZ8+lWZs)Bf4|m;j@|829eB2URZgM3^HyC8y~#ZSr#Q!O#n8F^fTJLsu*BuA^cC$2PJLExm$X&AGo#ny|gLs{}g z8c5*^k0W_fUY+DM!Yi~VyOfIK@hEqtHb%0E@D4RHa7yXxk546s7_5uqn363iig#dg z7El)+J)Z5hh;!-)`iA12Ddq3(r+c%xd6F@3y3?tvFJ}9H3(%;|PEW2Z*NL5;?1m); z!aF$0*MwA0YaFZ$=3heyP+}JZ?Q}K8_23(?CaGv<)G_KqzH`m2w>ZKGGy>P69Tj*r zhy{Iq51)J!D@71pX%ne19Fl%=XD>vb1xY+sE-B1YXpiBb%><^i0N9yruX19OqL*34)Mbr&j+gfq4+>)cAY755Hm!}pf@6z2N z@cP`2jjrBjenF51KpQ8+`*6>J3WEj*!c8H!A$6+bH)u*}wRXRuafJqdI;e_KJ!yIE zW|01v#7~i=oGfW%fF<9KWMox~C%nTcJxqLHdUckOBN(@;g3Dsj7Pcp@`)Aq5s_|!_ zZjH)2vHm$k@98(eEP=mKmRFv^U`hq#6xbpNJ&9pfMTPn>QhM}T3C`glrLCq?&E>0o z=Kegzb^lESdbY&v6HjMDSn0O5N)eJ)onX}01p$?r+gF@|if(Vp$Yj?BoV!(;cnRQ` z3juB$1I&!vZAxThI!E*grZkjrDcvF{15ZTdA)Hx7_`opys*`%CYYOOKw{8H=jL zKQ0+GQm6m;gvF&N(MkcJ;6^IBomjAz`&d#7i32Tg>O!7~-~v;+Xb_JPjQEds$z?~{ zM4WzaCq0u4R~@T>tqY8eDe>8JEVKO&F2#Z(%qzN2dbjYCcPiu@1b^eZf@T-$8RC8YZDY0yiDdla=1gicd*k7Qh+x7t@VBwx1I=70D; zA(WB_Uz!q=Jc^z0k9YmNRg~9ye;5}=Wu}( zKnf-}3IG<65d{bMUmozcK-?Ml(wNZ1|1U?`K#G^6Z=4rRapDRWAPSHv8L=PhJ- zAl1K-3<#w7*T3}tmw)NW{!QurH80VNxBJUi_^a~*&A)^HgRJbok<$P2|HA+Be;+OF zWN4se|G)Y#lz(+NQW|#)PY)j(Z%!+BcTNjWPd7grUmH&^2RBz5K0YoUE*>BS#mhQP M;NjpjUVZ@hAGeQptpET3 literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/DELTA_BYTE_ARRAY/string_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/DELTA_BYTE_ARRAY/string_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..599e72bac0211ad70d39bfbd52900b2355f4f8bd GIT binary patch literal 7317 zcmbtZZBUfg6<%bQDCMJ6*MTW^#3+dg*p!q`qv=c+C#fQhN@C;3FzxQLpX|c2u%AYn zA1xy&X(Wh(nn4qZkmN@vMx}|J&NN9&OjF0npe8fzOdFlXPNzTmk(vJL`<`>2a{=2w z?HzpIeeXRV&w0*s?~BV0PwUs*4);Ghte-mf)pB>4yU1Ns{f*}f?=Q~lsC0SlQBmP= zIDTmV``!M(UjF=lza02~oG*X=Y=Lv4$Wh^Pxt#0Ij zQnxC!!asg!cHgO=b^IoO0Z>6>|*I@jgwzXtO^KYnFyZ02zM#Ebda*KQx{yYr9n zV}H&M_Pu>*toe~QMh`}(0$-U3mKFzFTimxkY6`D^d9?81n$bD$7aD(gt+l@9(+{H+ zc0)r~b`OolPTV**d3$T;O!M{V>ERn|o~wIjMQeYJWBcaX#`YCYo*zGVea|xkmkuAD znZA{7YdbLe&eWdwZ{;q|9Up!Fz38v(5?y1RH^<^1$37VxI5~A?`owtgx_h_p+ftu? zYTI`jKWg#4{`Mb6HvZ-O%g=3!oi3VesGsUz*)&x1(=~-Z8M^U-Z*tXK*`_B>^_O+r zH}&RdgWXtXWYvY1z5SNl?Zn`0X5g)h;n{58 z?PGKOFTOJQOyt0;$M^o}y<^dVlQTz$-tG8XE>dbMI z@7S1qDf_*n7gGIO_W$_l$?Q{u{psK**hnbB@M-;jl*k-3bvFr z7u9qh_bhi=OGb#mb@k_4H+2@|=9cvE=*CF!pxF=&J$Ldk9-@;Jj{psE zOC*vy{4#R(V5=?~SsgI(`{VYYW6jF}dNCwTik&bkAqWup#@l)LWOUlYKo|S8JqVC} zLFiP93>ghF^hR~)#>ik%m!iTSrK1ao?B;5`-UtomG9>2KO;Hea!bA{5%tKV5;v{OA zMC(enLbqf!;h7ZcN+nwt1DRdO8)-rUENqVs9)>)zVn1|xQSai0B#}uhrhU#*ToMFj(lEw2~N7ZWpktZtiDLwH>71#p3_|0a%oI)WYNH-|8S;ZuTi#U|U0A+?` z&=qA16Jd(V^(b;G42(r0ylP*eWv&Gz5^2bkK@`BH;)xzET@kXo5NozpFepq>(O4EV z{LV*cua}<*7KoKja2f#46K7-spzLa2nM(0jhT4rPP^b_=p3*Uv2|W;0t{zyxAzS5g zDkpRgAcQoLA67eYmQ!LJNuVSt(Zva|jfABXi7nu*n}+A?tVBI7#ZI#%IWTc9vDG$B zv?|HWvW8bP)jF$AA32qpp)*hjv8ui1+^7O%#DyX?^TIoz2N2Cb@KS=OMR13oZVnmF^ z7$u~F4H*ndyTDyQ3DfdbDP%(p$5!$AIDr&l-8e7@6&|gd!TH2F~ORgDgsK`%|g`N2RLkvPSZ zX)w4EU2u4xJ6k!n$h7F=~IfxK10|9~R)R4*0 zQhchZD0Tx};%X%40>jaRK+&b)8NfcGVjy53N}^WuQ@Wr!Dn zDU)Oe9b1v!+IL!9)iNvTCwIS)saNtG8=jJms|D(glmNf}1DDhmY~(NNHYgal%;ZXo8yIaF}wp?i4QMo$dO?IaOz4iV?qTYJdbY)hj f1IeC9SLf=6h7AvGc&Mt{X72|D4#y7b50d`|MjSy26rbAWC9HC?jg9lI}8vs5G*(Z2|+{1pX95* z>OS3vdsp?@vby&=r>pwmRBug?HZK4UaOn4EZ~*3U0aySO01AarG9nAgZzzZJ=PpuA zxR{PN365`_8(D4f@!{a$!u~#6L6^Pv|L-Q`|EIY3J{%sY2!((I`40%hMT_F?K(a8; z2C0L!VkCAO4r%bHZ$@Mc0fj}$3@uQMTHA>BH# z3<;V3%IwlWDXM5l% zj@UTCGm&QGVijuxjk#zIb|MrSOOK6UaSBYzr$A?ALCDAfqy%%KJS)JV(K_1rpD6lk zy?tK%MnY^Ig3duI&qD;v0&%&)?-ukkn@tcoRmQiq#y0U;WUtr<4^(ANN4^p(>+_Dv znpWc<7G#ZPXILoDh%SwlOz<4e@PiTLm7@M_A0pfAV5z?bxH39xKvOc4Td$ZeVJV(` z!^Xj6=s=&@Ae0VDgN|Di^Yl36sM_@n>_OuZ6i01rhOP*rk{FRKEg-$9Mq)K;ekC5B z*+ej>f1>IoKdiJ9v&ZLiPV&({X`}LoYj}>uJCn+~3f>$#IP0}$nP7IVCFj7`)acXW zurVa)r1mmdr3|Y9@Kqcn4hUYy_KF+_R`)PX%PdM&t;PNmod#ysLeZ4uElO;9e^}_m zFQlcIa0wyl#GSn-Hh*;`rETWGvv4zKOael2^KV$vGEnC0^&<=Pq)nqzQ;a7;mwV6o zjw6dllQCLcORk)Z9vAt~IaD9nlf}dlDI)VI!p<7gLLe?9g?|na(JX$d+EkQ7*5Utw zp!u3d1zhLS)~`9VPHi4G40Wf$F7CODS{2LFj`nY;)E*ci(}+I8N}1LOWJ8&4_Z{-U zz0+>Gfhh#JV}>ZdB}+(-wvgZj6D*Ge4u0XP7^Z==#;C7guY?J1-lyhDRE=n@e`@&L zFfIQSiEe2~7et~|o?X+%^f~Y1V9yPdFF-3DJx7MFxMP0YWi9p-K5tk%d+nTjz@0+| zg(tRDJFZlb)3J~mxxPGE$%9O=vE)WKG%ME)C(1NXF(*64{YQq|b~D%S*AK3;^|Z`W z;*CQRqD9KMT5(#8p#-H}uT{UD%qu=S88W$dlUuaLY6*Ag!uL1@b{{-emU{ejVo(<} zklC0E7ebgVzxEGj)V<7mdo^itJvQXRrK3G88$O(1{+8bLq`^^?$$d#2`MV1pQ+kyT zQnkLy3hbgxA5zHabpg)cNWQbz-H&$0zop5a4>>KYo(+h*_&Hl~$8{T;;*%JXo2l>| zTSwEoLolRNS$8tfX!#v-8(wT$e=)<|@Ix|>Da-(fCyylyv+~jjSHk+rNGFXcf8Q=1eQsfBQbkSi?ufE|W!c(gSQsq!5?B5JL*_1|T zot0#6lZbkH<)s|3&KF2=v#rx8TXV>-qCx zI$mu_C@_w$i}w1QmKB5FFJD{6C5|92SNqYb58X!CedT@|0$+x5&u@2O z>E+{h3KAX2v(#2`k-xiC#HdUkbq+(*qp(7&JsJpaW;9z%cfu2A@I)1^Kf4frR6rGT z>%ywozsg#dGm(ZmVeU93(+(}{ zz`Ej4>7(fPFXLhOY?wXwK@$@ zZUsZ_t;55+HG(9UYhJ3=mbhYHDD)%>0=baZqesR1uc3Gk*t1wMzKH~{PcP0&G)q!a ztxF)IqG9jLn7d#bNxij3=BYG8YmlLt_b5%0|k%bJkdj1jZw_JHSa&WIVv z)Q(V`$^i3;{mD5lEbnqR<%n3s5D;0h09enqvW5r4B;5u0@ zuiq=VyXW`d3sR-olLrjlwB_2EyZQbQNq8031;Hm}+^g^Sdq0elkUN}D>n^xiRk~FV zZzI;la`0Y1&->9Yp~@P;=*dm%RBZAylCb5^n}dQGD25Wh)_o+HaU_x~Yqh52AXuTT zk?`5H1~fOjl`0;7Phs@BQ13`ZqmI%-O4Uy}`BD&8qT5rT{icN))a4a7_-YmJM}d<; zc)YJJBG*RCScD@9qBBAE6fb#(%YPf0ui-A3LH%6>igm_QI-2q`r1;!Y^Zhnnt4%eX(~SB zP{y`P;K*>A{rB0!t=Z9pBuz3eef!^mv+8UXKd7wmHe6eKSEwORpXlCBP{_Sx4c5y3 z{A{RjS8*X|FqJ2`j%!88ITR||qB)o+he`o%D*EI?s91sc{g%M?b8QWzyF+h-->t<( zk8N@MW}NlX6PUrKnj1^lzIfS$Ik(j|i;>N0b)>-WQ|E_*VJ9w=m5^;)!{=SbDPREe z$1)7|5%lswRNm%6aSgyQ0cHtOELxOetI<*74Z*(U-!xfuV-p`8GD8c z?KILiI*(=_&(tHZfxe-9sHU0R$27~Sh`miyxjQQ_>vZmVgCP^RFzjw=x(XQlP)&N* zp)2&1R5-FS!YZ)c^~4l_GnNWQZDZ@Np^%Saa%<*Btz>}-8DTcRR$2i(J_J@lIl@T+TL$yKQ=GNOJ znOyljs1o7M+iEA10NQ`$WpGZ{3->jvw&7>U>8@eYW=h5)ZtHi&aMqNaVvRHIQzoht zbjpVt{O+4z#Vs7fzDhKg?5KI%L@R%O**he4s2jW~mh@$x>ujE)re6SkvlrHeeIT$fa!ZUu2#>hLo-^xQ$AXX4ol15JEAblJr2lx zi^s!E6(T(Q_IVyQAC zSua1?XyGYhH{@+aX+5R-`Efa+&8X+#Z0z3=7&QaevWrw2+FwUH@d#yGjq_Xkpm1_p zCuIssg!7Q1`@oVq$MX(NqBZ1?)!4G9;NLq_@+7RY7h$P|Se^mXa=E`&f+qCUXNWa_ zaR)`{5OPMgVKRe$TdUz}7qf436Cms42sqhv$zmk-M+V3eGoax_nvUY7DWv&hlq)!{ z2GjWlBqtB`Zh69C+96h3I_fU_M`?`^L||$vSt1AXVf1x`CMd$A#s>*}MPK>^My20$ zjEN1;E@!;$VP{wJ?G1=d=3~s{LmyAOm(vvqVOpiBral3ZD({LkABE^|Qb``sEWzW~ z747fRHF-SWLOa(>i?jWGfbKpT;zq=qsZRvPSZ! zhgT^khWRqFWvfLNF^T8H)v3{Nax=T1OP0wNT~tz~8eGEBxdO$4^q}C6g=8D}j9o9W zPZQt9Uz+H&-oDRAtoYQQl{-CnI-6cV99%9WGiUIB+y(r6x;yk8y*}V*3MppCQIl)5 zxh@=9)>Gn>mffC={k%v-a)6lGwTB?{U{hHyxFzho#w^3B2FXFjD?Pfh(maNLSe4bW zW5Qw)X*tKl@|zT4_Mt=YLt4iOk@s$obMz{frcyqaKL^RN^c6BCbHvL9XBdAgkNCZ; zW2pYqRH2NY2H@k(@a6lKWL#i9yO#bAuAps+Wv&^d3MT7o(sIa=O44I&CVI~OCM8zV zv^Ds#i3#yulEMqawsUHSh-u4~6J%UE<)#&A7f#A{pfl9R7qJj>_AHAAU)+7rQ-JQF zQmV^yA$QpjIr(k9XX)JNzz-&Lng(jkDL3krc(VN%KaOhQJtlcTH164_+ka1pD$4m_ zdWm*qs$rX3UtWOiy`%W>V!S$IMA5`kv!SrE`Ia??wbX%3loO=rofEFFRNb7_8Z7## zr9Yj{kV*pnBsDPq6z+Zg#o+>h^JS8bg#0V)2v_WT>)bBJfcm<*|Xkv9Rj2>%3_C1pl37F;FWhDuE&fTb9GIq3EkM6iuRoBG@A_+OIlK-4s#l zy5qwkOccB(axmjNYhl9Az;1c=))@qS3hRoXb4$4j%#~DQ|FJ!+U4t{r8|gxIq~a~yQ}X{kzNQlHhE34fXNcUPz$Rk zu}Zg&zZ3mPae15g>di45OsS#2Q1Ux`<_g8oquOs$ZmAaOTccP;{WPgGGRMHtyWi~A zg9_ngZss|7_Bo*Ug%82bK44rF_FHbkPS*Q62tg}~7V=^Z#eB&?JK z?krC_6*Ht=fZa?-U1X5JrvlkkdzBfN6DC72*N51n>y@xZ)u-)i9Qv!*vdge4C6Waw z3L;Gw5wXZ==X=w9f)=}W)gyexqQah5JOa5rKO0(|tYM`|16_=y^`7T1q3#+exOI^f9}IjKzmMPci}$_H=Q+ac~7U>Twr^uKg_QY15j??5jE|9vP(b-dlEWyjDQq)kay=KZXM0h5LPGHOb~B_ zv5m7-?{rGSW#V{ zU~Xs!oeMat@haIpxPOosSx92gf z_IfP9{Z@igIL2qKC-SLWEY9k`%iD&peJh1=7`AeO$Gvr5ZIX|;ka5Q^o9k{CKuM~!8L`HZQOd34Dej2~hVUrC0Mbab z<>^UUHYGEF+%?<}g?e5Hj=%?0B~Dq7i}JvDZ^Bd}Es;hg%k2CKSI;|{o*p^1VIw!N zU9vJPKg(V@Y%E(CU`S8^tolmKg|!A6Oj68Mt(u|IE!-U!V*w8ym`7QK#MLQdGbSX% zN(PmmtA;4>2`{jV8>mF>t2@IM^820%#y8$=Aq<_pOqXrP6VeGZzeCLVP-)9$_JRA- zxLN0?O#DnRqKcv__wQ_5M^z@#iJkYsV4XyBn&iyfj<4fO^g7*pp*!|UEt z9==-|$}Sb07n5JaI5#Jpk_R8#A3iAOZS}74w~A#WIdtVH#yv!T0;Yhm_-a7215Tp4d(L`X^eSl$aX1cW{YAY0zDOs&3Y!JWi z^L8|qvRmn&!}>WGlyYu>(Y1_&&XUv3A4V>P_vS;~>-}G%4(RBY&rG@bzS7rhb4&cf zDl!+bhmI!1eJ5d zpl;UQ`D9rRTrdf4Qrr#{Xzvoj=AVpHKGNc$YHNCM3L=BIULeUgVK!+tIvXlmFn z3LLj0GlaInUD`$74YQQ-;h-{lXbacb#o4MgRB2Np`EHF-H9sD%A~RN5lTFEr;N5)Y zdQf-8iVt$pGa=bjJVA8lyvJuVwF@GdmAtg}Ajv5^_+nfk<%gK}0C&pkx*V@0p;;>I zwth(jL5rV~6Q){bs5ADsk?o|X-jy7y=AOLPdn_Yo!S`5Cc-=1$iOLOU@$SA9Ada3j$=l)tR&uv$yq>Mc>Hv} z*CNiTE9e`FcdnejcaZMQ2KFRl;B=?c&{)d${~DlKo1LCqS*{y9Gt~o23WRrbR;UT7 zp3yv99m>Cf5TL{^3fk*wiR;5RUQbcc%xYlNhkWCj-Dq)w5oiXkM>{FlNZ#Vb*MjIgq|{D~A5JIvqjO8_49>b|(3yeEUQ8!CWeG2*@vJU-W4JW}L8!YKHAy@+O< z5I61aK6G7QOM>c#6;tX&G17BA*0Bk9^;#`Ba(Cx?0unI>4kb!4C-`=9 ze6lHrXLvAdrW2{pr+X(?Ee*q`X^im%!^&J2tvT zzr`g%8USsA4DZ7O2PzC290)g!+=kSpf#0AdrQO=|hQ?OYY>1`Mj$~|IizmFxDLq1bXm)*`ks}zlri#mA*%r1huJ?Pz$GY)Hpk9sYJF$Uz zMDLk5!7PElP*zr7WMwH8kyBtxAoL`LIaO8aqe$touO&D~LzH$}$~9N74wwh>ls5dg z5a`(wcTPQBj9{fZI;urT+I50aUls*aXYXEd3Mzr$l#$7;54!ZIHt`a`F&6^Bn}f`Z z+-=J95gKpgek>V@rI0INgW30m*SGwds2c}Kz0P_-tOKPFDy7FS8;nKO;vbg{8>!R3 zcVKbpOSDn|D7cYI?j{#)Iab;JhgV`j5f&9ar+wS_$-5Quj)Fh-%v3PM+5Khe3w1oUqHfE7 z^wovTIy_>Putm7dg#G4gO@hX7RDEGRj=|Z|I05c7jAMwlA>3Hzx9507yU%K{!$rFKY)|S4#i_8~}?EffUKc(%TY_cpeu>0ip@{|7m_e=DW`@&CgA_`k1~b~ZB9cKF}? zPszV|I8qvSOHU6UTW?NlcXv)pPfxHPjjyezmm}DXhL4Ynhl>YDLGfpwrtolZnt$5x F{{yzCLI(f< literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/string_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/DELTA_LENGTH_BYTE_ARRAY/string_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b33d5602c82363e3dba6dee31c745cd95201c5ed GIT binary patch literal 6999 zcmbtZO>Z056{QoIbqt|K08=IdEetr6sUk^JOJtlN8`7dA_5+nDiL|l^LUM+rWJ#p> zO%+8g%2c+oVs~w)&CdwXpWs!N`3qSF=&CdKo^ziSw43HZo;UN}{W$mBGmn(DkuCl% z7K#149{ju)dvzl=8;i!G^O>*SOkF>*5x-gq$J=j5B9Xs^|9<$(kKg_D|6l(4OPs&^ z>G74B7tvq8H4}`FYKuEhGS%l-OY7a``oiy*_BPH--O2Xy`OV?GiA4EXriSyKo2)DsiiK*kUGlz;-Ou;dH~aOM_Y#SZr;q*E`O9?oV!L=gSvZp*D~Ts7@#(Ge z^zz#FU}rU-E+m^`ff6{o?L94oyOLuwWZO6Qn}rY4^BI&yStM+U*`OJ zOoH^!?)E23gIjB<^3$EocM{K6Ruc7#i;aMe)#*}x*nM}p{K+?47rD=B)%(eh8{2dL zoNO)RANDs3r*~J=eF>5t>`c7LmcN!9eetYUDVI)5%iU*N-okDz{iJlS>20q}k8hRF zGXFT<*-B+f57YV8=IL?D`?@&rqThON%>+7G_PuK1Y;tuivzpp_T#a^h9CNgF$ZA7| zN$%u8$52OKhc5$e?RLjT4PzV*wQ9*2*`P_Sb{Gy(bVUR7IFP2VPFzI@0VKYMo{lQU zQ8*lQyMyB(9D*#oFh{>Hb`DmJ`Fi^{P<}9R)3)}(rX-gOX?3#}S*h&FP>LJy3ns%P zi~>oW1Lh5>+fWN*%588JTXX?wU3k4y#@tod2rjM=^_Ni0*#OFBI3DRRT2El5epG`a zQgb=#AfHd_N6_$#KM%qVKS9KiVCGliS zfSKOLjuw{Bjz~=w-1VHk0Kj|>zV_QxrH6p;qxsqp=oS^#>(B^ekH_i`bXtm|#bR4` zHHkz4XT()CH%Vq5sx`r3(6x=P#gc$hy7>*rPqPWo!=d^~d`)XYKX5vLMi59!uofxz zj7;TK9OdOK`gGo7qE6C6Zc+ogwWD@qt7dl+MdS_6N6f0jA_| zAJaKP+*>Zpi7Q2TXq!exh4knaGNHQy?=?=D^|jd;A~cOhh7g?&VvUv&vNpjqE2&|k zxPb|ooS2PFSW6&V)Xd<=>h4nHvVs1)T_Z}mg}9v#!AWk}qKg!PuEAM@l8%!T>oGYw zKF0e`ZHNc^&}?$q_k|04ABJ#4bIi#S1-J zx*=3|5bNHvdL2wj(cA_a{f@xUm5M$KEE0=OXc_`O5zfSfK-D$8DwXgzhCHVV778N7 z6CG2T$b&%D>WPIMs?~UGa$@%g0;EBH+`T;WxEB;WtjJFXsStsQoSsn^5}}e{vK$IU zWoZW~>?})114{!C+iVBIu`#ZcTX-`w;j0AgQB#>2+eEnt#nyPu%_>O7At*94+%Sf3 zXOpb(9Y7&4W=m={mSWW-j>v(z*_DF>r8Y5gNw(BS^w56;%LD>IZyxz9F!Zqg{e4TLvtxmgNv}iU(dBX)~E| zvC$u>*Se+g8GZ;UODzh6Dr3Zh14U_S(i^_V^*UNg3f=4o4DXB+Q#SiO&Z5ZM)ikAQ3wLWGHPhbx7n0PQX+{s5;Rt?FpL^t!aW>K0A-Yq z_!?+)^mLJ80Gm}jx_t078f=rGl}+{+HA*e+m?dvwA z8EsE%M^ctvud@i!CgrQ7f-9m}?`^|hsDw%|o%`Jn<3EYS>=5i6x?O55izcYS1x3VttfK#_Uu0BY0Ull)uPPf0DjU$Fame#uHOX4S*-SKfq!OGiN;d>2K{SMH zGmm7kI;|4Dxzu#^K)BdU;=Obv9_`ke{$VBdMkF?yd?P;NReF_VNu~-|6nPj&9$(_rZrBe3+OI+56&3B(f3wN$o#mfm;Xw literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/PLAIN/binary_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/PLAIN/binary_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..80ce3ee71ecc85b5123028ed9da8e8d63cc77564 GIT binary patch literal 10740 zcmbt)WmHyO)GZ<1AV`CBgM@@2-Q6u+(%mgeNej{lk|HG`EsaQt0wPFB2qK6GA|igQ z^Nw$f`|JL=_Zb7u*?X@w*PL_jbDk5Aj;!7dauo8~C#dkpLq1J(*}^2pq`jGj&Vl(a zoyYZK2PGcL{g-!=T<*CxaypQZprD{c!ryY}LHDo!_kZF4Kgr#{a4spFhl}YvIjdSy zA-ZfRV(1I~MANw~pzc}c*4m4H*`?FJfO)xLu!Fi)g-zUC`0mft~*>YcNj$jS|BSn^eWkOEARMnvF!aE3V_}K79=`04v z#)JvLL$9fpV_$A_Yg*Yp7VK{Vz`BQQq40I+m5ZCC#!*?dg8eTYb`pP33kIODUhW6L z+M{{p#Uqa?5MQPTy$NNA8N4pV3>fw0X{z4TB8{a-1eYTQ5s&jd2xBcmRPu5&z&)>@ z*&Zt|`2O97yi<1t2&%u%J|@}NFC|{0Mx+Zt3A(Hiq-7nqH9*U^ooSVG;BnW4&8w5f zx&?H_>xy<5pmu@1cSAZie^-`y|GCSe1!yA1@{5yEH_;}_4}+p0kJaFy;NJJw=8$(d z-0#MxuLrmT3Q+?|P|fNdv-?hii#Qul@_H``0x^ELF|j?W^1o7T1ik8WQ!v~6PmPj` zd42aivw6}^oD9ImjW|~lD00kLI32!QTn)$*!3q0+fXYP03Ey!ZeAR-N?^>A^0!-P1(cj|sp0!eJD^T=k+IC~6 zmz!2X$s8MC;|ki-Qsctt|x z)BXPP{!fxopl~HWE7%=B&+b0C>Q?jQB&6BvzbT=4fb#S$J9CG{NN3p5>#b!7d3a$|)TfB1hg8NzgEid`|}B_cd4tG=n`7_XM->Zor{%>{b1){4r25 zs*<=c`O*R`AL+?RE1|ziaeDJ;7O2%tTRewtW}{~bK;^fYTH-?)S(b)PS>_fhwGr1! zu1A@0Xw$MU6i!F#`?jWn7|bc2j{Ua!yyiLMe-}{YlRH`HmT&<0+dwSMX<3l*4cDeo z$yFTi&&X~#07gQw5aEV?3`_Va6tNV&mB2|5&z`yP5v-f( zR61SfCrO0?wt)_l6{+oHH$eHm5eHcsr=6w*5ZF^5xG4OoKOZ{xj@PCh0m1T-6J@EF z7!wS(2{HdV)FDrT-U9gi*58VF6pL6DQSIG8b~LWjQnnhEGENNK#gz{~x=}n=#*zT0 z9v%1<8{hLkbP)xh0_rzQUHqE&mRX)Rbt-3kQ((cqc$Y|T7 z>0=FG@i~%*u!RP$GI4w%aR71td;&q=^xFT}esgL&qlv11%Maz^JyP4$>E{C;?onP` z2f*JsfBkoWAysCt6V8lW&XxXl2%PdFEPdnL%uQAIWbO`_Ti@y#b!;MA7z2v4KIk|2 z&c5$HU-e}L#VNmkyT6#^rMx;H6nsz73jjP_`Q`@9NU|2!XPvTv)5ysfg!Wzu-fOP%LEV`;!n2v{x^z?RxwE+L9R&sWGk0_bbo}HHJEC3-dw*`Q1n)+?65%fK+@}kZ-uSA&K6@bTFLwlwbn?K41obv*tde-0eNh32U+aa{De(FJn}cfkYkR*u_bF8;c1|G}*C1=Q_WsFq zZ@4t7I*fC^6**HQ`u>At{U?l@reeHw-L2c_=Tz_100CRa(F+oJFhqR&d^(W%J(hy$ z6f+GtWl^VYUznXr028US_aZcmg|5LP4Qzhp=L z;)O?<%GQqzR>1Jx*Lst4w;g4~RqPeqZXK+d46NbgQFgKURy0tEEknkNQ< zKa1`fMW9&C-fbCvCUjGiYS4*EjYV<9rM5kX)LunO; z1Gw)F?^1JUKPg@?k%cL!R8cn_#j!eFt0!Z-B@3WUp=Dt7ab9$hm}Va>As#46pMUry zQd}Lteho3#mXS|HfGWGT4^}0$Wtk+b{n;ySAy9m9Aj|2;#Mbh}P*SQ6#8CShIsFTn z(BiWGNpDNM2%%k0`N%@i`KKL1O&WuBU7nB|8Y#}@UgZg847m3G+Y$&Xm&H?ni)A7;RJ^c ze5kUyH1gxg+GY;G#U{MT<<<$@Tuy+r-A2B(h5YLRF>xIwIIskHwpx;woVdJSF ze3LML5%A8}A6;!l0Mk7f*BqKuk40!_tjh>Andl&%xPR(f;gLnHmiw;{O%71@jIUYx z_~>`Jw9$thC`+#M_Is6T!pr+Gh>oK4sd2Q63F|y(1|&5eg>X?b0B(Bzp8_IVzOxQ` zbnu>-jsN*VJsadSUksn4M6{b4neN4bW85P-hNSlCcOZ=Jw3_kt1|D=<=2gTWXQ`jS z!ueYMv>jBscH993T=|f<170>3^W=TcAheqDq zIVkO{g`}^oC$k1r+Xj!n0Kd3wABd>An{8F7v72rL4Kto+Z;gHUDw{z2{TucAW^rcm zKi$OW!C6ywPacg1E6{A^cqNYi`2`$CuT9#c2U=f|}Tf5Lay zOj3d1QbzU+m_gL?LnVqcWA%ylS7`F@veED0(48KUbOOJlFqLIO^qxv=HyF%~ohC}L z4H8l~xdLWsrF)=S;6s?)L67@C>`#8Uw!rF~Gr24<896#t|NK%B8jc2=MQ>3!vP*9v zSFL1UTc?3n^ke*B$3>R_#r}3%T{V9I?NykWmFR>S2=|amjf5QsJ50J6Wd2e;0BO5P z`R$_iksdfg+*3P_nTKc3Yu?Sw=D#QUh{=O-5bXf zhgw>S)ZioY36>UA>gF4Fwtg%CV26N#R-pMW7zOACF2 zuAk&g8$|BqKEX8U}9F#{$bL+jl5qpAOnau zMSTQ^nu)%KNQzX#bZ9Ti*s1o`5C?ToAm7K$O0HT@c=rY-G>Md~G9Wlf@ezZwK5}EU z|5JQuCOwV^peQJEfzf>}ZfYv)NoKd3y&qpHNJ2%{fGN#_TqZ3!XcSzxuBysbLPIV~ zx$ZloKoY@=6%0olCDXOS&spJ})4)Tp_l@Wj2Lqv09?dkIv=7^(BbdF1@d-`lI>{km zbxf7gzP`qu)Q=;C`c01;-z|CCS5g|K3^2g-mwM)$b1QDqy0ZJVI zsnqt7f-UnnSY_HuUo^RLfzKG~LsvS1qy_F0d`&o_!fn1%W##jKu9St*Q6sH!3Z1|F zq0#zeAcr@m6^~u^pPeR=-V<){Wrd#(9%m&5k4w6RW>8RRZ0F5v~+@6;aNh+vV zkdPQE{2Ax{J5Ma;CFENDetcAJIqPv3H!Lh})qw8cxHLg9jWtIQ2hvBfekmG=g|p2w zg4$|^9V$vSV7MmsVmsVuiC!8S+l=o~N$N^A)*yzd$@}~`9WdG#%1VG0yNNt!#@h`` z(sqh7ktrFl`Py~i(+4DCo>y`UTk!QD2+hG}jVUg_A~4zWi)}Abpq#;wOf0S|XzO~Ky<~8(xA*Pvxmv3fN5_KN2hvP)GCMcyG0Pm!0Z!=SZtT#4g_WjtB z!s#$!rGIY+ovb-epj%X3)PlN6nrdwTI<7Dk=w{7+6x$#%3i-ox(E{_^kRQIhU*u8A zFUSSqt@90`Fx)@uW$1khJtv_8>Jau`bl?tmQM=JGOt$`Hc0SS5EMQc~lRhB@DR%FK zNZpV{a{A0q{_J|6!c8c=!pSkdH5iaKx$d2i@m(&cE;x$ zaBwDT{(?>S5*K9N!4Qj5YX5lEJVIL(GJP{QTPX9d1YQMNbwgqnP(IT7)m{vgf97WM zzO5DFhJ5_SzZ;F}oI@Px%&+^fYI-<1CCF5WsR3W1;w^|cJ{#6yW3Gf5i`{%YSomgO zo_2IVFo!?^605~#@Sp(2bd(kYaugEg=X9Qz&Qi~~sb|UnK!aNp4qu1_)C@ID_k3PV zy+{I)tQ{BeXQSUnE}EEh?>&T`LiCDvy^r(X^zBYODxv_oVdXdFCyl{_|Ksr=)cbRG zvK0UXh7LD*z=o#qC)|XZijDS%#r+U4Yp5!SJlIfJc^&@|*w0+Owuzq8v5qExYF}9| z0zu27t{>Q8OrBRWIS+%lZ2P;d&KJ~EATzI^@5qMFA=bQyhadPF3=w1Zhgc@Gz$**5 z;YxmO2`;`*)@q5Kp!LX6}v z;_t`u>Bq8k0D=rp!tquw10Dtf`-%q*F|T2+zAbdU-!t6=nSoO`ce7+$^2$xyrOq6l!yF`zU|IVO zOK)Bs@j(`gLGLdzEOF$q%&ElzHiT?jXglQBmlqNEtwK}f^ula@+?2LSpa#e+D$gz5 zzkCIM;<_m$Ryk35&cdNRmVd%NbyvC}v=EWw5bkbY$h$n_XX zrzn#roe1-3>j!OdJ!FMO3X;#?b@K%Sn(fNunO`)6JlU&uk=%}aY+#0nsR6NeXVDLp zA>DW>)X~VpsnGzRR zU7%%wu6y_rf}U+nfhuv&4`Q3+uop04Z7#$D`nYYr-t$)|;)&mZz;HP_NI$2G&yAKs zdoEAe1eCV8OK;A_M|T}_Jd>vX$_Emy-fFl56LA-A)qkQj;DT6qj_?pyXS)FBo9|Gg z57*IVAsvG3oL=y9lQ+Yn`Q*KO5f(~^Wh3UH;sf<(=JMEAsHnU3A_2C#prR89FwShu zSbGpr+^4dF$yC1QvgkM1Ky{CN5 z8cmYle@OrWt!LO;LeY;w%bH;Ob*9aji=Ds@#glsExp{^ha`uTbGrvvy77v0rqFHLE z>aW>8vP(P>e@+Xl|KK>~7IL@pg=Zw-4tc8#Tbjvz0?a=I^%^W~ZuDue}Ac(lL>ncE-n7p+zCK3!CzgQWv_;KSg+HV-l_ZCW4}W@tD3} zZLIlxKn%vc8**H|39$AQNmjA0S&8OGO)AHYEQQY(m%fc)8V?ahA}$< z-AQ(Md$PbbCA&ZeI*c)>b&a8N;5CQqm3))HYWDb=3<BOPN}a2 zC)jsoRltT$8M<%RQN3hY!;Gi|Iu+fHix(5ZhnulGZ(WghDVUr2gL1CR9y`C0^AKis z*RcC_mki|VnBV6u2Y>AjrLU$dc$UEypruXiq}n6@14v!aybsw2MQHZUZ^!CU-@rb` z-LuNCOCg_t{pjUw8d(2&?JH>9%UH=n**#+X%MMD2!?DCTZQ^0TsPu4nvROp`kP=}3 zbhkdwfo`_lQUPAp2O>{D0(183R$5;ph)e3+h5V?iulq^Fg zw$J+j2Y8gR2`@}+i8?`jQU6$&9Xh;UOd2nW9CnQcgtYIILzi0Hs<=h(nPU=WtA2)-+FFF79^J~Rw}FfC-p z_zm(o-+03896ZSW;}Yg29rAa{n+*D=NCQEJ`bnCWN=0>jl9h>L`Xn$7{#xT(y0-ZX z_D`h--=;n=v4SJ}$gpTPp)E-Lr@7)8jPa89Vug@3X+lMo#wTk2P9^jqF!1d9;c&3m zu@nsOir)AC&=qr54kaZLY+)(<_g)c5HGJ-wD^9?5Pz?FfT?aGt**JmKXXF_%a!a>FsP(7;9T6U5AER(Nbi0 zGJUioOpV%g#{o6kzH_1q4M@}2=;Qc1Ha{du1(BzYGm3W$?m8k5Lgm{Py~JQWNSf0P zk3L=n<>ra2uQcrSUkUZU{Q|VpTHP(mHgbj249_2<0)pW^^fHX2tZ{!xJTb{_t$2q# z`NA@e`9Nwdt7RDa`t2iLu)r*t5n>iJ@`55Yb3p38&uuSDn2DvDH1-AqF(dunv6n$> zP*?SC=#bs~8?FF~ymnBr#}X$K*GTSc{5TATM+E2V9nOc2*Ii!S0d7Pus27EgZNT_8#YND9x`g;pZhk6aa_aWnkk`zIsOSnNb?1(=9kI5@a|H;yXPE*Iz0)|9&J5Hq9xC zW`Cvp=o}aD1e&+P1Xcw*6b$jGn)@h+T(yahu09O&)K9TuOv@?kPKIng&(yx@6)vnJv@kq<==j-iyL(qa@R*UVL!A&z zzcBwx0c;76c6g;XpSPVNClj@T(-;*P6O~j-Xj>FTnWY8CTqCe2TF3v zU?w7+aqKcdj+^UPQg^!L*Mp7Q&Ui{Pq~8)vMBXpz`&Vpi;s>aQLs=duxUl zdaD5V&ZeB|t$7$kH>s?I2)A_%X~LpyH$KPbQWqr6`WQTOjSA}Je+$(8YPVBBEYH0s z8Dar0nKTShjo4C~q_-eXqYfNnP6jxh3KC+mdh3bS0VBr*2hD~mJ-RCa%fsL|{fpmQ z^PnDswaSiqOhJtJ2ugqCJJoW>PYiW~97FSKODa3)UcA3ZxWG9J$53$g+JrRnp1{ug zTHe#mPi7SmD-(!baEh{f!+kbU6=qvXr%cpi;oxdAcH$WF&`*?pmZi?j`v`&++L;;F zq=&2dUmxm#_FKs)GW(3Ipd@!(VzIEZ0hw=P7xbxoy) zpt`s!*MwJ(L$c1+e1G9lGAU;K3`B@H8R|+Jp*9|l(KAkH<3d+`_&2_Z#pb(Hq&guB zad$zR$_*XRLLuV+cX0Q!L>Zd+e5)7?RG1C`1-WxC?qcK_)}(^`Z<=Ob!Di$d*|lF4 zJZYmKiDHue*}g3C6*)BQx|X2ZH1<2e;m49k^)Yh>KrMUV>PF4-dn%6Lb&za~`-OJ@ zbW;#wDoRRd6^4qmVaSu$dR!=BUX^1Rxp1E`a$8AW zU0eO75GW^!JhaK_nceHkrt3_H#UzYrJ?OO=|)2!-NlzlsPxBDL;TW zsHESZW9Q4xk|HsrnrseO>cgKk6?!^Mit9iBPhLH;^^X|y2fE6W1`qnT!Sd_-f z3}WS-hHIq0@7j=iRvc~QTwr}bnnjM>%8ybLKRe~Yno17~fodpkFaWXjN{2Q{gRYET zOBXEHJB%LbVK2A9s;c{%yUn*fO<@1gS*j3is(KoPNSm49E4N*NL(0sqmRNL+S8fm% z6UCP0;U1WfohOp;L4%0HrZsXy((oS?oAb8=<&8f>3~MwVNW7W`dFX!QcfvSA{$kF} zUnnR*7@Z2jDP!urEKVxRq=p6qRN0P^8~$8MxZMxnbdI`eHQ#S-vcD=&T?|@=!f4D~ zDag}_T^~2osj^jljS}Pqie8;a>?h zCih(GV`t@WRYRU`M{D0v{Q2HgB7;Vm09bo+Z-_1|2R1P@f?<7>zF)g!IbW5XBJYyR zzFaJ_mQ+hVpEUsmjSP>V!jG5ud`40&wB*o7%YOw{Z|TYF8GEJYrM1ZkvJf-FYnXg~ z6(uACWOXv4@1U6peOaeb9BLJFh4H_usTbDauNBtZmABA;7W1_$phK?Vtw!B*yHk_J$1k&e8L%qf8&>OMT0Vr z@yLIIEvf1Ed0^pHed7FbpAe`nYwv`$J$o6q$l&@KP7mqw5u-rK8%uikJ_kD*^5}lj zeaRbw6Bmf*&DZJ2k4saLJGOKWdrv9P?_csxN_~-E7ckK{hB?H)NdeYEr90MwdN%=V z&YW**w~aLsdCPkrMF98~P#M+kS{ym7DkZ_P%M1|IVU~;XhW(OO(ui@|t|8!^eSp*A zT-A{Fy$xotw(!}v*Ecf?P|Gsz=T5>nYmCFiRNozQrBCE&R6HUml$gGD&JON2e4o(Xye?lH*gcVxmy}*G~=u6ltZX@W+NN`G5J1i8F{KtZ_1eG5?LbCADMgGgms>{j- z%E~#(D#^+^$;vw+y>fE0vYN0bFDt7G+spR$P* zsE9<$A^phifBjHe&ehaJ&-s7bmnq0N3MHeLjgPm#gCDP*mlv;%kB?^{V}OH?uZyP# Zqo5$40G|LAE$yXJYt+kch|Ax5{tu%6c~t-a literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/PLAIN/boolean_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/PLAIN/boolean_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4678e31da17910d23a6f48f5afd6ae12d4c8a23e GIT binary patch literal 618 zcmWG=3^EjD5Ix5XL^`4u_+LY5C|q69laeL%N?E49o-xq(;c1D!Jfgr^Y;OY)m=>!%BGa=$2tDUkVBZ4wue4rMvJcEQnL1Iy1X=;gX nazTM^Vo_0kxk6cLQE_H|o`R8)o`If$q>K#Ep8>}tL4@tDuiO>%PEVDFfKFKk{|7g z8{x)pJS`Zw(!cj|^OGS`m}J#)$w(Sx^5TaKc*t19?LTr@N(XsQ8iq9|*cjNKM9Vp> zg!Y|3h6dQmP+YaN>6;D{b>`=zuIjkK_@GXPgB2Yu)sGCT|8at3w_@KH4-V3oB}Q0& zvPRhW@`AN@84#>-NL%vB4LxpekAFPKL{RyTJ+gf+IHx6A6Uq?B@y>E(y%r%~5nL`f)?EJLE~J5w@wsg%e^%M}|4&xD0zO&b;52EAYb_$|6UM z=I7nvGf~;tw7qE52&&r2wu+Stp?f<;D*q4(*V~>J zta@mM*t!z?(?{8O$yiZcKgL8^P}xlVP7*TwGj6o|Qz7c$-z3w^fZNRQ@J7xKWgXL$K z4WDBj5x-#gWyTsM@T?F{x%E;UXJ(Vuy8F1}i2s>qZfzb=ChvMwB5Q|KKFezsA<`%e z|7N>-lmio+uEFw1Csbs6c8u6_@M5wvdM=d-z2aYI3VG?c`0Ro0ln@PvgC>1VUO7QL z^+aY(r5R2HvPxsz)eyR5uIbNRJ5Y4(UX)+9N94r?yjxzfaa*V~u7PESTgo=ab_+P7 zEBocQ_+DLnUU_3jX9o*=*?f<6URvUo^_J^Lrra?{uFL6+aKpC6>)v>0twCb`4TB}K zPS`!=ciM2L2R`T2j;Ey(3rZ{~zN85*1BcM~1ZQkW&r{fuY=&+20nSJNVPf$w{-9MFb~wEyX3N|+Ckzcy zL^DW)o^KmJu~XgvaSwkFe%2x(^=^fs39-*U`Sp8Nj@qHdS3Grgz!B+rdU~Py8aR7i zu_F(fEzUUttGJ=nysoIsZEZk=| zpQ}hFcZ>=APQ%`eSVXRsqrT(`1VhW{Gq;P%aV z$3_@1>M^Uhr@_Wv886B6#!PHX%TCNJT8m(9?iU6Q4#*oj_l&y47UQ1g&uk(k(H-$X z{`HhKd|5HvKZ`g>lrgYhQs9AQfr6W5>+LacNM<5Y%?|NFf~L<2`xXDkzxerMXSlKK zR&>nJP{Fx3l{d2nnZ?yX)fX%=wOaaT@{9{i_WM~#%Q?elaF@oSYjgy5B{OPzm*YEc zl2W{iBZ@|X?Sg4c#0hLcdb|$oZEmXn=H;Nm_I31%<9cYkyJg{ydRH8c+)mo?me7Y~ zi%BMXjgeMMx!X-p9KsN882tjW2z;bo00g0yXz60>fY+OU890v%UK=>7~WRCYqg z$;>+|yxB-q=wH5Ys{w+))2ui28KX8xOjM-B1{r}v)}1X>c&)hg=Ht92{Bv5HC!WyJ zW3kgDlvf^4dlb2Z-`hhfV9$NoZybbFzkNCEDv#P{UDEYbLxj9h4_Dl425v*+Rc{z< zL~Kp*V*FyF`zOU~D%Kju{iZ4B4(mYZSl6BJFBouNHF7QXxEsVOn@XK3IS^Lg*BY^o zijK9aaaRP#NrwtmbEUpmBDGIk{K|1Ef_AU?pcc!#tWKVQ0p7%V1SR-4uWIT#n=h14x!n{hopPw@b3UKSU;g@8*X*LsdHm= zQFSIxZ+;zH8Ks1u(_xZLg#Pv|U6RUk!5qEkHSTIsJYadrM}Em`cT9=!aSQ92VSJ}w z^}s$lcI;S?w8`HN#e?^LhfYqA`b$wVE42ch9nC3km$INx5^J4KamF?ufwZ!@ioalHfgxV% zu%y3zHG$Zn7wLi-Do`2o*-^LK1I5a$up*i*1Xg`MyHn5|;qO#Z701`0eWRDKC5eqU zWBuQ51##d}+~)tqmVx2tj_#rhS>QhV*^u;%2g#iq**{JIuf0R=nb@)M>!a}!H(>^v z7cEZaE?}ZBY;M?;u+vR^v;e)!9$06a`6sbZ6|FxDh5Oo=SaLS;q^p$?rWy(p)k7Ir z9(noPTOJAmZ;|^3Q>`%T>G*higAt-$alMb7;08l7&QF1=kAEJ_HPnXyF69-nlG+@M zRBSwwKW+|(G~>#+g;W%pkrW4i+2D6K?Pdny_j8s-E3SxRqh`^k;3P41^v{NG$h77_ zzkQYo>+Xw!@pK$P#6561X0fYQ?&{Aw}@VGUKC; zP%`uh*llft@P+B2!!f4Feku2lX*CVu-u?4GOPWYq4TslNDzhGWG!6FPx5AZX&_v+BeJ1X=uy`S8 zg$iNTx67ZbkUF$*aO-AUWbQwaQ?tSrEf1fZQ;O1rkOQmU2R(LiuQ@{WB}EZ&je`<0eO# zbr$9om$+loW{H8|24MMcT#dscI>wkLTZcNSpm7}+Yl}95_kQ-LnsI&j=e0`B#+jgF zVOLY#eF{#ss9Dmjn5gh*Vw_ZzN1M2`{(QVUX0PvRJh)vSEo+{(RRq)U&-Nr)focbc zOY=uG1yT|0C0atQdrLC&?9kJGNQb@8&_!o>fT8NWxSwo?X0;_VRXMu2bK52J*AyFP zDWlV`haC|%y7Oi12NPH>PqyzE-})*uWL4XSTiJ zY@{JRjq8ELW+wcPxTv=)6YE zeF7dBGfZgiAmjz#q=w(L(!qD@^9?mKQ{4D=-zVR1CF+FQXEyQ7ldM@wS(yax3%EbB zBr%B$q2c8cA3=rWvjnrDeIDpFsDEnaOxT&H!*5b**|6O|x9aR?eR%3B^*nXug5Z^J zZ-2zD!OWgt-UsedK?%8-_K%(mx@D=MLr)BGN zJ(cWNKexe`@;XVW0UFGE3dDp0S3owS@$Iz@?%-Y+*23{qfs$=j<$${f5WI2w0GWx> zp3x~@BMtnn8#-Q)$H7dGZ+&m32O>8yTnsmmQOT$*aocB&TVkzL7Z*p+tQgb=5i$f8 zi{5asWTN_Tw?wY2D&*>m)(P=(uyXHF`-DgCC~WyGeESvyRh_TwEgst8PQY7JCk`Ez zUJjaq${YlI(EB6HP3ZU57e7yqIl;Mgw6Iaz0q?ZSlRO{Uq3vhvx-5G-f&(_a_~yh! zM%AHfml_%1Q~B{(nW+dAYwQmfVj;9aq7&Ucy zAX9ez0OO<*0wvP5`ZSCnbg{L&zt9Q~!_wK~OI@)y-Ej6!IRyedBfnY?IU&P6ukd#X z9XZ2#A6Fg(1m_PPuscfZV_oKOl%zRGCW0N=iW(TGpWLEhtq(@{(NXGrnnAmUb8*$DMR)J;{wT2D|yzoMlBTq+s?`=s>b0@f-CA&7dSiq&LqwTYd z3Z&+)%>;+&qk#NK`ue0UULGoVu66Dk$#=ign@`uBu%lG$rgxY*v^F0r|5M?NBX zVzv=+r`J^09odhnPOgqbW-36@4+A&HD1!{_Z8eIJLP^mfH z@#F9^^!j$@wWqNmdC-XK^B@IG`)Rv_M;I9IdsWPHRvs+6)78-!cL>Yo+{)T60d03m z_WSPDSeq>xvew@UdyYKZvGf%OJr!T6)mPLJ{$;UBxEz72@1{SjZS;VuNtH=%mJwbQ z3{-wv>4Z%uwzeKmv&U5DO;NFACOk!RjrHp}AfJ+*y>q|{OmANgwI%LYxN#fp=x!qJ zUfa-o>C7qq#j|?Ey=T>bcV+bkb`8)Iw0&n^T#+>of#nQs3NyYpt@Fk!la8#j_ zr1nN9t@(;8G9!WqELIXY^K*V_i>fiIwfBUjUZlb7-PKq5*X*HJ`*Vkl4h@fszxD`7 zvvF{q6E0mv#6b$mE=TuK;Y0otxQoq(PqDGrBf?)zT|LhgXy%N4$!h#tFN_A1H+$xd_Y!roZmUd1iju|5sdE%3kK zoJa#-Yfb&iECNr@_M0u@cZ6W*UG3~E4tQibxh&zG2&Bj_(!S^0L1xm<>y|u_(%2=j zd7&xX!q=;rFDB+aq$5{}h=&dbd;DQTz9TtL3Y=|p&sur|Ukv4ZND(Er^?xYtX|pqvC>x%dPRl=7E>F9fA9u4kUh=C+0bu zWhc4b06eEV$GM}O(d4U>GY9-F8EIz}9uQO;pEK ztLz?w4-OzDl)tlnPeVajS!c#}M^t9%23=yC?n zf}dYXMpoNki`e`6xDZE-8GKT@Lc~$BuWsFL%4cF{M%mAa)8-gy^^(Z5nf9g$f+p|*na$eO5n96P7V0%YvZ`0@2k%7AF~|X4IPUQh;xAXlzsQJ zPfiFLeX5v4CZi;e_s_vRXILIsUo@)aj8_}B@zpN}j_tD6+uf@N-$hAV<_J6Tx_(m% zcdiR+3xmbtH+n#=Z0vqqlo$^96?~8@q$8=$Y(rEL4L;GUU$?v3z~sH@qZ7GQB%B(5 z+?>jQ)@9Rhi@hAgjh=2jb!rKoW(pR2$U8zncd5Di9#>p&qW)P{$HdqHpN$`Q>@jvd ze6O~P6MWL8-d}oY4*`#3-_MEAq4xcJO+JBlb4L2Socq_HAzZ0#S1bh;ypd+3W*i*< z-D~7dqv04<`b=9<3BPJjc5!ARH2%(_-)>A~pEi23g3AU2_n#j~s~}rayZuVDcuRBi@uC1ba>z&lPfQ>=hvblF?M2ojsnCN>gD@1 zF~N19TER-f1lehH5m`e2_AhA9K66?dcS|f5Rtr<`{vA`^YriSV%rlLB#V1I8`i$qL z@(g^J;onMG0ap=6~UZb>2ISV`fx_r9(O*`2vh-PjI8{f0Fu z+@{!Dwf*a)A`_i|64ItW=wo$Bg|?`s1I~CgI_|7C#s#Jo$?w}2BB|&Z&o8WsFo#KL`a5}5LZ9Bg-}?3x9fiaDPAC>G#UA^y zCtP$lh_9my`#_@aP-Q+;NAiQ!{vxJ1nAq zJ6@fng6C(zl1V-~Hg0jsFpV@jqh&VTW zBoJ}vrwf!7L#$-n3H(-AyXG0c5(I;^O9w8~A$f?q(LYlc$1BT6vcpz^%rfI_Ch(rK_}$%$%xRpS*SY8!{#UAhoR{$s@tkek=n6nwp7vqvvP;;rD-zYLX&#&vz(67 zriUu&p895iS3a2nLK@h%~9GGCJeX^~T&BfFXC|J3VUl*Bc=P4@vb!uyWyjvK$sKW25SOMD3kz zR+wt}u`aL30f!b=mPV~%!Gn3Lb6!ykw8G9e=PK>7Ci#1z+yg4MsIQ$gFJ+^k?!uw2 zX=Uuyy!$$ zbDA|ar%Pz12$yT8uH%yGaC9#lK@?aKKhS32rV4AysZE-c5dy zeq75tM5N`U<Zr#V@!V6)i2>I%kk z_Ei~yEY23^%8QPV_Ca1Wba=>M9$#e1|4?ORSu9rg^fh|$#kqI?cM|&l2|o9(-Lo35 z+WQgU*$UGHRg&mzLabFHY((=h72b?|zhtmBMd6E#KV9SHK&zLH{2D*}nl!?EEM1D% zo70ifuTICtS`js`94e}fcw?=LpTZ5vRBIKDz3WEyz8K2vD?`kzOgbE128HjN(SJu2 zbswrA8gGWeXAP7aIb#K{A1of~e;j?(D(A^bbHj3=&EK?@sV za8cG2O$rxqd37g(v-ChC`ziXC8XS}>p`R*AsuDK%dL*5C?iqtu^@8?mctN^knAtU< zh!3ALq206$H{KM$DenyQyJjGpdy;M@Wl_TCV0?Jy&Zy3x4<++5w7qZw^Kl=3H5_6@ z6moHGAO(HqtFTCB8#e!L4FyvjxOp>BHC4v)J3Fy{PBz?j|3Z2_i>b3+4EyaXVW0Pk z;jTG{dzNf?+_**2SvSe&&>0jLS;8qy6)x(95DHdd7TgRboxjV_w;~nny;V5ALKU+6 zG?_iM4tShiM~WlO^!8yMJ>B&ygmt$-RLl$a)6{8Kg9Pe77U1>u6#5WahwP*%Y^ilb z(NaEEC@7Kg>>^aepC?&=KK2>;qFnV9S{;i}dbkr=V~Xgz--frxWAXc^VoFO#*QpOf81$I zDm8)|T8xzOU{u(N<9Yl5g^QIS^4@&tDtf|J#-8y$Fig>lN>Mde3Y`~JVbz_36N3)0 zKcI!0)s;A!_MEzMYiQ`ndE_@|p>#4A_FB7$Usr|0X1nob3b-<-2MhME!_lb%B-R|E zyX<9Xx-7#~eEbPYuZC%-O$MHHCt|rvD3rw(VXxU0Vx}CaS2z-0pGtA4dI@Y!ZlkJA zmM~dZkLR}Wlz8(Ni4?~nW$$y|)|iv{`R+E{+`Eo03KwH@?p{Q#zeDrh4l~zI?SLys z1#X`ZP9Z9?_iCy%ws6F|<9-^_8*J*iQHEw&SLE)VjCUkwU-XIkB zg<^5CVI#I%?7^e^jgTuS!-6OcI1>jIS~*yL#0+6qm%uA&CWgghsj;O4mbvM$2@xf+ zgxS;!Uxcj8!Lq%#nXx7X`0Xo0$eYLX&~O$^7BoS9AQgv9`S4llkKxqah#ENuzQ=mJ zJsSfWY)8$GvxsHcQDg++(&Q-`nz%u}X+})dz##eNMbbwJFIbwEgKhj9#uGPzBi(_9 zHZP=~76Vxaqd}14c%1?M?EDU+J};TZCItMhD##?2wGLq+fpY(juXCr-2S zp&>2Emz#jkdsjkVA%px|rKtS89qdkYpswl|p82N1BO@Fy%!<&^6^;2re5Cp(Am&jR z;udy8xUG-$E9;@yvjKJQYANkR0&?PdV5CsO^nQ%RpH_z$E30JYz`|rSNw;A7Cvh!+ zPg$0ZXf`$gi_?TE;}684?acFoiE!6VgWUZIGM3As=mS5J_UiKpuU`Vc+ii%iF@UK^ zCXTvQVz{7&X_~bfeMVo&K7OO^xY%d0tB$?Kif_HIO1}#h9hVe`ea$;Ih-&Ft` z`Fo81(LOvfmPXFea%P54Hg=zyNvZihjNv2?c~5!ZvkqcoTQ$PA-=evL-6%3w!p!D5 zWTBt|nsS{S`R>OF!Z1g!ZI^dC1I{Gwc&=~hV z#VcMS)46PzE)eErTpl294Ij}8c}&&zO^A@PMu}%9V%2KNS6!QKmBwLD{v?yIwHN&c z73jF#&0Nl?h3q;8VP2+4dFzQ&FHS<{unT@ungJEh7`%#>%@!jJ}xD;HcJ1sSk zi8P1ap#oelszQibo&OxmEL^lM{-FFSc-QE5^L3Tx7?XP;uo0D$>outFh<2(~)*qk==|P1Ye`lg6IuhGd&1r7g zF+BF=V3qws<~3`G2{bv0T3;b_ot4AvfDIUt%R#@;C2A>;!*rReY!XK29^WRPeQVL{ z*@vlLmLW1ql_XPik#M5|h69%1z0HT3b_wFz5|DqqmbQK_!=?cdI?T_(7pZ1?|2PVx zDwdR-dza ze>Zwk-_zx@s<=OR8om$ssDIl;ca~)$Wpg31VZ=NvsDiC^6gcj4QN(e=;;~9h7hiPVWwdK-!@IDEP+n4!6UC-+++RP{!>J zAJ2YZ;QCO5YJ%Nq=tLKF2Q8)o2N!65ai+Nn&w1)8Mo`-jge;?SS{0TAc{eHOF3p9$ zN(j$^*M^gs!l-1M!S%E?6pu<%Q)De7L`3ObxD}%~eiBg!J~FSm6%jGa!cIFYG7h_d zS2cx{`zRiVixVKFSO~T-XZb3Yy!ZiUw>|rv*upHfq?)k2sEd=o6Kmd884j1jRcCS8 zk{mU07Uv&3d)hJYFI~Y68|Xo&5Z~gOmgJ_#JTf_EKKCQcFimhr{Ix_UtCaVwnrN)c*n*m`k1j literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/PLAIN/int32_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/PLAIN/int32_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3e1dc514c7254e8ac87469c1f64b12aac9582396 GIT binary patch literal 3766 zcmbu?YjBp;6$Ri!5(7#Ev?e4NBm@XDN`N$Iqkw)1s6bT+rh-y1+=5j@3_)uVQMq`b zl>jX%R3%CohKhCyj&d&q8L=klaieR5Z+L`|HN1Nf~yzf4HueJ8t z=lg~XOO*CW59xCY{ku4Qdt1+`>8aV7DQ&ukkeQMaLdd91zVN?Z&pAhIW?XxwA8t7M zzyF%?{}eYI9i1FPoAeN_J}-nx86o_=MF>M^+a~s4P6#cWcRN3X2hI#(vz+s?La5iz z>vD?D4&j5&Azaoqgj3E}T@b>?vqE^Q3rvkSXs@Al2nFYca9hg|E^&VBj1U^t6|@au zZf*!8ts{6wbqt|U?TBt6+?*LgCH{?gTjI}kJ}fPSKB*zhlGnoXN%3SDAIX2eLkJhq z{uI9b;%~}1LDMnM-Ra$d_boY1_DZorXYGF=hpy@LH(3{Ehw!-mu7q(_rx04vaw827 z+26zQsIxEQdy5cK+K2EDJzPlVTB7wjNH5sp0fu!;~jY4`6)Uk z$?2fChk4ir?kIRYXqzs!#y*zDPhor^|64j|Ip3}K#rkSe|Cr}L;LD~z&HDyCW!}%1 zH%Hw?ZJK=?Lho_fciH#C>TpH~2l$jEHVM{N-Y<35pN`+FsY^0Tdf$tA@Yag;H@EdX zY%O+D&sA_H;IDGFLHhd;cM@{3{+RH z->u^B>itn%w>Y0}J{#JZnVdy9PUvZa^M!Q0EN3CV$GTUq;5cA*^`2LX&%`%e&Ha2i z%ln7$cd2=jhTUe6YIalLALDfWDUUI*5M)zY@-7d3*8o(@!$La@Bule(i95$g3x4?g^ttP4soNy{BHs@NT2E zmxihGHo;p)<4*YTGyI)?XTr$j)gGGr(ol@2!uzv4Z%_Ag)?&K0sJjpM1MWdPGq0!R zVH)!I_K})Z>c3Gl4t5{)Y5KUF_POeQBzHBv!<&ll5^JUP!^oqb>F}bj3*pql z86dYPe&)=hk;eYGGh%N5V?STRxnF)!Bv3m|ZZG>t_q|E4pVF~O?Le{7c!u(D3ElJ6 ztl|4Q9>kdpRQFSJ`6JCY=wm$oljW-ZqLiDUHbc0e-rrg8hv#%?ZbCiO&40;#{Y#?N6%uo4SK86e;S-| zawj|Ms5au1xii0{DSltf!yi9u3uzuo<14V=6yF28RLy+aYV70W%u};NU;F7PM>#+xa*IW1SYc{{1mcP^cL6}2eFQDmjXM@Gp>+uktTKzmLXC<6fe2w41ThylL zC)@i9Tv6*+ysy>QDf=#X7mLl3H^{7?)Z@AC_e*Aan?7%H{0kKl~+pFwYt z{Z$zE@qRLIgZg9o-<0Az4d!q4RZrUr{myY*q_@>{^n;%-c0Ig(F!IF?sOgP6@(0*8 z@TSE(&bOU7)_LBhpI6mQ^j-)1cYM7^{c>JBq-MR`^Tj5}*-z(Yu}x}fdGs1zzBaE9 zry#R>ZeS_i7#LgO3u8O{&tr_z3m{>AEFpn03T z`BqEsvv`~?Vrg%qw zZWaUR>4m=^?qPaeYkv7WzfaAjbnk?Dtv+rSTLLdtO_Eh&y- z)kBsZKfxcr$8(&=JF(UKAZv=8A^013TnG0{co(SYj=NExduW#)68PWY<#oKBL)$d5 zqdeLo=Q_CYb2UhB?>O7*tV(>I*eI(Ton7@d!}>9yIWHqhB295rIw#2%JEl;(ZTLe#F`E5xsWd$rhPJdON0o4#EA z?=bIDn7`1|5^EN%Bh9aep7We{qVZn$XNCL>*aPH0M%P|AwQ-lX#eI1b{~G%m`_>%& znA@xPHsk*aZl(7e&%5>UDgLE=dR+Z2>R<4j!@s|HFQYjH$6z>5x^Hog?^!R>5WiPS zH2b6tyG@ux5me}<6ZDLK&@l{nl%d4xW+?{jR#OkTH ZPN~c(D9G=f-#aVY_(F|24D%Q3zW}D?l->XU literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/PLAIN/int64_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/PLAIN/int64_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1e6e36e541954093d164c3a309c98ecc80f10a6e GIT binary patch literal 6998 zcmbu^c|29?zX$LgA}KA^TY6Yi6Htqcns&lW(p`)S`GTdY4DYIi-7hBc|}*WeKz(HU@tiD4R?o>4FW zO&+yn!p`@7f5I%6#A8q~GTaRMjzwLEJkN(a;kuJYzQR)1h8xg+Soj>Ya*nTuI+{IO zpx2iQy55LGS|c*Id*ocmj#&H`w7Mqr4n7G#bpjqy`A+>>y?0F$vWWg)biMQi5B-rj zBCB)Zp$;z^zoJ!5G;(?8ZzVW8!Ndo$ufC)EG-<`(LjIxUF95eF8t6g3qM$q&_pwzG zigRBHhF^`&(>w&Z&tHX{7`nz9uGskW7j$~8OXDgOioJ~-vo32X?C?^11()-3dBYn* z=PyBN4;4PRtU4Cu!?4)6Bp%i&*i46~GNf+8r@Gn~;lf0r&2VR>zyw^`^@`TPoiC2= zA9*{3`knEylg9U8g$UKdS6OX0oL^D8kNUkWEkp?Ux}KybyxsbO2RcLyvtaZag;uye z&}$!58%SLOn{WL10nb(`{0(pQ9@q^lIAv+PNelY-ATKN*ItMwIowSARAM`5V!4t7} zq41fOQCNOhCjzFX8PR%`-23qo`HI%*!*Hk2!er<^ek=w4a*n2c288C*x;cjEyP$ny zb4n;=<)_g+Z}i|wM?UY8+yiqKPq3kUX%WriU#gvJk-z$ljzK@;+*J4@H=z%PS`OMn zz90RXFtWjL7Zi-t+Yc2!Up^1#{^V?d@#`;_!(E<53n25`#e0xvqkb85a<8HJf2l5f z26@Yk2^FZM@|mtvkn=P zsGf9U|IkC$+TTa}FVwe~?&lRf;fwa!;SXqhJx8r+K3|j#(fF+5&eJ}NIQNi_8(;09 zeUcD2NOctaQOX(bw;gzI4kcbIxI)$p5d)Y#n%fTFzLlnW9>1t(gBKJ=OyfRWq{pE9BT7uly^o!!GjEcF8HG4rX^J0o=N?Y9FY8leDkd;jlZ+mBLKOqV=fDRW{%T7$Y&QetVmu#hd zU3Ax;`oCbnycEZ)DjcXj_9e%RAh&5h9fbLPO*`QOo4z9G`(kYvZ1N7*g$de=&qC&o zYt3-!G#jdy-eBQGWS`g3%!sR|eV7=! zVkP2y&6hMEPs$2u|1LLcrv0vZ;Wo{$(eg5S4!C>NB;ou@Y3;4BJ7|y@M0oqKzHy@5=O{|Cas+H*ewyr@`*UjuX)@YCC z7pMEL-_y7q`PYEdpYUFyC+&~;qdE!5iKsC%7$mcN~0i`XxOV)Jms52VTiu zIga)(drwe($OhHZKpa4E@56gw7A5*A)s-2u` zkY}h$g+q?q)zlAb+ZV@>nckeVKB~vNqmZkAyrSn;-CqU4$Qf>YA7IC*Wg|@I+D7{@ zO{kmJg)iKiuGgiu~fbOo^Dk3=ZWC(7Kolo_vSAw72R4 z4A`Sb^A(@&{2f`m-J1F%H=KSEnJsJC5BZDt3P8is19ZK)8Y>5o16oom@b*YXrM{dygr>Qy6{FW56jd9#8f9420Y}e@lZTmH(;dYB^xZ+N)9h`gbuF+yOZB{8M2ZjR73S5` zeLXZqvyp=rpZo^nZ0T{y*r&xwb$xomG6(H*k8~e~&+Y8#=Y#fzUsI8fdONLwGZIZ! zK>pzvT6bQ351PN$Zq);7dNe1sh&I|3P^?nfdHXqs6Ugrr+OftL+b9Ap~ z!3PtT^6)j^MSnQ>b8!kZ`l_S=i$*vWLp9e`)DO=`F`dX=Mz1^IIX{6QxX&PTC2YJC zPM=q&vx`)ai&bWG!-af1u6fR?v`%}@ubuxO`RalQ=8Wn1+{*EZ-x>4=G0y;>`C=d zz}riCUVJmnbJm=0s_W!cy0rh_b(nSGefOhlQlWZJRvt8vkt%|Vv&86qp|L#ld9%9Y zdjQ%wGW9k?<1i<>PsCi;*~l#)!p31i^BV`~@}X}VY<$b6`j&mtO2-}h_{z~PRVKC{ z`Zqe$^>_}t2q2&4I79P#EASTWYq`(zKhS>nmg*2RyZD;+*L9Ee;`^?&kX61|UamIY2@B>0Wk!jspzHo%yT%YMUW z8QBjZhjJp-|H)|ke&kQ#g0xRWr7q7$wm-F+*3s{uUB8e6{bZ&?E>~e1_p{cr3S_qd zb9%q$`8Bi-`lb%LXg_f1WGj?neWZD{TFw?jt~1Y}{d>CYF$=k=y^-dn$8{_9Us`w@ zeV(3cE|tOY@3nOq@S##w1Y9(}jDFtvwy0hax$6&~>+nPaixN=530P%%FcKPkGkylICZBVG zm3e2@!IYOFv=3|Js?w30#u8t_6{k3>G5@gnS9>!=YN_WxB7D8e0na!iU8)-xj-!nvi8g}5l$eOuRMb zZGgzdm%`EB9sA)MJYug;>nDEqz#`;0(WHDBP$9Yl##H(X!sO$7gJD-j1~;rVOrW~k z7a@K5}VrzHB+w*M`H1bUk4& zlhG_4O^0wX?&LN*U*016|kG?Df!DhVH`IR5vF;_xVwY)YyaLa^!ec@ z*U7|jZf2wkG!Ip$`uT15mHO+Dy^8L~dC-IQ9cP(%HqKLMNuzx{$KIh4+4)jDt%vLM zuuNo)9M_GoV^#>&fksYQ7IIF+%Zu>BDKVPYo=3uakxM)G+=9B=`>6i>5B=;$zTJ7w z4eE$4@`n?VzV!a?)x~zm+`)@>z%vJ8qM^Mz*KRmFx{d1VUXTfm>#cl4Iod~4#hhRi ze3(V;=3}ERGK|ksHs?-3hx3#3bRDCVnGWF0_v7|F)1NeV!)UG>4&GRM8K84K+Wxl!vN?>56$xZ1ph7fyKB>%;O_ zZ9TBJe&jxsc&SD8!OoV>K_2@0{19B=`S23_+gqNVXV2}l{E_e1k7mOX`*~FN3DU)H zkjo|J2*NE-?CCrkryEhoPlJ4E9*zFymq5-6tf%=a$e0*K-eMc<3!UBNsBU<2b!Q=) z2^U~}ML3vCG$(&q@9*U9ve!|BgCWATh(nyq*)hP8A=)Uwlw?ZEGbBa0n2Wd>%&B$} z91x9joBSs~L;HXCHy@_tABC z62Cc36|dziye#M|i?yD`+Rrj@W36Se+*pQg)M{YBVr{^8hAfsToXr2_WHLt6W_+3a zZc;bA-vrm0d|BY+sh6=KojG}t$rpV))s9;mcy6_|bf3y@EF-$U6Ylr#YqC54{nF84 zh|Bmm`uPUB1gJRq_^3Gg`FZb`+2`Wt@8RtwvuxQ?wWVrINy*9aM@?feHckFT?LXnJ B7hC`U literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/PLAIN/string_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/PLAIN/string_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9beedb2be5d073be867dae49c40fbc5ae98271ec GIT binary patch literal 9764 zcmbuF%Z^=F5r&T)(wNASl|fs5q#k%;i$M}XP#B<0PDhDAfJiJMV1m^5i~D-q?QS=U zA~9kZaVqyEhexfpHr(=)nE6u_u1_)7Egb=I$QnQw|DsW z-s-Cd(`sdPW&P*hy!GREZtGfmdz0(kcW1NN*ZlkEU;h2w_y7CHSO1Ulci;bTa^ZPg zKbpu3U+~Z4dfOr^6S1%Bor$W5Zjn8X3-UTH$g8+e+|`ARggrk8Wm(s~X|Dofv$bKx&l(V5DFD9BIY3^)Q;+6m8F=z`oKmxyvYt=L(c01 zY`4S3umCKQyQ%aZ>H;X*V;`WzoW6$G2tplzcoq(UJzsz)%s#@(I}()hv@@3q+RUj*qC#=S{a$=QPM|yq^gl&_tG)Qn+(B*Q3t_q;)mP9op?(*LI2Vx}^47rDd z?O-O_nom|OdO)+Q(3St_xxxEpD48S z5KHtqsCBxb5Vw@wU2wAbk&fnqnP&rvK3(?U=iH1jwxl7F(ZY7I|_`;$(8jWy_ zT*?x7*)NDX&Mg)NiL{)211*jR_7xBNl^(1`%Qm}s+D-=alz!Odr3TM+J2(KdR2pXa z1S{MIr4AaMG~-<4{19>)14L!RNVOk`(L;R)A>t|C#MIb0&K&3|22OxNMV6u3QJ+mP zeZh~q0+7&{&*EI*s%Z8zACO&>I(F3T9A3K@cfnA3#(ZNt5(2-{a0);2fT6q-bR^O> z8rXBLIu);t-E0fqgjR7#DbfrCA-TMWOg>-3Kv3s zfJP($RC~R%gY{amsLEl_p`snO;kt4dOpjn22Ae`*KU$ty?WV%T)+*dps09uZM?- z2pzVucXabrS+JfZA*taMA!LoWbO&!CuvKXSv%IRK&*YI$b+$?hYTs9SafCMf!jlQae*lxEUk^;y6D8F;$qw$>PN3FA?;Kkts+3Ebf88F z0I0}ehY8r(Z1Ee!miP2T*_A_y$C|}SYeoiM{O%xJyAwa6gO5iF6$Cd02-#sEgnM}8 zq}-yW+CoSTeJ28A0naB8QGRgK1|n6#9ae2}fO5E~WMMERRCw8)V8>XoD<*_e!Cq9O zj}6*&hYszygVtEG;f{rNFj&q4L*{s6QPQNyYNxMb^aqq8Lcll8q1-^Hi=7q3?I8JT zXA44^OUQH(nby*SYh)%uf{O7+V0eN^U0~c<%O&#R1VO8zIv0^Gj2B3#M_^aywALA1 z8qBk(LkW{H-fytO#X*%UxecK`S~yb&R}BK#kw;{mEdX5@y?5)LkV;iAqGE--?d-Cs z`yy7g1H8|uE@6;~NRr%U+uO)HWgu1s&`4W=YfO6IfpMoDAG71RLNn=}4Q&OKTY%#V zYwB%qGE?wG!pn@I<#XgpZ#rX?5y?jh0;=u>L(PjL<{KbXowrLh(qK(!iD1R5T1C zUsm*nuv+RFu-%xHwti`Nf^kAMK)P#zcJE~)oxpAs3@SR1?Sv*cwIf zkMX1t_gNmdu54w@yWAR@THt^RjYw6TMb(K#O(^nGhR_IM)JNr(UMhyi@H3m!7ruvU zP~jF5*%;FoW-3+!bfpL|ONegRLf6A~bOb-<8R2Y1FgzT1tAMal-j1&L$cQ(ovWS{; zi<*ma3b&58$~Rnf2QFm3AX5!*EDJktg^(F2Ayw<>&909q{EWtfin|r&lspi+S$Ebn zc6npN`aSfyY#Z8-0Ek!P8AELn zEx~^ES55F4;2G8Q)q6{jB}bm31_Y4~qw5mv?V;;rLA$&dRv7^FZ9%W{GcWLK9>6T2 z@n@(bnnj(NzCYo$CkA6qt^TkUM*@Z-xrC(-N6Qt2 zwozy|95!9F_>)KM02et%!qVH@gLN#)cM$Cp@~yeMo<+!IgPlo&%z*??C9e>=O~~vA zw&fCDMx6n7?w}**@wbS+-YO>oP6n{q96cKoau%k&#$0#}6(>$h6i68pS*k+@1dk!K zJF23E>5P#*pZ72HtS51-wCak&hJ)Aw!bJkYsfgbsks|y{1$5m{$1G|r$Lg_i_D=PZ zGXPL=NY@CpL1ooO55Wot2Xc$p6&SokBF4xK7;dMRFNw_u6|{!})eHi*(~8MbZq)&^ z(xo>rHL*vi`y4~vP)Vo0bQtXc^S2I0F_7N2Sv_y8+~!1z_UL zfSDeT*fkJiPaqVdJ~)7@Y8>!#7E!FapRfp$~3=vcmCeTv$d7y+XuTRo2z$ctM?z@UHie#=H=$&*>Bb-oQJO- z%-84ZPiE_@_vepSX7kuxW#i$$zP-c0C-*-2um8_!zCQg=sM&=4$-}??dA>4#^37ZS zxDPJ6uO2+P2QizkuialfefqmU{31MA`}>`raQ$q-^`$-+i(fAm*Nct)#pjE~{^HYp zaW^&=i{C)>*?`iGtY4sVb-}5n1)Av65zXWHxzC<{#&pwyrhWH|bkLitPfBNIo z1Ft6)?a$U8oo_BLu68fq-#$Noe{;D!yMFXyclrGA?DWw`AARuQ2OrMY$>7eN+3bty H|A+ktC{yi+ literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/binary_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/binary_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..42fceed1261ec83aeafe4dfc2161f7ee3f134a3a GIT binary patch literal 11772 zcmbt)WmHyO)GbIO(jX1e4bmV;cQ+_1-Q6H9ARtOew={^9gmi~AND9&o5-Ny@`mvsO zd}G{S_s6}@0M6NauQk`4bMJGW6KF~6+$BLJDb7WPUp10W91Pzgbf8>!X>~84UvFri1otGk;#}|1-Fnk{{%lSxODDn? z0;)@NFs?V9y%d@eJ2+_9yNSb^{_NP-JLsd0-ra6^=Q_2h*RAj4iccZI^=2R>sDx9z zh>tLZ9*MgyH?KE@Grj&@U$~2V=whK{eWb?htyusJ7F_!I8geovN_({zK0$Ex=jvgF z%P=f6BYFUTdRw^&^Lm?J-oX4h>u3uA)}5uZ1@1tvOvD%kmeT4I*xzlo5c`ds)dPif z(&qu|kh0U2Qx=^!sz3*NqYDsbaM|V3qE!{eE4xt$*5rS`bv>dNl$Pm?`^>~&DI+}* z+;jbr;=J;n``>-YyKs_&psG78L*g~v5~6u3c-jyYt<4lfSkQc54YYjSn^3v}9`;7e zt}T?N^`I+CTew*dxfSfS_ix$yRZ`&g+n!Ma&;$)-7so<`Zj2V4`i4LrliqPw_{qnI zkaybO?Z~aG1GqhMAw4gk+LLewr@d-BQD&gzIxP+Y(Y`s-GbfjNp_Umyuj=eL%=S^C zL2NFU$B|14XZ*R19@w}U;XnXI&gily0{3&v0D06e`sg=M87)5NJ_}0pdKH+Nst6rd zQI&LUtAyRkC6#-ffLLSpE%q$rjY-wf2v7A7N$ARA4mKpmSHO`Y=j}!p;bEx&&?H|q3N$i4V1#VbO-8*WLorGH4>*mo18wQO<4-wu9 zZD}=5V5vdWvE;wEBmPID6fK-FTv&(xPDt^oMPab3gOu_TPU!s;S6%x!VQ1ttR z`Q}sy2bGw@Lrj2;$ZCp@n_UhgmNaJA+XPo$j=GDwzleu`!j;SZ&tsJ+ExNGhRQ0y&^%ZX|Jd#LAph~JxGNl2(DZ$%Iq4ge8_NFso973; zKJ4d=rn*(HSmZ)Zv<{i9I8_i~d6%+yc^VPq7^KEHt-(5=>}?kd=S#YI7Y>Cp*RoGV zX+XuGRP3+OZauI}){&A_K6PDD?)1-!xEYJ5qyM_Gwq zvCV1}97F;ClIX4#U_|Hg<8JDPF$P{hkw?P!qM0dq3j%Of7%a*4f%UR>M^TU=gwB$L z(s*Mpay@AS^o}j8lHjkYu#~0FmqCh1gI!(Y8YW4sQ56ADA1;Z1Wd|-4<7m-FvAkaR z4AxDxC|s=b5X8X%b5FC;io{N=BcPmYMnIO@MT_w*2y8F%Srquvoe7|{ottrrH%~I>Tk*V+0rl%6ZJhGyM@cS1S|w8+uVBHx|LCwRFdO_nJ5K~$h^lu+ z;#dcp&BiflabK(+j$V9tt_byWe=R|My^<8+2>lvhUU=B?O>khh8koDs81g2;6kOt? zB{ptRb}@M~x__62u!U;&0#O`(Q2?>sIESFGI!(XLzuGijQihaG@<6#L=eR~ynmO;6 zhh%^60N}6GKVEylko)r}txAWW;m6e&O=20N~=lJ=^nuAZc-Z#wGbWkle9`PS&?`RFE2mT=qBrKNBAyOCZd-Y{W076n^ z4j|(gZI$@=XU4JdMOw$|6(B#~>DG~A>nlM3Bl_E?(77vtJsufmw}$zc~5boTriPHV;937|KwRawEE~t zIzJ`5oMVrG+Pxc5_f!pTtrECi?ka)9A9VsM<+wfm%|f-zwZoq-M`X&QdlwLlt(P=Y z5qUn*5h#hQ3gc{(f|n|IC*KIxf55nL9NGtm{mLUAHq{n25U_m~x*(PTL-_ZvCVUvC zG31QL87RRilS);~?3B1$Fp&y}@1F)RQgy%DsC)-$gd=vul7+AU>n*4`31{zsuyP5n zB@2@G@0|;jw!bAZ0fxts#)sFKw9mk8ZPeEWkej&oB_}u^$mzA_=sd2Ym2UQkhJdbn zbNC?eM~=N=NQ}203d|(;VjU3Z54F3E;y{=_#B#SB*dBJ@?B1hrt?%NE3%v)GsCA1d z$({sY0q&<$dkR*~H@OQ&(l7-jN~*?#SWhn2s)(5HNdsu@(*iL1EF&~WM7@g&_a-Pw zn0xsqSX33je)cg`6p)Mtfhvo}`6qc51+NL1x>Hsh{h@eoPm;~I(e35Yz87&?5JTZ% zVDry^RD<302aP%YBCH3e`nFkGC=z@5i9URgO8##eAa_;B{r+sEcn&bdM}me!S%M3A z=0sfGKr2YJ4mMg{S>TgA29--~9tim4NTgJZ^6iY!>T^YTEEH_{pufoPT<56IJ~i-t zfDIfvb|+6^SIdkdYMeR-7i({hEjNr}r?Ua1`3~Y+o8PO_2OZm5TsG}Rx)mUo^LZ9> z2^fz1;ur;Z34(VXUMMOnyy#AT*bkveo+d~&Wm<4cot_%v@w>;rW`8%S&~W-WUuy+r zFS+X^&%XaEk~Em#gR&%AlP9Ih(H|mV5Cuu$OU+;_J;qh)6iBN1?$1s^3%CiHzq9bn zxi6b(P{4b9W}bHoRm_l6_qYEFDX7WV!1yo%9OD>B)hDz}cno3G7iDxGH*Z4sqm1IH zvn17XSU3kog-xK+q4_Z&V9WYX_PCmv%#n1xh0rpx^ACDLmvOM4+dJFKCa}_spw8@v z+n?p{9HxWy5KJATM|UDEIH-Kq$-F%CLwLk7O&*yc(cUxZ4~wu?}i@R&IDywwXiV)7u^p zsD1Zr^i)Gbo&tPiILFX{N^RUDwr0@Lrg5MSOiawQ&-$0nb2_dNr;uCE9|(p{SdPMw z@FNplz1ANj^qY79tJ#D&9-iqHuK_i!?Rh!5*By9(YFoH7*bAMPPNoD3ok57ZueoQ4 zZzOGsaxvOGN;}(zjTU){aj0$JW;X}EmQ83Rq||?-{$(cGBct#WhV0N;!gA7b3c40&^|xr)JW1T z?41b2YePPRLv?ru{^EiqFddqUQWnY`<@mnMq=@h1It2%fH(ZB3qw07HPZA;cg?uut ztuErm=;()N-&8^bCqR*srURo$4Gs!&(=i4|p^ncV znfB#tA0KXeIJPlIG%|1(H_r);-;>X$LrsaodtRY*^8tPfK=MN8;g6c3!wLgwqJaFciXZ%(oJiWG9YMXr#IvKOuLsb^rn`w85+ z*%Bu=9}zZ@UJ8y&fz7w?2wXfT5OG0G&#uSOfuI}4HvUb&{;Gx!Ib>?9Z9))UO39xV zC0g9gSkxh9eh4b9bE9@VJniYqf-#J9k7JsM7iAeRS^I_PSD(8AOl&$ zW(AhrnBoEnFk$(B?)#pv*^Z)^l>V&%bz_uengDcGY|Punl#=|cT5J&V`)5OW=XM}J zaQP_5xrB$09l{&ts-MDe_l&E)+XeKTKjl@0uxa6*#~e9L24^tYx??Gsc!f#8D4R23 zR02{gKJgPeA{NQTTTj``J6&=@PrqehV#dpf!dXCKnZK2# zsQiB7#$7l#W7U7crfrEGG9ROfgeWw9CVm*CDGZq&uea*SGEseqfmUUohzXPrHtjqN zgYs`3&88b0Ag<5dbL7NeQ0off2&aBV!m4R!V-q7%!lwXyx#CF>adz3S#mrCwGxlsN ztv7qPXO3#HhcES(93+;BOx=V6q!S?;w1}e+H#4>6Du0Gz%2D;T6aZ8^hG22~i$Tq& z`Ux)Yaw!%GAd;#1Z`9@B*MYya^xEMsp(h)q_)|w(=5W{kXmSoI&K^-VTXAZ7?b5vkIg{CFW5OT*V;wC05WrOy4K7%tRi)XH*tJE zf*~R-UJy%v!v}Q%J5a&1G1|`KjSQ?t8kIwZLuq-|_FPcO9M2geACU6kD>zyd`fo

QQXzI4o2-?}v5UoUd=U zK6oU02!TA06Qd+e>H~h&GNXWkpORD_*Z^wPGz1B&#LUZ9T+$B;hMcwiL56h*8T*!Q zcROF`WZ=OSK*(VtziHWO&B1b0EdU;RyhrlK)nOlDuD&j`P9GX?flQxqq5UN3`ivsu zCW%X{cQ6Mr0~n^B{gPY61MY~$qSx`02tyR{Smwgy7!yKvOf;=BtBP`Nd9FfJ>EiE$ zxe;TkTHbOXv#2<`6wVgOR$xmZ$|a2m@XQD?P%e9)Rv!SmMSp1>WM8fI5o;S%&v@AWxduBACOP zn;FayG}gn{Y{~hiv^>gdiHvxbX=>hN-E>s@3HclAls}|u$31&N6?qF7$#L2Nqw2g3 ziv&!G4^#ne(0mo)Q2>Lk84#K_UayX;* zATUsd8q%+*qtZhqZoHEvs|8Bi9Qi`CQK7A8tZyZ04!A+$lSwrvU?OVAq5My{99$3y zOcm&3Z)xIX8$N*=UD(!U3keWpVe_7ggQN}?%@?=*zX4CFG0a4qmE58J)NBUJ3OPla zPB6fhWfivo0lKNpDN|=W(n#_rFqw+sc8i|9)#TwMV>p?4K=h<26gClDcfmiqxrB^T z#YJW4I&aH@>8$FvsWFP3-i-zV4VRc2{Gn-}WsR@t4*gcx-#wpZ`SU8obMr(Q#Mvk0 z!NdDhlQ$uVHI%Vpyy~`jvPI0f=sPM{{l{mo?ji10cG<_3t2=Tfzagj2f^4epU+VwJ zc%#()9Bo+l>uTRNBeRIkC0PsR)6^RcOC-DVO8Z_PMRUe_Qkywv>sD#L{ zrvjgHdoYDzE# z(%(-0X|X?ELz@|e?iUs}52bl|)i+@K2PPd-zO*$anrJ!$R z_R2UcJMaBMoQKd;TKk>u*d-#qj(OZCS$Qh<$UM{?z_UbVZw*Zf8|8M{Z$RoV<+T41 zD7s;3J2_N^JPiA2_Ln6GOa5Pg{owU&8d(2v?Eo|$Caz>4?SH5H!vae112II{%%Whx zApf*~tWHq(lnh{hw>7*=g>L5kd|oc5=YoZwfjP^aCmIJq_<5C1{9fZrK!>iyb!jB2 zNv{_RamncJ8!JU5a>Vr<3wRVT3oMLo3)?__VXtQ}JJdIS(yP7CvD!E2;n$2Lfi9KC zRZ)|UOY0Zt!IwhYb`TsIn*$?mlr0&rmUqVJh{|4A4w3?&6_;BN($voN91*v`$A0#L z;jx+E@Tq>_^9lY3bYCH#ZP*27=lFTbZ@U0jN&i1fZbZ;OPUr(NRL|ozl#0u$UOX|f zP8b8Geh1|q`DqPa{&*;V31eI&9T@&ZwdzojvF446 zmrWjxKMcIRbK39cdL{t_T*8rF^Q~c*MNm>K+8ma`zXVQ)&&@!pdZT?dHyYbY-ha2P z5@u+_GJXd<=SIUDIEIe*RptM^4tjz35>{{$3gImvX7so%Ld4{N0ngKs&y{$)sR2T_9fnDh(MB^utjK z(7q>)ctPU1QF=r1C&ZI448yQ_LQ`oC{ih!%lexfx2eEVzv!IsY8?2rRQjgs4yFP-M zSSpWasn!!Q(Crxd;JXHOmF=JQS-M2&sq~5?jNx02m(No2#N{Jv z>%0eEG55zXfWI_!u?eN3pfhzam`3Po7pn#F-~smrdrqH>@O_B&ur)-62;DXIbcLcv zY~eA$?v4)OjQF;1)yg(CwDTLL=jlGLl+P-lbXeRq6!1sLf}mD%5Hc zWM<2xLR@K7vOUVBEz2c$4-vldvx8b%**@Fg690oJk~>bkAxc0}V9MfY3F7*Zy?8KU ziYM?0eY_X(6u_Nv8%UfL255DF%oz#A)_^=|>La2SZK&T>MJGBnNJ)RO4aY^a6q{aD zi!<)}2RYkc$&z5xtb%aL0oiBU2=6!0ycfW`%GV;#NV8T7t8;jq&t|L4k_wn%*%c$s z`^>~rz^O(2b+myk3~Y;I)kBYz8dA6>E?7aiObX@f<6X^SU&c_ft(c4ZOVR+Jq!F>2 z-ahK(_#w#P84P6%c?Nreeq>>oGa!8U;~jS8MN0-~c^ zZ>k}dM%1RPDR>g`(y{%DaA5KI@{R5T#QU(}!j%{zdI6yK{y9p<+Y&48M{jwKAHXVp zulrjzW+VViMp0E#cL=|33Uf3T$z|?{P_Py=%NcoJ>@*mfPm}>%nOe9qD8uIz=)r zB?uQW2p0mMCy#8kWw2n5wwbquekP2B zMAobbu;I4riKLibz$K0@6!D^=^6mJorBvsJS2~Jspr1n79C08iE+THKUbcJMnMeVI z^=t=lAH(_G^)wN*9U5Xc&){OQW*{#3_=72>@E^a8zQ6egNQ&BGVI5yFXJ#YR$egl| zP4mO9u&V4))V3YW>O<0YNAVJlnbg{*g)ez{urw6qsjX9J_4oEg95DeU3a2TIewA4A zF;u>@;+v1_0}lUjoHnI>ud@n(pUjHLCm#ktbgj~gzd&PipE@ksCc`Tnc2z#Yq|bg+ zx5=Si=2su>1B<;Zd|8e|aeotV$*8)Ie88N-D4`zlH0szo?7WAykPjDw$xTPN5*S%W zTd7x<>QJM4FZYArH1B_H&w+Y0rcw)vAvqEJ?@;&t^;h?Vj|$+|$=zw0y;qYSexp;aL4yl_k$iy1S7 zc<3igGs9T~MUb(V{r+Balil zqb!bi<#>}ThcP5ppCw^xB+s_=@1Rw zs3taaRRwpV0~BPgTsd+P z&#=biWPefCdGOUCu8|$OmBEunQi2eA$sbM2Vh4yr!=iNwx{aTm+&cZ7ms}M#s|VCl zdWbj6m#4{D{nkOUA@(lS(fK{Z2;*qW3zlA)y0V2F4u;Tn@(1L-s1kHE0n^Jp9Vc>!LLX=%C-rT8rK-k*qDIg8T zhW5Jz5hogP1Ck=!CMMZDc!Ny%6*~5I_m<@GA=PNB=aFuh0iSky<3t6LBsuJtj;dLEOp@QsBS6;KUeD z2=Iq$NW*A=*nrxsNl>jVrBmMu%k?pxb3(v}ZLq2|QhmSfzKap;Ctt?#Q^l#qLx|)9 zBOJv>R5+v_SXAH(uW`xr!eS!besp>WCZuEtM$fC^v6?jmZ;I>xgJQE@7NET5hrfQg z+HpIJ6Z7h~Zsdx)myKrm@FD(1i z($;`sU1S~ytN7zmk`Mb3$OGG+ioN-P+ivC3ut`%A!3o%;UkT+B zJ*9PkRn+h+V9lSuXUeA|1YoldxySb#nPL!cc_Wc{fo~SMLB+nwcdJ!}7qIMJd-G{A z$b`7TeqIA%&Je*5*KP+}HxL1JY37>qN8S^XXFdi+q~ z3ked*62bKkC&>gJ2|fur2|ATfJPIrNe|HOHH~){vkOTzRnZfY8{Pei(=l}bk!2h4* zwx8I+NT_t3NT|f7oyeGsd7U?K1Xepy@a5>bP>JsGsO^7hj8;0R2c_S_QL%a7hjIptd~HE@l7wGn!s8w zk(S(@K4LvRvpy0d%QtHWj`mmWl%-#B9eVC^cm0^Upm)K(D?jU2q z2;Ydn#t8p}9K)!Z1Hp)pQKp^{imNMkY;PRQU0ZcfPJ%QH^O5$jt{%9B|YPAX7& zZB8oEMKDe&F(zA1DYF(8PN{I#Z%(Q5_A!1^6PU65q%N{m_(?qVyzTg8jct!LYdE}dvB zORl{nHcM`UOeIV1BZ50i9ux8`Up%MvZN7NTTa|qAUh>-c;{Pd)cWhLOpjLk}*>cv*cO3>BW&Pp&6I_qi(Dyi*iC?<31Y8Z~t?&>pq1=h82 zVguW?2r}!^wMZ)O-L>a*k*w=cj48J3(X7R#>oJ@SyX!A_`&l<)1!iqGUW#m&ZoCpd z-`$9lLTB5Imm{^?Oi*Gj+e}mw+S`1srNFk8q-S8am26~Pwv}Syy|YFu(zG*+RwI=az6PvvuXIzWO-&MAmQhVEfuYvp7{as!D3hv^0xs->By z+4UP3JaQ^>DrO`q5*%_tBy@c8{{~24gfyxa8Gf0kbCH=b|8J)>a?b02bVdSbr0etF z4J2{WVkdHRa^`rHPB?iZfF|&mF<}RZoQe!bT1Q9IQ0zZOWO>Lu@Zpz+k2d13w6vq1ZqYc6T zedOg4kur#W#O{CnP+G>p*ht6rf7{n7h;bw`I#)AyH!mwsE(=#zE;Dy`7auxrD|Zh& b7iT&?K5kxaUUDj`Yo(6J*VnSw|M&er9VDIY literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/double_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/double_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..537f13cbce46351c36bd0eb2e6752d7623607184 GIT binary patch literal 8017 zcmbt(c|4U-_pi*2h6tg|LzKvr@oSzrjx(h)WhP08OqHQYhC)(_455-rMInSFgpg#O zhY~`X@9BN-@4x%cy?1@~*~8jqnY7b09}vR)O_TYooQ4YtB# zf3-e)EfPuwWA8V<8PoOSdzi`>Xp61s;bY27dRWisG7-+$bnLsW5y?$}(vqCiHK=Ix*d3f53NuaIb}Lqt>AE0wah=HTlV9hkSHA%Q0{K>M>k z{HC{L?5-ezz1%cz<7aF1S`VjwI!}g2;jz<#1D42<<1F_kaU-?8P+X}=6(Nra%cOZN z@Nm^NKb+Z)*4C3>jP^+2>GY1)?ai9Vz8w6zw$TQ{x-q=H+WLq}95QzPs*K(5>U*;v zY9OiGWhl*?3a#xQOg==@yflbN)HF%mO5KgDnIl)&zUxDrw#ChS)--UFQCwGlG<#ANlJchvMT^%$@mVzg)eE{PZF!fm zqd^0qRe8oYFHzA$+E!LQMMi-~!E*I+T_n0BK5lg(fYa2aerrDk%IOl-Rtv_EX|F1n z3)jHv%3JoLLUlYj+U=oWtbxeL!;X(MtZ@FF5))Sd1yza$m+e*!KrOPqp*>H-mMn7Z zmryf=uNm)2+$9F4ZJVN>_HZL>HEOqwqYW;)WOZ7%*g;(H#H&0(BgC+1Khp9NK&JnS z;m%1K)D1dD3Iokilw#jDVNApOh4$dzF=Qy^{>;i`CgN`AOT)!=wutvwa8mzZ4(^z1 zN#(^FxaLmD53!Mg_r~A#e=3Y%qGTG zo|W)^MSKx?e5}2Vf>B3e$KboNe|3Xg^yM`*sA$(K{nY-Aj1xY2Ed@;^ z{FYuHeol@Lk4D@+>whI9V9m|d(7RN)t5|*x6cxp#JrlVH?h9h?4)3&HCOgzzV-}@K z+M&uLG9&k%EEtqcy~86ca3CRl`>|6RIAZK(ap^4?8-B8S?2s|SP2UjT-z(-A8#Ccd z)TPV$k?Cv4g_RN3@N49YoGxN2id59;_w2LtpndVA5z3voV^)XEkdUsVj zrUe-^G|fhr{VAr8muT_6C~62x?<=OI^!u8&#Z4-sUmd2x%3Jrb>_L%v)*$?hG2_`7 zRMV@85qrOL>wl`_o%Qwo6FgK*by>dSmNACc?Y^I~F&3Z(UMtnn;(=F)znkYz8a!Kl z(l1YtpxUcZ^g@P;*sTZoZmW@TC@v*3DSJ0O`j^^b^f?>I#&@85!N z%u!HsFie*R zo(;O{v1*8`G^yy;qafnE)}+u)b?i!eaiFK%3Lf$XWZqS4EFL(!e~p3_col!RXNsGn z?RwJlZ4Ok#Y#-dR_OLQMzuD>^7*Io{2Nx$tlK~Rl$MoBq2spUyY2T;6+Hgs0Zk&5V zM6cFyb#G>2yz3QZ*fe7dez(&#f-5w5l@0fdTM47GvqPYopaQQxX@AjJ4KS*x?dT&> z5pX#AAn7L=-9JnYE{5tO)p^MzCtd;TE_Xct_MQZr9TSg2Q?0>OT%T`VOv5JWGtB|} z2x!|a8CJqRt$QI;@(TaFHev?2xl2+B@Hn|`Rw|T=rIhQDr5|@7;&|dWZBI!AKV98x zP-uaQ%vc>^Elo_WNp|YcwnpCXD>S|X%HS(ZS3r&Z>C^-UZ{S8i3N^0vW0o}lVGxL ziMCzD3h}q9`rR{k;`GR-PhRhI;2Yv?6c$Rso0`{+yC&@rGNUULMc1FKH&`>4*QlUJ zfg-T-RUKRx-Y2jpNf%A@E?_k5uM(J)9OQ&rfO_tv8X- zxPHSa#tbqB{C7F#!^`*zr8nDi$yQxeTTGB*vhkg@SrzKMoN2= zuqE(*&M=b++@IIm@KtTfB6s*k}NLoC~`5fHe+?L^{ffbqTgC2dz-7(5!)wA@2RO%sI~ z>^dmgL|M82SqCv=Yex?I7$WKHwY2hWhG=SdlOq-+3%=3G3r|>#@Rg9*ZFicAvDr<{v?kjRhu1@7FC#I77i?UHrF%3A+4f@^hSB7QvYv z?vk(T&7jeqnVy?x16Lp3AgV$5J#a~H z=3fm{N88$t`l=ce+-Q>0ChCw;WLHnRE-H)`Zh<|2!)>rydZO<9(LHF|^{%DJ(-v=! zMhUW&nSxt@HK5*|0MCP*d4#HAT@Cv-CHV#-j2*}BTEGq@Nrw%CdPZoJ+PM5MO%czZ zStk8lq~eyz4!Av z+|!h4s<(pjo?m0AreZC}EiDl7G(d^7EG^q@X2bM;X@ejncSCjyAk}lwbs=F zGD^vt-L4RENvKV+>$wRo+;@9!HM$8Y zAp6SScMpOzA?EDIn0rYJ7EhE!rsOn{a=$PuX;l~3Kb>q@Tw{ULPhM+Uh^Qe($KKIo z9~~db%5CQ@w1tpORU`WY0bDuZrQj=SfwfsVjMJ|T5rFC5?o$Z)x6DcvegKi?|;ecS^hx|OeP=tQUK!QeMvo_0483Iwz5 zTof`Sqz}%Ad^N;x&zioyzg+R{2>v_14qF-C58IVvS`(9G;pOJpSw-hnpiz z=+|j>vRnQN9*_{QgOmrkwrTQ2kH_j%zF66I}~{Nj4jrjU91Sr z(nK7=E^1tRUUz?w`m?K#s5l|vMmoVjguH3U=o`Adr|J)V!x9k#gW=Q(1N!;#wCsyM zZwj9`8&&H&?2sh5e~5Hl4DP%Masx7|Sa-L%doWW64SosK>CIM%O;A~VUT6X~riq`; z7tE1plb-o2kBGE!rB5Q~0rtP=&lz2!-(yAcco3f^bk*70Qbc7iRK4IUqrV3v|4Wm5 zrlk?!@n9Xt8*?0-8(Y&It%SL=noa>ZD$sG)n5$GVh5McNoHSt~!uy}`*=w4^>Xx2W zouw8mJK9>lY?T22??=m?UVD(C_e!92K@mL{GP>k)R&<@tn)iJ!HOH}ht|tzDnvnCk zT==KR0vC-o7la(4`<(|DlTYi(qFdAcozxRDmfoIY@@%K$;)S?dETxv9EL2XJgqT1| z)LDjs)ff`xH`~6)^Pu0UJ-s!K3cmBI3|~e}KsH`7I)9Ob>46WqOt*wVA(}s!46(r` z!L+BzM|mM{W0Eq{y%W1rIK6he=-~9lhGUyQ(9m1-l~7h94gdKK68=JTTwRgSP+4aO zN%e>7SCUonK4Yl(vxqrduN`hqjWfn#`x8#CQ)Jk4UQydqO@rPI!PV#IbU=1+vXk0q zgSCf_*j_qG_q%u3HeN2FVmhzA-Qkro_y?@ZbhfI1U;OWLq8A-+I=P2rIV)mw=DVm| z)@_(~YjdC6-mWXvrx4dzVuhpt&mk=lI?nuZHNQzx4Q2AD{bKIgLSyv7hpUf_p;YR7k%FQdg;qCgBL@FmDc&eu0Ly#p-ng8DB*QbRS zMNbW<24qmR#^sJhq%ByQ%d16_>3Di|P-8u-8Q8rm>&B}90a3#%Ji zfi_}H&I^wKHaW#Fo~GKu z&#+O#sY46)?oL-aC?rTcJ&~++$^z!&@#pF*jUj$dIydK-E*)2_E7+w+gX72B@BCE^ zv1?@VZ|5^JRLq`?U3o;qY1y0-CAxk5UEZCOTxf_3Yr+HC~QG2+{=45pjy(~Q9usJCC!F+%sNf7CI3Xr{-h?!PQsW-^mO60>@h zIEe;6YsU{vb8290sWV);tAI_zy!w^rE%39k*}MI^1xB_H^9Quj&|SBcP~AZRkL8`9 zv)pu?KvlFcjFiR$os?eXSySjn6prf8*dn8#pgr-Z8H$q?J?>F8alK(*YtktiPCR|w zf7($U`wn|}C?;88&5!xKiJb=U<(jDu^D@Jf@@KIUx*sL@;pwybt7IHcEch{ZQxg-- zj%0%uhKQ37PTswrirLWFwsnWhaByR-UQ9FzhM&IOVEd?z8$*s~T4+`n_^OcleU*j^ z@2PONFjHtQ8h3YoHiyULJJB>fJ>;b`|2dy-0qt}9vnS;&@ZrD_mg+6QtagoNykd#UOi!-xLw1lVn5qd2;=(!Sj9H;fBBH7^4g_V}!ZCQ~$5tx? zsL$+ub?piP5jUn^H^z`4cYm+HRxAx+lQ)}h+}MbBN$k0H!e(Go+^lJH+6s5f34eI1 z$e23kcxaZ%7*nPGvGSJYa7^Hzxz}S1HoME;ayW>P`gXhgDjn~pO$;2g7~F+gf3cPm zp(ZF|4%C>`pds~Fzp9O`EiOaEaY=_jx2wi_mKJ1qhd*Ec%bJXoo2qZNF&JQ|rt4f> zkuHvYmCSDXW{bT2`Jd!|5n;DdRW4Sx0oyn#yG$Jf!Lja)Pai#=qCRP)wa7w6iPaR7 z^&KKaslsJl4~_8fAma>rg-Q6fl{L1K?gxtxayX*DB^4XTt!Gwwy~!=qmpM9FCX6A`yGy9Yb-isU(EO_6o5&g^)Z8t#yFOs=O} z!p`mL^DC8>m{D#poQl=P12&pYPXGr%HTa{;WiK1}d|S?Hr0;*#x$S`| zG6V>^EFV6%jvhCC#TIbkhb6>Cy>zzP(D7Sl<*rUvF|d2c=MUW{g6{%jolBA;Qi}^G zQv7y6kD@{Iq2o)j!I4eV=go1IWh;kmHw6!GF;Q9R{=?XkujG-3dlA#Nel?%Z6stn< zFXCiLu(aiW|D%wI{Q3rogfD=N_AiGRZW&@)(T>T(~Mrg8;`3`-zie41Vr+ z$c~~RB%Qe?ZWj$VKOA&BKtI1t;Yr1_X8ItRjMecNt>`XP){BdDnxnX4<41kE{=BS0 zW!c(6!1`R#(+>2w?`U`CsgOhxT6UUg4VEc;m?V6LOI-(w>SFq-OLACW zuRYFPK*hsv0hP~>=wPww`@ZySQ(RbEoFBA{0z2~4_P?TXu+41m%PBU-u2bJKg)iL+DDa67Z#6~;} z(b)92zv49&h1u7V8{_oh6K(R)O3oMqqe53MzNBKSRBigO3mLo&ynS5sB+uK%!OO|Z z%F8Om&;CC_LVouDaig~+G0FTN*j!u;3=C2Kegk1&D!=`|XA%EDlPkZii(+6Fsb*m2 zajIU!E|FEuxIuZSnu*g;q=uP?6cRIb?s6LkUa+B85&`S<5i`H=(@;KLVnM!2Waa$@6*KH;kiq`XxNY3@V zG}r9-n5wL>u^BQ=A(FJae-fw)&P1HwgOIiM|pF>~wx595R;uYFqfy@T=`7 znZ+7KVt8B{MbApyY7~oC9%&R$G8B6)kwS8LEt&3m>$TMNz>(L|SutWwGPx-(O|k{K zx0-g`Egfl+E2$HEBVXF-@@8k**sV9aDwamxC{#0xH}9_FIn=D!D3Q~w)T}((yr<1j zyhXW_bf`t8$2F%#wJ&hAMQtcXyj6WHiIytk zopuBx$u1I!?AAr5x#e|H4g`&NQJrEXyJ@beZr%Gl^YXgweIJi^@At2le19OY%kBNa zknz0t4&lq=?;TIFNcA|y@VfUnpOwt-afw%%=sA>RB=x~Hh3x*pE!{2ugZuTMi4PuG zu~NOBxvB2GUIlsiz20{pPxShf)JuKzE$wpuc(`ml|D#{U^2Em@)hyC|{&l<_eMcK5 zZ}$Z>t4#JCYcrDW59}m+^au60-R=+W3!3adJ`^iG5HgnPF%UYHcYEN(r^k~6Ve|FU zgW*eE9)l4UtK+u^BY!PV4n{Gs$_$-k-r_lQie0K;==28FsiA02W0~O?9*XC1EWdlf zaGX%^)bJURIGK^N5^0_z=VbB=M$XGWnHssUyFq3&Ub)+I^rHGi!Dxck%G78gSY^kO z47YfVC7VhWj$N`;ogTYPFqR!pAyK@>Q)%vn<7o$ir^l~2#mP>jyQX0z_#?59U<#&VxaJ1IV&ANRQ5`TV3Wc=q$tp*Xp@vavLu zx$>#}J9E!IJ(-<*KHnhsrDCbu=S$`4#GNlyzgA|yykKCHpRZ=-^PR6@m%ck+yFu;K z{7X&~`Gq{A>MTYDh#Zg0{`vc{nxW4<~Th`6}d{W|6i&&0eSjoV}^fAG0;^cBZHPK ztAJDzQ}qVA;`{g1HV!FvddVOl#J@q0NVK79{P*Sf@2*k!M|(W<=w$mZqmzZ6-f=yB z4?SZ&Jr6yD|8(i;>+9)R{p&Q)(=+>5(%b)2(pyIVSmb{hy+$uR{>?Z2_Z&Le{u}&X zGBW(nNd5ot|5N=h|KHO3KKp1yPkP1Vf6u3v{~`L4{~>J#ei47?fTKZffjbZR`|orP m2sm zoC={qB}wCH79q*Sz3=JX_xJg{|Gl5j`>xO0YprLkXRW=Sy`R1I`aFijXcL{A?!TFX zd+9X-blNf+?Vgs3g-MOkaFrWXgY!TOVFLPYbIht z105+vr*JW%hx(qah)=s?5ZNS0uJu(SP&5}((q))2QNdn6Euz0cmGpgO;ksiGiEGS3 z(9$?^$u9@u>nhOnh8uVOEChF`0n#Q)Nm8IQtQAgUv|p`>d134k@0)ebf%3?|afBuZa#`3jzHIoX<##q5 zmZ;aukeY`)IB~cDhm${2%&ia5Z$yXFtJma0LKkuM`2!j0MzB0D4QshHEbx+|R(5$2 zzhE<(xo zRD9})B!Bwe!S#qh{8oGv>55U`OIB=#xO> zKow?h1mp3K3=$nFh1{SC5|^+5z9NgsUFU4gH+y`sr-hG*8)OrOv|Bj(LXmQG(<0Nm zi{P&AfTwZ(=)0^|1Z00s!r=`TB5@C2#3#QT|+z%KN?-t3P&+82^MYhY{ef>3iu zNds2^Tz+K3=e7b&GY^nL9V6^pRf5Tbp(MQPHCdPuf=Js*%{@Vx`2FnxblGkvPq;F$ zJINM-JKqq|*-olG{|M|DQn3G_gT_x;IL03Z5$P-&b7 z<85@GjE4AE7fh+RV&@KD3@ivnW~nv~?Y9AYpbR3ZIanDe4=chz-iAadT{Xb@76rIO zEI}u4Fe$rN4WpzOn0j*)o-iTO05^DVPQ+TZk-v-ZdT(?;WbRSMZ+o9 z4<8KDQCfQeE83!P#XSr`?DGg&#R6AVBhe@*fkgc--1%BeqHl&FF{B>4;#pL~_h1Yg z`%uQlm#NdME~8xF9!7f`(e4pN5{!<;gAW-c zqMd_w)oq+V^qMShWg*>gHI`H?Bm2eWVH&}UFJTH$4~oHl_h?+iuO z1e+44NchuXGPZb%m@u!Pcd(L_XF6d)%Ukl>6Sn3N;WAi-*JDn69j0VA5`p^+Ol32X z%Frb?iMNT5UNsi}^#E_RZXkPOGpSxGNIp9`!RQPNJ#9t!*;x$TPgR)sqJjK}B{*=z z1KOSS$jgsK$F>Ia&NNd;P4AK3@G2BW`pfsBE{r1ULo3L~Etko_ zO$K7Dib)dPiQ3^_jmn86+~1LfmjVK)5W7cV{2Z3H)k0CQko4>`z(mA7yd86a?)L`N ztO_Sh8(YXmKM7bf^l{Eh3q&OxHd>8%w9S*`uFZh*?_1$=`493`$qT$OaoF(G6m;oh zl)(BEXy>!XdENqWSuP_2-_-E<3=?ecc0xbeO$@dgkkRTpIJ_W|GG9{-1J^t(C^=4T z9Zg41%0gHRTtL%7bFlei(71jv>0=I%P>H8Re>olcE4egdpEna`Q4}tSCsT!obl}f# zf-L761j`l^H@S`Eb#@4BQa)2*dm7NRBM;TxEb4h|F@(2MIPap5$XRFPf5?R3C2QPT zy%G`WUhfZ6%Gbnn_ZjD3lX@;s4h`SXE^}bS??kgZ7b( zfkW@RvOxOo{jf?rWXWtV|odwe~y18!#bsG1{ZQNZ(o>OU4u73tr` z^+gTHxLb#Q)k}~%z8UM^$D#g001ikSlI1zqFyh95>fu$?Ct4fjxi1sNZVOO*Uj#xP zyU-(&h^7Tk$-UeV_;$AKklc5kJQ(RFu8!N#;M|D0vuhC$C{6exRS@Pin$o>hbWcic#si5|6x zL__Lc6>>*QA;Ol2po1Syjn*LVm(^(HPJqp%GIE<(sK4@!JijlEf!4cl8;nB9Y&m(e zHV%=y(+C~9)KF?64w?jl;jkR(3`@xL6<{iPJNWk;LO@0>1e?y2G^bb+c&`wD_6U=@ z{YfzH_8=PGsu-Qg#cwjkaN)eCuRh)oFi*wk&;8`O$s}2Dv zk2<%2okZcY?vEYi#wWqpt4G$qJB5$TP#jpT1dk)faKzU`&vweZ+ zd&22MH@zZ!;?Cf`kczW=*Q2SIg>X?dY%@MdHav{gJl{S_S*{4d!M~YkRhPic={l_B zaz(a96#R_G$+Ex0aMIx#v^d{*@}-B^A5{wPE54Ndnewpc}yMt_dXK z1xR^7G5i;Dldpcpl*DKz0#ARZKC&d>-$}zUGh?E6{t-SFrI93dC@y7$fnOpGbQ--! zn1f367D2iYotMtb6yu)%A&PO&Zzs{Hg$n;;C@f5)(fsDG-g6%weEq)@-~W&EgRlI4 zG;YZn8n=i=%>q7|v>Gk}mHwKALRylw+#<#nwLD_hX|=pe_x{>Nl0lMnd@`{Xb-yTN zrq%H)mGsvwR%S^)5>RQkc(h~-JMGcZ?KAz41S#${EFmosOO~*nOgd}XUX=mXazm{( z^&-Z`mi2Tq>-2h2bN7Mz6&68j8pNz)EgKm2nduEHol6E9#9diy8kz3xmW`{PiO7wCTyjS-0(0$jWT{O{tXKwnw>M=H*_M4(pfuwhU*!)ZhMv{nCKqk!?5BqT96Z z*OSd^H`=Q@+!@D)~lmFUxr^D^XHN4vIwHvc3FnVW_KNrP#x*Aiqe*QZ5?Z3``RYK zCi}H*vd744yR=}rZu`tQ+ir*4tn6;bg3^(0r{a3KH_jy;wr@^U3}?S_sroYV=41_z ze2*)OZr9`XST?8U)HBu5p3^Pb@^9VSP3+!!blK#*_3ZH&eS4-qSibixJI=1xYb-0L z*ZW=RXz#h{dig$|nGU-?-#v4~Ieq7Ud>QTYqwy;A`*Vxh_XqIF<@N^(sEzes5YkZ? z2of>19|#t+%^kSN^c)+wBzaL`FhnNaelS!aJ9jWlscdX8T)9DEC_<&ve(3U+k=&uk z?XzP;S14XZc9fQ=1N*9;+%0zWUbS&{jG>O=aICSZ!*HCL?XBT>bIsutMrj2a?tr>y_j9W#|Gw^cOM(++zcgEni%hCD|?$0mUJ!}ollla zjN97M-I6BSBFJDem~u2GosXeP;}v53YoO0tMA=##l*`*J$M?UKG=>uAScWzqn#L)C zT(s?qybQUhg*5`4LN_0!MNp29!)Xkrm;f;{GB~L5Z!%&93pUQfuXcn3`{xM<_7HN0 zz>eq;Vn=rUVC(lTF+MGJ= caMDF`^X3i88>4cQ7FlBszW$M2O}X_vUVy<3tB{xB}&mE31dm4LS(eiBGgGGvdoAv zilS6Zma&9tLg+DB#(ph>+}GjW=ehsg=ehShefxfYpYP}MUOt~+<>9JyA0(AX8wc>^ zB7Ltb6&;1}(Go&mD#V1&LM$*8;+lpKP921(CKfFhLW6tru0pJ9BgAQPI?IGmNrbpS zj!iou>P>|h(MyOH?)P^W;$&MPe(wopZy`zqOIHXhc!G3<7|Q+oRzfsUXVqSa4Q4`k zv0lRC-AxEmc0?2F0F)Urbn8@7JySp-!r%yjfPzE(=?eQT5qZIz&c87G&3urf4amz z<2MW(Ke)rVD<*#$I-97u3+_66ShEgrpT%<)*!E~YPF*!EXo1fhE+gI(2WourSxNi` zo;SlYm;2SsrPzR3k+Ti_k9aBOJ{b+={_GplU_am_Z+--yR9d%L6B#~GY+KS;{PCumZ{KedMgKvc{ z9df!5zXx_G7^lf=gm)A^wCIx=wO^Q-0X%=uquprj4@LwvKCJ%un@zt?vKrxDN=_+Q zS!lZn_80s`QEN_uD-}Iu)U1Vb9WyszzA7}t!L6WA4b<$X z{w+20z#T-r9{NY3c>{IB$vucxTl(}D=LCFTz)Lu}ThY{m-4jeJW;_P1Yw(aluWgvs zQTn}~T77h!q<$nkPrxKaEa1Irg|A?0?}E1!zM-sq)?hUcepZ8*3`QatW5}~n|6I}k zCiIPl*HG;Xus78IfKMAWFg44_8^G?xJN*=o58*GR&ViT@_|Eih2YM5zDWumY@a{m( zEN1W%I;Wz2KD?i)^~3K6bhkl|DSRK;i^!S7{X2NI&}Kw@iYYxLFAMJ;XiWqA2)T0d zRPdj|Yau-Rh8A}CQ?WY|@5fwB;Lt_SZ0cX)%?uqnVEa&?&3h2eoCAq(Am0Y;H_UDc zxli%5f^!b57H^;M=0{)7qc06j_u+d^O;5C3h5rRB4Lzygmf$T6?|R_OBR7z{Zt7X` zj%-0sJiN*1bVgSdc$dJx1D-p13FwJnp9e-fxixrsfCg*&oxlu&(SHnIed%ctYb8C3 z?Ipw>@^A9|7)&RylhE;uJ4fP0_<90QA|4KrlMl{*dRa!D1pOwQ^Wb?+pA+%W!d?rG z1$tx18OKaB@zas__Z0IB!qW`yf1_^)*0&r!RU{sz)NLEU@2mcsWDtaN;+ z(2|G0^<3NF)S0(07_xDK&}o<2*fh!#w^VXBT?<(*rYl>c+gn$@v0p1vRux z48vjlRFplt_IGx-H3X;%+L~h2-7C z_aOS@24**6lR3{|&x5y-{(OYvE%ghTi4|C}c!=SC6CQu1--+NE;PoTB2l^bSD?{5w z_DJp@)4x#m_vlWg{y18z$xC4A@Ek*rjL|s<@8xi0ke9_;iLPY$JEQ9=c%$f-lvp!Z zy4-uyWK;Z11W$&Z@9FUvbPRxZ6r8SjJIY)v>FrwbhoSc-Sd-8nOl${O+SF*WLYYA- zeOpA2o~eI^$@>8JeR9IlRm<~o;tKj^z-&CxP{(~Fn2DVA;4@~oX9jl6x0!en@6-cg zH|VDh^-sWZXB|be3_thalyg_fK904RJ12aU(8o0JUV+n{+}?0D;pq-Kp%zMbuhXx| z^l!Zx?>Dh`^rDQM$#@uthwH=}iH8x3M}s%Z9BsYu63KFb=P|nqp7r!!LO)W$+eFR* z`lH9nhtC6Dws5tkuhr!5LEAvQI1qmVegv^EA6EQQf;ZW ziLpfek9A}9zfgr?qE?^(cN}QEwX0wzrj-Rv0*R_O0kJu~&9QM;GgR6`foiOe;H2o$6aD_43QE zH1vtMS7|gi*0jplZ@X`mEHF2_s!Q;(dsSV-DooFrMARswyRB$U+Sq;di>n)ZM2U`Z z@)!g6IMa>tcb(~!FzWRc zkA%_h8n4@q`TObFDPvncX}Q{JwsUY*YIlouwQJkI!qr}Xw8YJ!<0J>SafY*E-Nttb zsc@UnJyPO6(d_+{g>G~Ag zR^~OX4pRRbw~6L?HSR7)u5b64qLijg^>Fh{@tp3Pk}_@Pk{c=0=d6>add*MrO!W@P zPDz~+bmT_r%*AEWG@qqcJ=12bxR;VPd-cm3X>+194AbYvbeNXzyU{c?eO~;ao9Xi- zM{DosnWWLP&TqTtf;#_n|Gjk!b}f5Y7ob|Ns1M9dSx~=lU*6vOMF)R*SReFrrDCVi z(W}#Th9unHFf+9Hud11gOFwJ-gq625(q2+|WPZbv^XB;t;TMPe*|7A=xZbxTYCHpO zFRS&>zrDP4$*tWh?yfV+{Qg1G^vspce`cp;u6lChR_5yGWky+RUSFM_75VO7TGrZ+ zFK=Z5T79H^>&9Fx}W~ z)qB%8)6K)pHkoInpV`!B@4ow+EspQo6F;EbIA_b?YhF1&4!fV8vvuUF+MI;3nzGzP z`;OkZNfXR=p7f6BDkJG!NMFDf!mo3-kLdAjdr8^sR)jFpOv zg$0iqGJ+LX_U>F?{RdLiW3J8N}APu$Ye9*?>voDhdkBa zII#z)>nEyl__(Q)zkSBpf6WAh&)4I(xSHB7$lJ?3@Y{2M(%~!qd^O*{KI6VA`T8_^ zv@n*3`Gzl9<{#02eppz4-|+CoE9A@l!<;fAZ{Kf`pJ@=R#{jXw%*X9bo*9gVJ? z&~YS(t~X&3UV+SE=OX~iyp`_5?HPQX@SWGSEAaf1G3wWYH?Sjna zlOG4qHaXJx<+m63A!j%LRDg4W44q)v<4(FyrB>iG$ECTeK26*kiRR)Z9|_16sXZL*q)}yZ9X0e?#O_*yO1C398RxKMFJX zqVGX*`z2g(S^h;&xNVDaAS{;KH3MFYSojoHtaNmCADpFeoneO^?PslX_O1&$yzZ7_`I%uCbaPW#)N+Da&>T% zo1;5a9loIqt24iTg?IDh{)NxFPaJ|d(7c>t{zFWZkDHYtCWX^HZ?R{;i5zVm(gkCc{**!4^oKN$_f?wLBY$?D`~{s2;%>mNaY4P%!(`MP za(x}tfIj7V2O+P|+T&1e=s`3T_&&WB25x$g4G%gzTnt6OB$mLLTXeIam0bbN|3@|c zTgW>z|15zDONQt=$+7RWkvH#t@EuOeTTSD4_co>ak+#pVM|*tgBVIVrKjSQv=j)?- z(z*E0T4b%`y|n*4j-}H59R24{ev+4|X!hMg`s-fLxV;RVZ-{J^-MR@4>F!P2d^(#s97wzNM=C$;kV(b~t zz=7`o&Nl}>EHk0=I)sO59^>r1PoiDbUW%Se zBUcvGBOjLyq3f?$VL<(jC~XKuyQ6^))q6R|23j`Nb}L5AV~E! z=Ez6y(~lT7#_=du=lO6%>Vr3YG?7C6Md}&+6!rzJ~Mm-_9mq)ho|)- zv!Oe$O$r>9GordQjbEXP?A0<$1dhiF(|Nb|OvoW0xO0>0(OQFB8<{U{{0)vx!2l}EkSz#wgMhmk9j&85 zjW}I@$=c;FaD244HVZn~E0Re!kJ58spgoBl?E)4Ts$iJ+qGfPAc9_;>EAQ1#Uap3` zIBQ%KZhjJ?2t9-kdcffYIZWu<+C%+Z?l@?OJZOKJ*5!7>1l?z7{`I$Lk5FHC0ZN|a zpmn&~l)e?&K5tYA#@4K%I=VBFN#mcUYC-dxmRe?j<3>rdC*Zj~&Gyi2aJe|#w=JLU zU-tIEQe=+vm#N($g{=vB<8xV3t=y|lg~IbyHz zzMRDlDo{c6A&oQZ{V3Iil!ebY+6&`forZ2IhSYx!{~fgdG`g#4U6W1kQav9RT*!s< zQW8q(zV;eIG03Nsu6}_5X7spZxNA+Px(@wg5{q_$^KIu~%^q|5{h(3$^9|$+N3B*v z&R|1L$n(vg)_s<)J-{RABMI5eT<0AOzOGjU zW7j;1gs=XX$ifz`L>D;vT`3Iee^!u#4=31^psLME>W9M{|7PS?{gx&e~6qR*?)n1@S{QNV z4tkEcow<7i*<+8mGfeQvp!yy;t4HUTHE`4Z?)p;j3&({YY}*Bwyb7y?d_xKIpBo^^gmT<0xfDr=(%nX@`d_wZ&oYSNmu{h-y;hPig|IK>ikw;sP%KK8hZ0=rS@!o z2danUS>2T92UgQON6u@bx(->nhW3AFlTj<)_tLN622|^cOn~bo7CwYZk;3#okBc+u z^JZ1rm@C@Z?yTJb4Lq&rKHdU0bC7HMJb%OF>UIlg-PgMpR&T^}dNa>h&qWSpyG8T*%*)O9;16UsXNeh*-G-mW{k|?M z2ibObE4|+#dNr+s?na9>Xg_iGY8_n2?5BC%u3jdLT)Z`w_HSszTPAX4V+GAim(6bK zzc~M1`aF%QPM5&(vBKgA_*x;)8_N7%M!)ZTktvl&ZvDsU3A|FyoCT%UE~5UoRNka< zhY6|Ez8#o2z6-~hU23$Bb{o%GBAezPrTc$4-ZYH-{oiFeu=Ho~6_~g2j1OG*#o#@B z91>*>a}#cDfMFj`(>^Q=$h(PL`78Jn)VwyG>UoBxy#jKm<;GVq>CZ~q|AF!%*O7Pd z{-%A(njCLH{$jC!1$##=G$2>dCNHS!^MLNFpjx&NIlfPc=6l;g{Yqp>e^XvV=srGh)`e^ z%DK3Y{jYpxL-nx)?fE@j1=+_)^f^QPmXmWYg{iYgdVF3 zs9%qBH_`_1QM%`a_O^?CU*LIjU0Oepq7yR60YcZ4plgoMe(0a;!V5z#A2|hEBO*9p zp4vTF82tfwXT^Zl9p}$^P~r0PW)9y5FE@nvOU;u=J@TGNoHzYZQ5 zq0bLzsb*0e=MeQ-0=IgoQT_bf^O^c<5wnu+H~ow~?Yrq&A~85mt~Q+Z@jP>j3S`#3 zKw1x*8J>5Lm&e*{flb_}sScLMW(RQc$b2?*PA+_R@NH25+bR8Im+J6zv!H z#1z2VeWnlLsz_1V7YTy1FCzO|xzPOSo)BJv%so&+`?sMxqy~A^;sveOz%$Wk8R~X6GHyc(&3mkxL$--iAq>@J;X7g>J`hU)6LZ7E0yQ)3W zzWkM3J^WKEO6R#ct5bc79{A;f_FoD?60k7Lp4yj>FWioNH2CkoFMc`}<{)qLsZD@w zVi(`R9?jJju*p%K*8Q>dBnvry^IfWIg+3R`0`=omcM20_V>s?qCMXH*DuO7#I3YV9 zd7hUuJ(n+dKQ~6cXSr__dW9>bLGMXEx~~dP5Y@Gq$TB9{E#iDPz*&3s>foxawX@)# zqoukq`%^;~>@J;n2^W0SqWUO{5syV4|6FqxE_Qf*5B_^pmY!!dd$e4TUzSeBz%=vu zRQEyRsqM(w3+C~{o$t))yj@nAzQ|Q4kI_8p|I4!gInu3^<}W$o&m{6rvs1?)%T9{w zW@g+PZe%0=WUQ|sL$HZ=>PuO$Vy@skK@LF_}(z#?xNr*u>v8@GB(-t>9;68Y=`rWW}#L|9Sz z9Ad65`7sDLo5nr$;U+%%vR25Dd18qk|^X>ueK5C9utF?LMD60DOXeTW zXf0XbRJ8b|m}|A=%Y`R8qF*lZ7;Akg?!_)yD&Z?|uypYS>6lW<0QI&~sUUsHSJEM- z2VcpAvtnM!-ga$!B^Tu@StcJBaVoF9^nNnJjVvPBJne{iy)|4zy*Y>w7T70F-S9XM0 zm9OfFi!EP0kkMYQHBuz?PJ67{>YdK_j@Wl=Cdb;}t!2!RuFz!{WK}RZW#THOTRZ#uVKC4 zys|Uy{TA)79q+g5&XB1#))%y{-exEhUu|Nf(OJFS*g&Sn)O3e+%?>l`_?n%2-8yS_ zS)7-tHDiTZ*Y389kFVWx_(^B2xl^%B-Coxk>$-g>I^*m1dwlJzJK!}#w%)>5(5Bw< zf=ojF!2pe}daED<*#=g~4x0w+aO;Eyo7-+(4YpC|Wk1-(h1z_uzZakI;ZVwxt`81r z#j=fuGiq!ak38*6Xmrf^+SPb8Z-!ixQ<0!;)3H*S#3tuA8r@AU6$Wx2U8{H4esrt1 zPW?K0Q(__sL_d#`e>hP2W2cKb@WY+WpCs z!6Dz`#V%ym;>{_0uf>O3v!~_Uy!G;}z5+Y#TF;Bv+-vm{bMI;Om++HsyC5BA*LG1p z;a=M%<;>v?E&hwcI{VIcHL_a)E@6?zpBfj&=I6BWZ!YkQ1*UDu#skO$93cN z3Y{UQJMB9|&1~*>hV6Cl?Yv>(r_dG73bXIJX_s)n>(=4S-mVCz5{2&DuC?~vcTRNO z@4o9X-rF7N#i7^} z_`v8(L3UAPjj6vbx>(s+AF&ifsu39lymUBvmsF(;h z)5K)!uGN1#C-QWf3SR0=ytdJICUX;$d7QbkItOB$kdB|{M(LOuXEUKW@7g@+cNd(`c}B# zf4-)=^WQHW9fpX6lcn=9H*41=R!&Y!ES;T?9+z;pc6PBp>L{^n*;3V|s-j|IQ{%rn MjlnRU`osEv0DPT0jQ{`u literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/string_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/PLAIN_DICTIONARY/string_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bbdc391c68f58d9e001ba75fc3280e380e655f48 GIT binary patch literal 3166 zcmbtX-Bwyh6h5LLS|Xx32FNj@2wLzTQDaQxU{I_W<6o3St^A27C?HDE7>&lHG1_X9 zCT)7tC+Kx=x>g^ct1C~?SLkJLd%l@7XZ5Bx?OtSN&z^7Z{mtw>XPNurR3~4;KNqB5 zfd5&=b5$Hy%5mHll`=`qFo8WyMa>`@Nr@9Fm$Y3)fK5_?nx_JFn+hCbM36%cn@%H2 zRuQeVipYYL1qeJhi2(dq5v&+E5&=8~0X#$m8V!wTAfD_Q$A-cep5$6#rG=43Scxm*$pwWf zHxn4qjF_yz zxg7Y=Y;a&b1&l(7&|{1UC6+Lq1`|0Lk|+%fyPO?%r9F{^fslk22P243^03RNlO{xx zOMys=0~U<#V>r|1Kmxfbqqr|}Y$${!LMzQz0G6E+I@>`?IS&HUMngWO|A3~CI2Hq1 zu0GJT5s{JjK_R>FS55^U%8imMWu>CUQhFN^Y31Oe6gP>aasXr?#E?OG23W9H5CU3W zARui;1XQ|q6LwlwfaPb4W3rDR(hwa+Mg%&0;>gJEBnC7&EfB0`FcKY5KoMBc#k7yR-)IVJ!xL z>dcKm^@EW1#qPV%7)p&Q;s3vz~Fi1WJ`1i-& zpTGR|--rGGFv&+@(?s4|^-LzG^`g?8-s2$a}E6w&h`awgx(^DaO z%|myae8a+s|E{ZfbYSR<+OdG>ZsErVwD;=W+KGz$lT)6i>6zIF!*he3lj`}PaPQ%x z1&w#Ht@Oc^YN`H_uk1^=r^DqdSq@c~*&D)`85GrZc6% z>{f0&>}@hT9D$u1(^=8!%J)4k4E6-IwW~|JdtF;k(xcPM-PdAm``zZ?;samzjPHrY z`P6ja?A!JqUfF&&>*$TC6Gz5{naJ_J-Kfo_GU@ec=j5&D>A=Zu=S0$zdZ7=#%q?tM zbM19sy~=($wRd`D;^|KJ;cHv}YfrZ-X&7;qXJ$Ij_)x8QHv7uDwwxF(J)Suq+wx$lhj^1wWEdC~U;XggIjs+VFjSW0kc4K+}ymCpqYddsXM_$i%Zty$vo=cs| zQJc8TX6F1shtPjrb(Cm)<8yx&J8^`U)yoNA=t(H)J71VxD@pfh3RStuw}X!L$}Rsp zS9;~?+0$&HQ0vb0)?X~X`f6c+@#x*!MgKij;j%$9=6v_E0TmJL%^D*rYRhGOe>N+UR3wsW0;oH0n({B@17yL!|5 zBy)4W>#m{M;N9d)Of7ECsM(6F#!?|(#qm0u%2*N&_+wE!#3e!|$ta3rqL>%GadB7_Pzgbf8>!X>~84UvFri1otGk;#}|1-Fnk{{%lSxODDn? z0;)@NFs?V9y%d@eJ2+_9yNSb^{_NP-JLsd0-ra6^=Q_2h*RAj4iccZI^=2R>sDx9z zh>tLZ9*MgyH?KE@Grj&@U$~2V=whK{eWb?htyusJ7F_!I8geovN_({zK0$Ex=jvgF z%P=f6BYFUTdRw^&^Lm?J-oX4h>u3uA)}5uZ1@1tvOvD%kmeT4I*xzlo5c`ds)dPif z(&qu|kh0U2Qx=^!sz3*NqYDsbaM|V3qE!{eE4xt$*5rS`bv>dNl$Pm?`^>~&DI+}* z+;jbr;=J;n``>-YyKs_&psG78L*g~v5~6u3c-jyYt<4lfSkQc54YYjSn^3v}9`;7e zt}T?N^`I+CTew*dxfSfS_ix$yRZ`&g+n!Ma&;$)-7so<`Zj2V4`i4LrliqPw_{qnI zkaybO?Z~aG1GqhMAw4gk+LLewr@d-BQD&gzIxP+Y(Y`s-GbfjNp_Umyuj=eL%=S^C zL2NFU$B|14XZ*R19@w}U;XnXI&gily0{3&v0D06e`sg=M87)5NJ_}0pdKH+Nst6rd zQI&LUtAyRkC6#-ffLLSpE%q$rjY-wf2v7A7N$ARA4mKpmSHO`Y=j}!p;bEx&&?H|q3N$i4V1#VbO-8*WLorGH4>*mo18wQO<4-wu9 zZD}=5V5vdWvE;wEBmPID6fK-FTv&(xPDt^oMPab3gOu_TPU!s;S6%x!VQ1ttR z`Q}sy2bGw@Lrj2;$ZCp@n_UhgmNaJA+XPo$j=GDwzleu`!j;SZ&tsJ+ExNGhRQ0y&^%ZX|Jd#LAph~JxGNl2(DZ$%Iq4ge8_NFso973; zKJ4d=rn*(HSmZ)Zv<{i9I8_i~d6%+yc^VPq7^KEHt-(5=>}?kd=S#YI7Y>Cp*RoGV zX+XuGRP3+OZauI}){&A_K6PDD?)1-!xEYJ5qyM_Gwq zvCV1}97F;ClIX4#U_|Hg<8JDPF$P{hkw?P!qM0dq3j%Of7%a*4f%UR>M^TU=gwB$L z(s*Mpay@AS^o}j8lHjkYu#~0FmqCh1gI!(Y8YW4sQ56ADA1;Z1Wd|-4<7m-FvAkaR z4AxDxC|s=b5X8X%b5FC;io{N=BcPmYMnIO@MT_w*2y8F%Srquvoe7|{ottrrH%~I>Tk*V+0rl%6ZJhGyM@cS1S|w8+uVBHx|LCwRFdO_nJ5K~$h^lu+ z;#dcp&BiflabK(+j$V9tt_byWe=R|My^<8+2>lvhUU=B?O>khh8koDs81g2;6kOt? zB{ptRb}@M~x__62u!U;&0#O`(Q2?>sIESFGI!(XLzuGijQihaG@<6#L=eR~ynmO;6 zhh%^60N}6GKVEylko)r}txAWW;m6e&O=20N~=lJ=^nuAZc-Z#wGbWkle9`PS&?`RFE2mT=qBrKNBAyOCZd-Y{W076n^ z4j|(gZI$@=XU4JdMOw$|6(B#~>DG~A>nlM3Bl_E?(77vtJsufmw}$zc~5boTriPHV;937|KwRawEE~t zIzJ`5oMVrG+Pxc5_f!pTtrECi?ka)9A9VsM<+wfm%|f-zwZoq-M`X&QdlwLlt(P=Y z5qUn*5h#hQ3gc{(f|n|IC*KIxf55nL9NGtm{mLUAHq{n25U_m~x*(PTL-_ZvCVUvC zG31QL87RRilS);~?3B1$Fp&y}@1F)RQgy%DsC)-$gd=vul7+AU>n*4`31{zsuyP5n zB@2@G@0|;jw!bAZ0fxts#)sFKw9mk8ZPeEWkej&oB_}u^$mzA_=sd2Ym2UQkhJdbn zbNC?eM~=N=NQ}203d|(;VjU3Z54F3E;y{=_#B#SB*dBJ@?B1hrt?%NE3%v)GsCA1d z$({sY0q&<$dkR*~H@OQ&(l7-jN~*?#SWhn2s)(5HNdsu@(*iL1EF&~WM7@g&_a-Pw zn0xsqSX33je)cg`6p)Mtfhvo}`6qc51+NL1x>Hsh{h@eoPm;~I(e35Yz87&?5JTZ% zVDry^RD<302aP%YBCH3e`nFkGC=z@5i9URgO8##eAa_;B{r+sEcn&bdM}me!S%M3A z=0sfGKr2YJ4mMg{S>TgA29--~9tim4NTgJZ^6iY!>T^YTEEH_{pufoPT<56IJ~i-t zfDIfvb|+6^SIdkdYMeR-7i({hEjNr}r?Ua1`3~Y+o8PO_2OZm5TsG}Rx)mUo^LZ9> z2^fz1;ur;Z34(VXUMMOnyy#AT*bkveo+d~&Wm<4cot_%v@w>;rW`8%S&~W-WUuy+r zFS+X^&%XaEk~Em#gR&%AlP9Ih(H|mV5Cuu$OU+;_J;qh)6iBN1?$1s^3%CiHzq9bn zxi6b(P{4b9W}bHoRm_l6_qYEFDX7WV!1yo%9OD>B)hDz}cno3G7iDxGH*Z4sqm1IH zvn17XSU3kog-xK+q4_Z&V9WYX_PCmv%#n1xh0rpx^ACDLmvOM4+dJFKCa}_spw8@v z+n?p{9HxWy5KJATM|UDEIH-Kq$-F%CLwLk7O&*yc(cUxZ4~wu?}i@R&IDywwXiV)7u^p zsD1Zr^i)Gbo&tPiILFX{N^RUDwr0@Lrg5MSOiawQ&-$0nb2_dNr;uCE9|(p{SdPMw z@FNplz1ANj^qY79tJ#D&9-iqHuK_i!?Rh!5*By9(YFoH7*bAMPPNoD3ok57ZueoQ4 zZzOGsaxvOGN;}(zjTU){aj0$JW;X}EmQ83Rq||?-{$(cGBct#WhV0N;!gA7b3c40&^|xr)JW1T z?41b2YePPRLv?ru{^EiqFddqUQWnY`<@mnMq=@h1It2%fH(ZB3qw07HPZA;cg?uut ztuErm=;()N-&8^bCqR*srURo$4Gs!&(=i4|p^ncV znfB#tA0KXeIJPlIG%|1(H_r);-;>X$LrsaodtRY*^8tPfK=MN8;g6c3!wLgwqJaFciXZ%(oJiWG9YMXr#IvKOuLsb^rn`w85+ z*%Bu=9}zZ@UJ8y&fz7w?2wXfT5OG0G&#uSOfuI}4HvUb&{;Gx!Ib>?9Z9))UO39xV zC0g9gSkxh9eh4b9bE9@VJniYqf-#J9k7JsM7iAeRS^I_PSD(8AOl&$ zW(AhrnBoEnFk$(B?)#pv*^Z)^l>V&%bz_uengDcGY|Punl#=|cT5J&V`)5OW=XM}J zaQP_5xrB$09l{&ts-MDe_l&E)+XeKTKjl@0uxa6*#~e9L24^tYx??Gsc!f#8D4R23 zR02{gKJgPeA{NQTTTj``J6&=@PrqehV#dpf!dXCKnZK2# zsQiB7#$7l#W7U7crfrEGG9ROfgeWw9CVm*CDGZq&uea*SGEseqfmUUohzXPrHtjqN zgYs`3&88b0Ag<5dbL7NeQ0off2&aBV!m4R!V-q7%!lwXyx#CF>adz3S#mrCwGxlsN ztv7qPXO3#HhcES(93+;BOx=V6q!S?;w1}e+H#4>6Du0Gz%2D;T6aZ8^hG22~i$Tq& z`Ux)Yaw!%GAd;#1Z`9@B*MYya^xEMsp(h)q_)|w(=5W{kXmSoI&K^-VTXAZ7?b5vkIg{CFW5OT*V;wC05WrOy4K7%tRi)XH*tJE zf*~R-UJy%v!v}Q%J5a&1G1|`KjSQ?t8kIwZLuq-|_FPcO9M2geACU6kD>zyd`fo

QQXzI4o2-?}v5UoUd=U zK6oU02!TA06Qd+e>H~h&GNXWkpORD_*Z^wPGz1B&#LUZ9T+$B;hMcwiL56h*8T*!Q zcROF`WZ=OSK*(VtziHWO&B1b0EdU;RyhrlK)nOlDuD&j`P9GX?flQxqq5UN3`ivsu zCW%X{cQ6Mr0~n^B{gPY61MY~$qSx`02tyR{Smwgy7!yKvOf;=BtBP`Nd9FfJ>EiE$ zxe;TkTHbOXv#2<`6wVgOR$xmZ$|a2m@XQD?P%e9)Rv!SmMSp1>WM8fI5o;S%&v@AWxduBACOP zn;FayG}gn{Y{~hiv^>gdiHvxbX=>hN-E>s@3HclAls}|u$31&N6?qF7$#L2Nqw2g3 ziv&!G4^#ne(0mo)Q2>Lk84#K_UayX;* zATUsd8q%+*qtZhqZoHEvs|8Bi9Qi`CQK7A8tZyZ04!A+$lSwrvU?OVAq5My{99$3y zOcm&3Z)xIX8$N*=UD(!U3keWpVe_7ggQN}?%@?=*zX4CFG0a4qmE58J)NBUJ3OPla zPB6fhWfivo0lKNpDN|=W(n#_rFqw+sc8i|9)#TwMV>p?4K=h<26gClDcfmiqxrB^T z#YJW4I&aH@>8$FvsWFP3-i-zV4VRc2{Gn-}WsR@t4*gcx-#wpZ`SU8obMr(Q#Mvk0 z!NdDhlQ$uVHI%Vpyy~`jvPI0f=sPM{{l{mo?ji10cG<_3t2=Tfzagj2f^4epU+VwJ zc%#()9Bo+l>uTRNBeRIkC0PsR)6^RcOC-DVO8Z_PMRUe_Qkywv>sD#L{ zrvjgHdoYDzE# z(%(-0X|X?ELz@|e?iUs}52bl|)i+@K2PPd-zO*$anrJ!$R z_R2UcJMaBMoQKd;TKk>u*d-#qj(OZCS$Qh<$UM{?z_UbVZw*Zf8|8M{Z$RoV<+T41 zD7s;3J2_N^JPiA2_Ln6GOa5Pg{owU&8d(2v?Eo|$Caz>4?SH5H!vae112II{%%Whx zApf*~tWHq(lnh{hw>7*=g>L5kd|oc5=YoZwfjP^aCmIJq_<5C1{9fZrK!>iyb!jB2 zNv{_RamncJ8!JU5a>Vr<3wRVT3oMLo3)?__VXtQ}JJdIS(yP7CvD!E2;n$2Lfi9KC zRZ)|UOY0Zt!IwhYb`TsIn*$?mlr0&rmUqVJh{|4A4w3?&6_;BN($voN91*v`$A0#L z;jx+E@Tq>_^9lY3bYCH#ZP*27=lFTbZ@U0jN&i1fZbZ;OPUr(NRL|ozl#0u$UOX|f zP8b8Geh1|q`DqPa{&*;V31eI&9T@&ZwdzojvF446 zmrWjxKMcIRbK39cdL{t_T*8rF^Q~c*MNm>K+8ma`zXVQ)&&@!pdZT?dHyYbY-ha2P z5@u+_GJXd<=SIUDIEIe*RptM^4tjz35>{{$3gImvX7so%Ld4{N0ngKs&y{$)sR2T_9fnDh(MB^utjK z(7q>)ctPU1QF=r1C&ZI448yQ_LQ`oC{ih!%lexfx2eEVzv!IsY8?2rRQjgs4yFP-M zSSpWasn!!Q(Crxd;JXHOmF=JQS-M2&sq~5?jNx02m(No2#N{Jv z>%0eEG55zXfWI_!u?eN3pfhzam`3Po7pn#F-~smrdrqH>@O_B&ur)-62;DXIbcLcv zY~eA$?v4)OjQF;1)yg(CwDTLL=jlGLl+P-lbXeRq6!1sLf}mD%5Hc zWM<2xLR@K7vOUVBEz2c$4-vldvx8b%**@Fg690oJk~>bkAxc0}V9MfY3F7*Zy?8KU ziYM?0eY_X(6u_Nv8%UfL255DF%oz#A)_^=|>La2SZK&T>MJGBnNJ)RO4aY^a6q{aD zi!<)}2RYkc$&z5xtb%aL0oiBU2=6!0ycfW`%GV;#NV8T7t8;jq&t|L4k_wn%*%c$s z`^>~rz^O(2b+myk3~Y;I)kBYz8dA6>E?7aiObX@f<6X^SU&c_ft(c4ZOVR+Jq!F>2 z-ahK(_#w#P84P6%c?Nreeq>>oGa!8U;~jS8MN0-~c^ zZ>k}dM%1RPDR>g`(y{%DaA5KI@{R5T#QU(}!j%{zdI6yK{y9p<+Y&48M{jwKAHXVp zulrjzW+VViMp0E#cL=|33Uf3T$z|?{P_Py=%NcoJ>@*mfPm}>%nOe9qD8uIz=)r zB?uQW2p0mMCy#8kWw2n5wwbquekP2B zMAobbu;I4riKLibz$K0@6!D^=^6mJorBvsJS2~Jspr1n79C08iE+THKUbcJMnMeVI z^=t=lAH(_G^)wN*9U5Xc&){OQW*{#3_=72>@E^a8zQ6egNQ&BGVI5yFXJ#YR$egl| zP4mO9u&V4))V3YW>O<0YNAVJlnbg{*g)ez{urw6qsjX9J_4oEg95DeU3a2TIewA4A zF;u>@;+v1_0}lUjoHnI>ud@n(pUjHLCm#ktbgj~gzd&PipE@ksCc`Tnc2z#Yq|bg+ zx5=Si=2su>1B<;Zd|8e|aeotV$*8)Ie88N-D4`zlH0szo?7WAykPjDw$xTPN5*S%W zTd7x<>QJM4FZYArH1B_H&w+Y0rcw)vAvqEJ?@;&t^;h?Vj|$+|$=zw0y;qYSexp;aL4yl_k$iy1S7 zc<3igGs9T~MUb(V{r+Balil zqb!bi<#>}ThcP5ppCw^xB+s_=@1Rw zs3taaRRwpV0~BPgTsd+P z&#=biWPefCdGOUCu8|$OmBEunQi2eA$sbM2Vh4yr!=iNwx{aTm+&cZ7ms}M#s|VCl zdWbj6m#4{D{nkOUA@(lS(fK{Z2;*qW3zlA)y0V2F4u;Tn@(1L-s1kHE0n^Jp9Vc>!LLX=%C-rT8rK-k*qDIg8T zhW5Jz5hogP1Ck=!CMMZDc!Ny%6*~5I_m<@GA=PNB=aFuh0iSky<3t6LBsuJtj;dLEOp@QsBS6;KUeD z2=Iq$NW*A=*nrxsNl>jVrBmMu%k?pxb3(v}ZLq2|QhmSfzKap;Ctt?#Q^l#qLx|)9 zBOJv>R5+v_SXAH(uW`xr!eS!besp>WCZuEtM$fC^v6?jmZ;I>xgJQE@7NET5hrfQg z+HpIJ6Z7h~Zsdx)myKrm@FD(1i z($;`sU1S~ytN7zmk`Mb3$OGG+ioN-P+ivC3ut`%A!3o%;UkT+B zJ*9PkRn+h+V9lSuXUeA|1YoldxySb#nPL!cc_Wc{fo~SMLB+nwcdJ!}7qIMJd-G{A z$b`7TeqIA%&Je*5*KP+}HxL1JY37>qN8S^XXFdi+q~ z3ked*62bKkC&>gJ2|fur2|ATfJPIrNe|HOHH~){vkOTzRnZfY8{Pei(=l}bk!2h4* zwx8I+NT_t3NT|f7oyeGsd7U?K1Xepy@a5>bP>JsGsO^7hj8;0R2c_S_QL%a7hjIptd~HE@l7wGn!s8w zk(S(@K4LvRvpy0d%QtHWj`mmWl%-#B9eVC^cm0^Upm)K(D?jU2q z2;Ydn#t8p}9K)!Z1Hp)pQKp^{imNMkY;PRQU0ZcfPJ%QH^O5$jt{%9B|YPAX7& zZB8oEMKDe&F(zA1DYF(8PN{I#Z%(Q5_A!1^6PU65q%N{m_(?qVyzTg8jct!LYdE}dvB zORl{nHcM`UOeIV1BZ50i9ux8`Up%MvZN7NTTa|qAUh>-c;{Pd)cWhLOpjLk}*>cv*cO3>BW&Pp&6I_qi(Dyi*iC?<31Y8Z~t?&>pq1=h82 zVguW?2r}!^wMZ)O-L>a*k*w=cj48J3(X7R#>oJ@SyX!A_`&l<)1!iqGUW#m&ZoCpd z-`$9lLTB5Imm{^?Oi*Gj+e}mw+S`1srNFk8q-S8am26~Pwv}Syy|YFu(zG*+RwI=az6PvvuXIzWO-&MAmQhVEfuYvp7{as!D3hv^0xs->By z+4UP3JaQ^>DrO`q5*%_tBy@c8{{~24gfyxa8Gf0kbCH=b|8J)>a?b02bVdSbr0etF z4J2{WVkdHRa^`rHPB?iZfF|&mF<}RZoQe!bT1Q9IQ0zZOWO>Lu@Zpz+k2d13w6vq1ZqYc6T zedOg4kur#W#O{CnP+G>p*ht6rf7{n7h;bw`I#)AyH!mwsE(=#zE;Dy`7auxrD|Zh& b7iT&?K5kxaUUDj`Yo(6J*VnSw|M&er9VDIY literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/double_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/double_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..537f13cbce46351c36bd0eb2e6752d7623607184 GIT binary patch literal 8017 zcmbt(c|4U-_pi*2h6tg|LzKvr@oSzrjx(h)WhP08OqHQYhC)(_455-rMInSFgpg#O zhY~`X@9BN-@4x%cy?1@~*~8jqnY7b09}vR)O_TYooQ4YtB# zf3-e)EfPuwWA8V<8PoOSdzi`>Xp61s;bY27dRWisG7-+$bnLsW5y?$}(vqCiHK=Ix*d3f53NuaIb}Lqt>AE0wah=HTlV9hkSHA%Q0{K>M>k z{HC{L?5-ezz1%cz<7aF1S`VjwI!}g2;jz<#1D42<<1F_kaU-?8P+X}=6(Nra%cOZN z@Nm^NKb+Z)*4C3>jP^+2>GY1)?ai9Vz8w6zw$TQ{x-q=H+WLq}95QzPs*K(5>U*;v zY9OiGWhl*?3a#xQOg==@yflbN)HF%mO5KgDnIl)&zUxDrw#ChS)--UFQCwGlG<#ANlJchvMT^%$@mVzg)eE{PZF!fm zqd^0qRe8oYFHzA$+E!LQMMi-~!E*I+T_n0BK5lg(fYa2aerrDk%IOl-Rtv_EX|F1n z3)jHv%3JoLLUlYj+U=oWtbxeL!;X(MtZ@FF5))Sd1yza$m+e*!KrOPqp*>H-mMn7Z zmryf=uNm)2+$9F4ZJVN>_HZL>HEOqwqYW;)WOZ7%*g;(H#H&0(BgC+1Khp9NK&JnS z;m%1K)D1dD3Iokilw#jDVNApOh4$dzF=Qy^{>;i`CgN`AOT)!=wutvwa8mzZ4(^z1 zN#(^FxaLmD53!Mg_r~A#e=3Y%qGTG zo|W)^MSKx?e5}2Vf>B3e$KboNe|3Xg^yM`*sA$(K{nY-Aj1xY2Ed@;^ z{FYuHeol@Lk4D@+>whI9V9m|d(7RN)t5|*x6cxp#JrlVH?h9h?4)3&HCOgzzV-}@K z+M&uLG9&k%EEtqcy~86ca3CRl`>|6RIAZK(ap^4?8-B8S?2s|SP2UjT-z(-A8#Ccd z)TPV$k?Cv4g_RN3@N49YoGxN2id59;_w2LtpndVA5z3voV^)XEkdUsVj zrUe-^G|fhr{VAr8muT_6C~62x?<=OI^!u8&#Z4-sUmd2x%3Jrb>_L%v)*$?hG2_`7 zRMV@85qrOL>wl`_o%Qwo6FgK*by>dSmNACc?Y^I~F&3Z(UMtnn;(=F)znkYz8a!Kl z(l1YtpxUcZ^g@P;*sTZoZmW@TC@v*3DSJ0O`j^^b^f?>I#&@85!N z%u!HsFie*R zo(;O{v1*8`G^yy;qafnE)}+u)b?i!eaiFK%3Lf$XWZqS4EFL(!e~p3_col!RXNsGn z?RwJlZ4Ok#Y#-dR_OLQMzuD>^7*Io{2Nx$tlK~Rl$MoBq2spUyY2T;6+Hgs0Zk&5V zM6cFyb#G>2yz3QZ*fe7dez(&#f-5w5l@0fdTM47GvqPYopaQQxX@AjJ4KS*x?dT&> z5pX#AAn7L=-9JnYE{5tO)p^MzCtd;TE_Xct_MQZr9TSg2Q?0>OT%T`VOv5JWGtB|} z2x!|a8CJqRt$QI;@(TaFHev?2xl2+B@Hn|`Rw|T=rIhQDr5|@7;&|dWZBI!AKV98x zP-uaQ%vc>^Elo_WNp|YcwnpCXD>S|X%HS(ZS3r&Z>C^-UZ{S8i3N^0vW0o}lVGxL ziMCzD3h}q9`rR{k;`GR-PhRhI;2Yv?6c$Rso0`{+yC&@rGNUULMc1FKH&`>4*QlUJ zfg-T-RUKRx-Y2jpNf%A@E?_k5uM(J)9OQ&rfO_tv8X- zxPHSa#tbqB{C7F#!^`*zr8nDi$yQxeTTGB*vhkg@SrzKMoN2= zuqE(*&M=b++@IIm@KtTfB6s*k}NLoC~`5fHe+?L^{ffbqTgC2dz-7(5!)wA@2RO%sI~ z>^dmgL|M82SqCv=Yex?I7$WKHwY2hWhG=SdlOq-+3%=3G3r|>#@Rg9*ZFicAvDr<{v?kjRhu1@7FC#I77i?UHrF%3A+4f@^hSB7QvYv z?vk(T&7jeqnVy?x16Lp3AgV$5J#a~H z=3fm{N88$t`l=ce+-Q>0ChCw;WLHnRE-H)`Zh<|2!)>rydZO<9(LHF|^{%DJ(-v=! zMhUW&nSxt@HK5*|0MCP*d4#HAT@Cv-CHV#-j2*}BTEGq@Nrw%CdPZoJ+PM5MO%czZ zStk8lq~eyz4!Av z+|!h4s<(pjo?m0AreZC}EiDl7G(d^7EG^q@X2bM;X@ejncSCjyAk}lwbs=F zGD^vt-L4RENvKV+>$wRo+;@9!HM$8Y zAp6SScMpOzA?EDIn0rYJ7EhE!rsOn{a=$PuX;l~3Kb>q@Tw{ULPhM+Uh^Qe($KKIo z9~~db%5CQ@w1tpORU`WY0bDuZrQj=SfwfsVjMJ|T5rFC5?o$Z)x6DcvegKi?|;ecS^hx|OeP=tQUK!QeMvo_0483Iwz5 zTof`Sqz}%Ad^N;x&zioyzg+R{2>v_14qF-C58IVvS`(9G;pOJpSw-hnpiz z=+|j>vRnQN9*_{QgOmrkwrTQ2kH_j%zF66I}~{Nj4jrjU91Sr z(nK7=E^1tRUUz?w`m?K#s5l|vMmoVjguH3U=o`Adr|J)V!x9k#gW=Q(1N!;#wCsyM zZwj9`8&&H&?2sh5e~5Hl4DP%Masx7|Sa-L%doWW64SosK>CIM%O;A~VUT6X~riq`; z7tE1plb-o2kBGE!rB5Q~0rtP=&lz2!-(yAcco3f^bk*70Qbc7iRK4IUqrV3v|4Wm5 zrlk?!@n9Xt8*?0-8(Y&It%SL=noa>ZD$sG)n5$GVh5McNoHSt~!uy}`*=w4^>Xx2W zouw8mJK9>lY?T22??=m?UVD(C_e!92K@mL{GP>k)R&<@tn)iJ!HOH}ht|tzDnvnCk zT==KR0vC-o7la(4`<(|DlTYi(qFdAcozxRDmfoIY@@%K$;)S?dETxv9EL2XJgqT1| z)LDjs)ff`xH`~6)^Pu0UJ-s!K3cmBI3|~e}KsH`7I)9Ob>46WqOt*wVA(}s!46(r` z!L+BzM|mM{W0Eq{y%W1rIK6he=-~9lhGUyQ(9m1-l~7h94gdKK68=JTTwRgSP+4aO zN%e>7SCUonK4Yl(vxqrduN`hqjWfn#`x8#CQ)Jk4UQydqO@rPI!PV#IbU=1+vXk0q zgSCf_*j_qG_q%u3HeN2FVmhzA-Qkro_y?@ZbhfI1U;OWLq8A-+I=P2rIV)mw=DVm| z)@_(~YjdC6-mWXvrx4dzVuhpt&mk=lI?nuZHNQzx4Q2AD{bKIgLSyv7hpUf_p;YR7k%FQdg;qCgBL@FmDc&eu0Ly#p-ng8DB*QbRS zMNbW<24qmR#^sJhq%ByQ%d16_>3Di|P-8u-8Q8rm>&B}90a3#%Ji zfi_}H&I^wKHaW#Fo~GKu z&#+O#sY46)?oL-aC?rTcJ&~++$^z!&@#pF*jUj$dIydK-E*)2_E7+w+gX72B@BCE^ zv1?@VZ|5^JRLq`?U3o;qY1y0-CAxk5UEZCOTxf_3Yr+HC~QG2+{=45pjy(~Q9usJCC!F+%sNf7CI3Xr{-h?!PQsW-^mO60>@h zIEe;6YsU{vb8290sWV);tAI_zy!w^rE%39k*}MI^1xB_H^9Quj&|SBcP~AZRkL8`9 zv)pu?KvlFcjFiR$os?eXSySjn6prf8*dn8#pgr-Z8H$q?J?>F8alK(*YtktiPCR|w zf7($U`wn|}C?;88&5!xKiJb=U<(jDu^D@Jf@@KIUx*sL@;pwybt7IHcEch{ZQxg-- zj%0%uhKQ37PTswrirLWFwsnWhaByR-UQ9FzhM&IOVEd?z8$*s~T4+`n_^OcleU*j^ z@2PONFjHtQ8h3YoHiyULJJB>fJ>;b`|2dy-0qt}9vnS;&@ZrD_mg+6QtagoNykd#UOi!-xLw1lVn5qd2;=(!Sj9H;fBBH7^4g_V}!ZCQ~$5tx? zsL$+ub?piP5jUn^H^z`4cYm+HRxAx+lQ)}h+}MbBN$k0H!e(Go+^lJH+6s5f34eI1 z$e23kcxaZ%7*nPGvGSJYa7^Hzxz}S1HoME;ayW>P`gXhgDjn~pO$;2g7~F+gf3cPm zp(ZF|4%C>`pds~Fzp9O`EiOaEaY=_jx2wi_mKJ1qhd*Ec%bJXoo2qZNF&JQ|rt4f> zkuHvYmCSDXW{bT2`Jd!|5n;DdRW4Sx0oyn#yG$Jf!Lja)Pai#=qCRP)wa7w6iPaR7 z^&KKaslsJl4~_8fAma>rg-Q6fl{L1K?gxtxayX*DB^4XTt!Gwwy~!=qmpM9FCX6A`yGy9Yb-isU(EO_6o5&g^)Z8t#yFOs=O} z!p`mL^DC8>m{D#poQl=P12&pYPXGr%HTa{;WiK1}d|S?Hr0;*#x$S`| zG6V>^EFV6%jvhCC#TIbkhb6>Cy>zzP(D7Sl<*rUvF|d2c=MUW{g6{%jolBA;Qi}^G zQv7y6kD@{Iq2o)j!I4eV=go1IWh;kmHw6!GF;Q9R{=?XkujG-3dlA#Nel?%Z6stn< zFXCiLu(aiW|D%wI{Q3rogfD=N_AiGRZW&@)(T>T(~Mrg8;`3`-zie41Vr+ z$c~~RB%Qe?ZWj$VKOA&BKtI1t;Yr1_X8ItRjMecNt>`XP){BdDnxnX4<41kE{=BS0 zW!c(6!1`R#(+>2w?`U`CsgOhxT6UUg4VEc;m?V6LOI-(w>SFq-OLACW zuRYFPK*hsv0hP~>=wPww`@ZySQ(RbEoFBA{0z2~4_P?TXu+41m%PBU-u2bJKg)iL+DDa67Z#6~;} z(b)92zv49&h1u7V8{_oh6K(R)O3oMqqe53MzNBKSRBigO3mLo&ynS5sB+uK%!OO|Z z%F8Om&;CC_LVouDaig~+G0FTN*j!u;3=C2Kegk1&D!=`|XA%EDlPkZii(+6Fsb*m2 zajIU!E|FEuxIuZSnu*g;q=uP?6cRIb?s6LkUa+B85&`S<5i`H=(@;KLVnM!2Waa$@6*KH;kiq`XxNY3@V zG}r9-n5wL>u^BQ=A(FJae-fw)&P1HwgOIiM|pF>~wx595R;uYFqfy@T=`7 znZ+7KVt8B{MbApyY7~oC9%&R$G8B6)kwS8LEt&3m>$TMNz>(L|SutWwGPx-(O|k{K zx0-g`Egfl+E2$HEBVXF-@@8k**sV9aDwamxC{#0xH}9_FIn=D!D3Q~w)T}((yr<1j zyhXW_bf`t8$2F%#wJ&hAMQtcXyj6WHiIytk zopuBx$u1I!?AAr5x#e|H4g`&NQJrEXyJ@beZr%Gl^YXgweIJi^@At2le19OY%kBNa zknz0t4&lq=?;TIFNcA|y@VfUnpOwt-afw%%=sA>RB=x~Hh3x*pE!{2ugZuTMi4PuG zu~NOBxvB2GUIlsiz20{pPxShf)JuKzE$wpuc(`ml|D#{U^2Em@)hyC|{&l<_eMcK5 zZ}$Z>t4#JCYcrDW59}m+^au60-R=+W3!3adJ`^iG5HgnPF%UYHcYEN(r^k~6Ve|FU zgW*eE9)l4UtK+u^BY!PV4n{Gs$_$-k-r_lQie0K;==28FsiA02W0~O?9*XC1EWdlf zaGX%^)bJURIGK^N5^0_z=VbB=M$XGWnHssUyFq3&Ub)+I^rHGi!Dxck%G78gSY^kO z47YfVC7VhWj$N`;ogTYPFqR!pAyK@>Q)%vn<7o$ir^l~2#mP>jyQX0z_#?59U<#&VxaJ1IV&ANRQ5`TV3Wc=q$tp*Xp@vavLu zx$>#}J9E!IJ(-<*KHnhsrDCbu=S$`4#GNlyzgA|yykKCHpRZ=-^PR6@m%ck+yFu;K z{7X&~`Gq{A>MTYDh#Zg0{`vc{nxW4<~Th`6}d{W|6i&&0eSjoV}^fAG0;^cBZHPK ztAJDzQ}qVA;`{g1HV!FvddVOl#J@q0NVK79{P*Sf@2*k!M|(W<=w$mZqmzZ6-f=yB z4?SZ&Jr6yD|8(i;>+9)R{p&Q)(=+>5(%b)2(pyIVSmb{hy+$uR{>?Z2_Z&Le{u}&X zGBW(nNd5ot|5N=h|KHO3KKp1yPkP1Vf6u3v{~`L4{~>J#ei47?fTKZffjbZR`|orP m2sm zoC={qB}wCH79q*Sz3=JX_xJg{|Gl5j`>xO0YprLkXRW=Sy`R1I`aFijXcL{A?!TFX zd+9X-blNf+?Vgs3g-MOkaFrWXgY!TOVFLPYbIht z105+vr*JW%hx(qah)=s?5ZNS0uJu(SP&5}((q))2QNdn6Euz0cmGpgO;ksiGiEGS3 z(9$?^$u9@u>nhOnh8uVOEChF`0n#Q)Nm8IQtQAgUv|p`>d134k@0)ebf%3?|afBuZa#`3jzHIoX<##q5 zmZ;aukeY`)IB~cDhm${2%&ia5Z$yXFtJma0LKkuM`2!j0MzB0D4QshHEbx+|R(5$2 zzhE<(xo zRD9})B!Bwe!S#qh{8oGv>55U`OIB=#xO> zKow?h1mp3K3=$nFh1{SC5|^+5z9NgsUFU4gH+y`sr-hG*8)OrOv|Bj(LXmQG(<0Nm zi{P&AfTwZ(=)0^|1Z00s!r=`TB5@C2#3#QT|+z%KN?-t3P&+82^MYhY{ef>3iu zNds2^Tz+K3=e7b&GY^nL9V6^pRf5Tbp(MQPHCdPuf=Js*%{@Vx`2FnxblGkvPq;F$ zJINM-JKqq|*-olG{|M|DQn3G_gT_x;IL03Z5$P-&b7 z<85@GjE4AE7fh+RV&@KD3@ivnW~nv~?Y9AYpbR3ZIanDe4=chz-iAadT{Xb@76rIO zEI}u4Fe$rN4WpzOn0j*)o-iTO05^DVPQ+TZk-v-ZdT(?;WbRSMZ+o9 z4<8KDQCfQeE83!P#XSr`?DGg&#R6AVBhe@*fkgc--1%BeqHl&FF{B>4;#pL~_h1Yg z`%uQlm#NdME~8xF9!7f`(e4pN5{!<;gAW-c zqMd_w)oq+V^qMShWg*>gHI`H?Bm2eWVH&}UFJTH$4~oHl_h?+iuO z1e+44NchuXGPZb%m@u!Pcd(L_XF6d)%Ukl>6Sn3N;WAi-*JDn69j0VA5`p^+Ol32X z%Frb?iMNT5UNsi}^#E_RZXkPOGpSxGNIp9`!RQPNJ#9t!*;x$TPgR)sqJjK}B{*=z z1KOSS$jgsK$F>Ia&NNd;P4AK3@G2BW`pfsBE{r1ULo3L~Etko_ zO$K7Dib)dPiQ3^_jmn86+~1LfmjVK)5W7cV{2Z3H)k0CQko4>`z(mA7yd86a?)L`N ztO_Sh8(YXmKM7bf^l{Eh3q&OxHd>8%w9S*`uFZh*?_1$=`493`$qT$OaoF(G6m;oh zl)(BEXy>!XdENqWSuP_2-_-E<3=?ecc0xbeO$@dgkkRTpIJ_W|GG9{-1J^t(C^=4T z9Zg41%0gHRTtL%7bFlei(71jv>0=I%P>H8Re>olcE4egdpEna`Q4}tSCsT!obl}f# zf-L761j`l^H@S`Eb#@4BQa)2*dm7NRBM;TxEb4h|F@(2MIPap5$XRFPf5?R3C2QPT zy%G`WUhfZ6%Gbnn_ZjD3lX@;s4h`SXE^}bS??kgZ7b( zfkW@RvOxOo{jf?rWXWtV|odwe~y18!#bsG1{ZQNZ(o>OU4u73tr` z^+gTHxLb#Q)k}~%z8UM^$D#g001ikSlI1zqFyh95>fu$?Ct4fjxi1sNZVOO*Uj#xP zyU-(&h^7Tk$-UeV_;$AKklc5kJQ(RFu8!N#;M|D0vuhC$C{6exRS@Pin$o>hbWcic#si5|6x zL__Lc6>>*QA;Ol2po1Syjn*LVm(^(HPJqp%GIE<(sK4@!JijlEf!4cl8;nB9Y&m(e zHV%=y(+C~9)KF?64w?jl;jkR(3`@xL6<{iPJNWk;LO@0>1e?y2G^bb+c&`wD_6U=@ z{YfzH_8=PGsu-Qg#cwjkaN)eCuRh)oFi*wk&;8`O$s}2Dv zk2<%2okZcY?vEYi#wWqpt4G$qJB5$TP#jpT1dk)faKzU`&vweZ+ zd&22MH@zZ!;?Cf`kczW=*Q2SIg>X?dY%@MdHav{gJl{S_S*{4d!M~YkRhPic={l_B zaz(a96#R_G$+Ex0aMIx#v^d{*@}-B^A5{wPE54Ndnewpc}yMt_dXK z1xR^7G5i;Dldpcpl*DKz0#ARZKC&d>-$}zUGh?E6{t-SFrI93dC@y7$fnOpGbQ--! zn1f367D2iYotMtb6yu)%A&PO&Zzs{Hg$n;;C@f5)(fsDG-g6%weEq)@-~W&EgRlI4 zG;YZn8n=i=%>q7|v>Gk}mHwKALRylw+#<#nwLD_hX|=pe_x{>Nl0lMnd@`{Xb-yTN zrq%H)mGsvwR%S^)5>RQkc(h~-JMGcZ?KAz41S#${EFmosOO~*nOgd}XUX=mXazm{( z^&-Z`mi2Tq>-2h2bN7Mz6&68j8pNz)EgKm2nduEHol6E9#9diy8kz3xmW`{PiO7wCTyjS-0(0$jWT{O{tXKwnw>M=H*_M4(pfuwhU*!)ZhMv{nCKqk!?5BqT96Z z*OSd^H`=Q@+!@D)~lmFUxr^D^XHN4vIwHvc3FnVW_KNrP#x*Aiqe*QZ5?Z3``RYK zCi}H*vd744yR=}rZu`tQ+ir*4tn6;bg3^(0r{a3KH_jy;wr@^U3}?S_sroYV=41_z ze2*)OZr9`XST?8U)HBu5p3^Pb@^9VSP3+!!blK#*_3ZH&eS4-qSibixJI=1xYb-0L z*ZW=RXz#h{dig$|nGU-?-#v4~Ieq7Ud>QTYqwy;A`*Vxh_XqIF<@N^(sEzes5YkZ? z2of>19|#t+%^kSN^c)+wBzaL`FhnNaelS!aJ9jWlscdX8T)9DEC_<&ve(3U+k=&uk z?XzP;S14XZc9fQ=1N*9;+%0zWUbS&{jG>O=aICSZ!*HCL?XBT>bIsutMrj2a?tr>y_j9W#|Gw^cOM(++zcgEni%hCD|?$0mUJ!}ollla zjN97M-I6BSBFJDem~u2GosXeP;}v53YoO0tMA=##l*`*J$M?UKG=>uAScWzqn#L)C zT(s?qybQUhg*5`4LN_0!MNp29!)Xkrm;f;{GB~L5Z!%&93pUQfuXcn3`{xM<_7HN0 zz>eq;Vn=rUVC(lTF+MGJ= caMDF`^X3i88>4cQ7FlBszW$M2O}X_vUVy<3tB{xB}&mE31dm4LS(eiBGgGGvdoAv zilS6Zma&9tLg+DB#(ph>+}GjW=ehsg=ehShefxfYpYP}MUOt~+<>9JyA0(AX8wc>^ zB7Ltb6&;1}(Go&mD#V1&LM$*8;+lpKP921(CKfFhLW6tru0pJ9BgAQPI?IGmNrbpS zj!iou>P>|h(MyOH?)P^W;$&MPe(wopZy`zqOIHXhc!G3<7|Q+oRzfsUXVqSa4Q4`k zv0lRC-AxEmc0?2F0F)Urbn8@7JySp-!r%yjfPzE(=?eQT5qZIz&c87G&3urf4amz z<2MW(Ke)rVD<*#$I-97u3+_66ShEgrpT%<)*!E~YPF*!EXo1fhE+gI(2WourSxNi` zo;SlYm;2SsrPzR3k+Ti_k9aBOJ{b+={_GplU_am_Z+--yR9d%L6B#~GY+KS;{PCumZ{KedMgKvc{ z9df!5zXx_G7^lf=gm)A^wCIx=wO^Q-0X%=uquprj4@LwvKCJ%un@zt?vKrxDN=_+Q zS!lZn_80s`QEN_uD-}Iu)U1Vb9WyszzA7}t!L6WA4b<$X z{w+20z#T-r9{NY3c>{IB$vucxTl(}D=LCFTz)Lu}ThY{m-4jeJW;_P1Yw(aluWgvs zQTn}~T77h!q<$nkPrxKaEa1Irg|A?0?}E1!zM-sq)?hUcepZ8*3`QatW5}~n|6I}k zCiIPl*HG;Xus78IfKMAWFg44_8^G?xJN*=o58*GR&ViT@_|Eih2YM5zDWumY@a{m( zEN1W%I;Wz2KD?i)^~3K6bhkl|DSRK;i^!S7{X2NI&}Kw@iYYxLFAMJ;XiWqA2)T0d zRPdj|Yau-Rh8A}CQ?WY|@5fwB;Lt_SZ0cX)%?uqnVEa&?&3h2eoCAq(Am0Y;H_UDc zxli%5f^!b57H^;M=0{)7qc06j_u+d^O;5C3h5rRB4Lzygmf$T6?|R_OBR7z{Zt7X` zj%-0sJiN*1bVgSdc$dJx1D-p13FwJnp9e-fxixrsfCg*&oxlu&(SHnIed%ctYb8C3 z?Ipw>@^A9|7)&RylhE;uJ4fP0_<90QA|4KrlMl{*dRa!D1pOwQ^Wb?+pA+%W!d?rG z1$tx18OKaB@zas__Z0IB!qW`yf1_^)*0&r!RU{sz)NLEU@2mcsWDtaN;+ z(2|G0^<3NF)S0(07_xDK&}o<2*fh!#w^VXBT?<(*rYl>c+gn$@v0p1vRux z48vjlRFplt_IGx-H3X;%+L~h2-7C z_aOS@24**6lR3{|&x5y-{(OYvE%ghTi4|C}c!=SC6CQu1--+NE;PoTB2l^bSD?{5w z_DJp@)4x#m_vlWg{y18z$xC4A@Ek*rjL|s<@8xi0ke9_;iLPY$JEQ9=c%$f-lvp!Z zy4-uyWK;Z11W$&Z@9FUvbPRxZ6r8SjJIY)v>FrwbhoSc-Sd-8nOl${O+SF*WLYYA- zeOpA2o~eI^$@>8JeR9IlRm<~o;tKj^z-&CxP{(~Fn2DVA;4@~oX9jl6x0!en@6-cg zH|VDh^-sWZXB|be3_thalyg_fK904RJ12aU(8o0JUV+n{+}?0D;pq-Kp%zMbuhXx| z^l!Zx?>Dh`^rDQM$#@uthwH=}iH8x3M}s%Z9BsYu63KFb=P|nqp7r!!LO)W$+eFR* z`lH9nhtC6Dws5tkuhr!5LEAvQI1qmVegv^EA6EQQf;ZW ziLpfek9A}9zfgr?qE?^(cN}QEwX0wzrj-Rv0*R_O0kJu~&9QM;GgR6`foiOe;H2o$6aD_43QE zH1vtMS7|gi*0jplZ@X`mEHF2_s!Q;(dsSV-DooFrMARswyRB$U+Sq;di>n)ZM2U`Z z@)!g6IMa>tcb(~!FzWRc zkA%_h8n4@q`TObFDPvncX}Q{JwsUY*YIlouwQJkI!qr}Xw8YJ!<0J>SafY*E-Nttb zsc@UnJyPO6(d_+{g>G~Ag zR^~OX4pRRbw~6L?HSR7)u5b64qLijg^>Fh{@tp3Pk}_@Pk{c=0=d6>add*MrO!W@P zPDz~+bmT_r%*AEWG@qqcJ=12bxR;VPd-cm3X>+194AbYvbeNXzyU{c?eO~;ao9Xi- zM{DosnWWLP&TqTtf;#_n|Gjk!b}f5Y7ob|Ns1M9dSx~=lU*6vOMF)R*SReFrrDCVi z(W}#Th9unHFf+9Hud11gOFwJ-gq625(q2+|WPZbv^XB;t;TMPe*|7A=xZbxTYCHpO zFRS&>zrDP4$*tWh?yfV+{Qg1G^vspce`cp;u6lChR_5yGWky+RUSFM_75VO7TGrZ+ zFK=Z5T79H^>&9Fx}W~ z)qB%8)6K)pHkoInpV`!B@4ow+EspQo6F;EbIA_b?YhF1&4!fV8vvuUF+MI;3nzGzP z`;OkZNfXR=p7f6BDkJG!NMFDf!mo3-kLdAjdr8^sR)jFpOv zg$0iqGJ+LX_U>F?{RdLiW3J8N}APu$Ye9*?>voDhdkBa zII#z)>nEyl__(Q)zkSBpf6WAh&)4I(xSHB7$lJ?3@Y{2M(%~!qd^O*{KI6VA`T8_^ zv@n*3`Gzl9<{#02eppz4-|+CoE9A@l!<;fAZ{Kf`pJ@=R#{jXw%*X9bo*9gVJ? z&~YS(t~X&3UV+SE=OX~iyp`_5?HPQX@SWGSEAaf1G3wWYH?Sjna zlOG4qHaXJx<+m63A!j%LRDg4W44q)v<4(FyrB>iG$ECTeK26*kiRR)Z9|_16sXZL*q)}yZ9X0e?#O_*yO1C398RxKMFJX zqVGX*`z2g(S^h;&xNVDaAS{;KH3MFYSojoHtaNmCADpFeoneO^?PslX_O1&$yzZ7_`I%uCbaPW#)N+Da&>T% zo1;5a9loIqt24iTg?IDh{)NxFPaJ|d(7c>t{zFWZkDHYtCWX^HZ?R{;i5zVm(gkCc{**!4^oKN$_f?wLBY$?D`~{s2;%>mNaY4P%!(`MP za(x}tfIj7V2O+P|+T&1e=s`3T_&&WB25x$g4G%gzTnt6OB$mLLTXeIam0bbN|3@|c zTgW>z|15zDONQt=$+7RWkvH#t@EuOeTTSD4_co>ak+#pVM|*tgBVIVrKjSQv=j)?- z(z*E0T4b%`y|n*4j-}H59R24{ev+4|X!hMg`s-fLxV;RVZ-{J^-MR@4>F!P2d^(#s97wzNM=C$;kV(b~t zz=7`o&Nl}>EHk0=I)sO59^>r1PoiDbUW%Se zBUcvGBOjLyq3f?$VL<(jC~XKuyQ6^))q6R|23j`Nb}L5AV~E! z=Ez6y(~lT7#_=du=lO6%>Vr3YG?7C6Md}&+6!rzJ~Mm-_9mq)ho|)- zv!Oe$O$r>9GordQjbEXP?A0<$1dhiF(|Nb|OvoW0xO0>0(OQFB8<{U{{0)vx!2l}EkSz#wgMhmk9j&85 zjW}I@$=c;FaD244HVZn~E0Re!kJ58spgoBl?E)4Ts$iJ+qGfPAc9_;>EAQ1#Uap3` zIBQ%KZhjJ?2t9-kdcffYIZWu<+C%+Z?l@?OJZOKJ*5!7>1l?z7{`I$Lk5FHC0ZN|a zpmn&~l)e?&K5tYA#@4K%I=VBFN#mcUYC-dxmRe?j<3>rdC*Zj~&Gyi2aJe|#w=JLU zU-tIEQe=+vm#N($g{=vB<8xV3t=y|lg~IbyHz zzMRDlDo{c6A&oQZ{V3Iil!ebY+6&`forZ2IhSYx!{~fgdG`g#4U6W1kQav9RT*!s< zQW8q(zV;eIG03Nsu6}_5X7spZxNA+Px(@wg5{q_$^KIu~%^q|5{h(3$^9|$+N3B*v z&R|1L$n(vg)_s<)J-{RABMI5eT<0AOzOGjU zW7j;1gs=XX$ifz`L>D;vT`3Iee^!u#4=31^psLME>W9M{|7PS?{gx&e~6qR*?)n1@S{QNV z4tkEcow<7i*<+8mGfeQvp!yy;t4HUTHE`4Z?)p;j3&({YY}*Bwyb7y?d_xKIpBo^^gmT<0xfDr=(%nX@`d_wZ&oYSNmu{h-y;hPig|IK>ikw;sP%KK8hZ0=rS@!o z2danUS>2T92UgQON6u@bx(->nhW3AFlTj<)_tLN622|^cOn~bo7CwYZk;3#okBc+u z^JZ1rm@C@Z?yTJb4Lq&rKHdU0bC7HMJb%OF>UIlg-PgMpR&T^}dNa>h&qWSpyG8T*%*)O9;16UsXNeh*-G-mW{k|?M z2ibObE4|+#dNr+s?na9>Xg_iGY8_n2?5BC%u3jdLT)Z`w_HSszTPAX4V+GAim(6bK zzc~M1`aF%QPM5&(vBKgA_*x;)8_N7%M!)ZTktvl&ZvDsU3A|FyoCT%UE~5UoRNka< zhY6|Ez8#o2z6-~hU23$Bb{o%GBAezPrTc$4-ZYH-{oiFeu=Ho~6_~g2j1OG*#o#@B z91>*>a}#cDfMFj`(>^Q=$h(PL`78Jn)VwyG>UoBxy#jKm<;GVq>CZ~q|AF!%*O7Pd z{-%A(njCLH{$jC!1$##=G$2>dCNHS!^MLNFpjx&NIlfPc=6l;g{Yqp>e^XvV=srGh)`e^ z%DK3Y{jYpxL-nx)?fE@j1=+_)^f^QPmXmWYg{iYgdVF3 zs9%qBH_`_1QM%`a_O^?CU*LIjU0Oepq7yR60YcZ4plgoMe(0a;!V5z#A2|hEBO*9p zp4vTF82tfwXT^Zl9p}$^P~r0PW)9y5FE@nvOU;u=J@TGNoHzYZQ5 zq0bLzsb*0e=MeQ-0=IgoQT_bf^O^c<5wnu+H~ow~?Yrq&A~85mt~Q+Z@jP>j3S`#3 zKw1x*8J>5Lm&e*{flb_}sScLMW(RQc$b2?*PA+_R@NH25+bR8Im+J6zv!H z#1z2VeWnlLsz_1V7YTy1FCzO|xzPOSo)BJv%so&+`?sMxqy~A^;sveOz%$Wk8R~X6GHyc(&3mkxL$--iAq>@J;X7g>J`hU)6LZ7E0yQ)3W zzWkM3J^WKEO6R#ct5bc79{A;f_FoD?60k7Lp4yj>FWioNH2CkoFMc`}<{)qLsZD@w zVi(`R9?jJju*p%K*8Q>dBnvry^IfWIg+3R`0`=omcM20_V>s?qCMXH*DuO7#I3YV9 zd7hUuJ(n+dKQ~6cXSr__dW9>bLGMXEx~~dP5Y@Gq$TB9{E#iDPz*&3s>foxawX@)# zqoukq`%^;~>@J;n2^W0SqWUO{5syV4|6FqxE_Qf*5B_^pmY!!dd$e4TUzSeBz%=vu zRQEyRsqM(w3+C~{o$t))yj@nAzQ|Q4kI_8p|I4!gInu3^<}W$o&m{6rvs1?)%T9{w zW@g+PZe%0=WUQ|sL$HZ=>PuO$Vy@skK@LF_}(z#?xNr*u>v8@GB(-t>9;68Y=`rWW}#L|9Sz z9Ad65`7sDLo5nr$;U+%%vR25Dd18qk|^X>ueK5C9utF?LMD60DOXeTW zXf0XbRJ8b|m}|A=%Y`R8qF*lZ7;Akg?!_)yD&Z?|uypYS>6lW<0QI&~sUUsHSJEM- z2VcpAvtnM!-ga$!B^Tu@StcJBaVoF9^nNnJjVvPBJne{iy)|4zy*Y>w7T70F-S9XM0 zm9OfFi!EP0kkMYQHBuz?PJ67{>YdK_j@Wl=Cdb;}t!2!RuFz!{WK}RZW#THOTRZ#uVKC4 zys|Uy{TA)79q+g5&XB1#))%y{-exEhUu|Nf(OJFS*g&Sn)O3e+%?>l`_?n%2-8yS_ zS)7-tHDiTZ*Y389kFVWx_(^B2xl^%B-Coxk>$-g>I^*m1dwlJzJK!}#w%)>5(5Bw< zf=ojF!2pe}daED<*#=g~4x0w+aO;Eyo7-+(4YpC|Wk1-(h1z_uzZakI;ZVwxt`81r z#j=fuGiq!ak38*6Xmrf^+SPb8Z-!ixQ<0!;)3H*S#3tuA8r@AU6$Wx2U8{H4esrt1 zPW?K0Q(__sL_d#`e>hP2W2cKb@WY+WpCs z!6Dz`#V%ym;>{_0uf>O3v!~_Uy!G;}z5+Y#TF;Bv+-vm{bMI;Om++HsyC5BA*LG1p z;a=M%<;>v?E&hwcI{VIcHL_a)E@6?zpBfj&=I6BWZ!YkQ1*UDu#skO$93cN z3Y{UQJMB9|&1~*>hV6Cl?Yv>(r_dG73bXIJX_s)n>(=4S-mVCz5{2&DuC?~vcTRNO z@4o9X-rF7N#i7^} z_`v8(L3UAPjj6vbx>(s+AF&ifsu39lymUBvmsF(;h z)5K)!uGN1#C-QWf3SR0=ytdJICUX;$d7QbkItOB$kdB|{M(LOuXEUKW@7g@+cNd(`c}B# zf4-)=^WQHW9fpX6lcn=9H*41=R!&Y!ES;T?9+z;pc6PBp>L{^n*;3V|s-j|IQ{%rn MjlnRU`osEv0DPT0jQ{`u literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/string_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/RLE_DICTIONARY/string_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bbdc391c68f58d9e001ba75fc3280e380e655f48 GIT binary patch literal 3166 zcmbtX-Bwyh6h5LLS|Xx32FNj@2wLzTQDaQxU{I_W<6o3St^A27C?HDE7>&lHG1_X9 zCT)7tC+Kx=x>g^ct1C~?SLkJLd%l@7XZ5Bx?OtSN&z^7Z{mtw>XPNurR3~4;KNqB5 zfd5&=b5$Hy%5mHll`=`qFo8WyMa>`@Nr@9Fm$Y3)fK5_?nx_JFn+hCbM36%cn@%H2 zRuQeVipYYL1qeJhi2(dq5v&+E5&=8~0X#$m8V!wTAfD_Q$A-cep5$6#rG=43Scxm*$pwWf zHxn4qjF_yz zxg7Y=Y;a&b1&l(7&|{1UC6+Lq1`|0Lk|+%fyPO?%r9F{^fslk22P243^03RNlO{xx zOMys=0~U<#V>r|1Kmxfbqqr|}Y$${!LMzQz0G6E+I@>`?IS&HUMngWO|A3~CI2Hq1 zu0GJT5s{JjK_R>FS55^U%8imMWu>CUQhFN^Y31Oe6gP>aasXr?#E?OG23W9H5CU3W zARui;1XQ|q6LwlwfaPb4W3rDR(hwa+Mg%&0;>gJEBnC7&EfB0`FcKY5KoMBc#k7yR-)IVJ!xL z>dcKm^@EW1#qPV%7)p&Q;s3vz~Fi1WJ`1i-& zpTGR|--rGGFv&+@(?s4|^-LzG^`g?8-s2$a}E6w&h`awgx(^DaO z%|myae8a+s|E{ZfbYSR<+OdG>ZsErVwD;=W+KGz$lT)6i>6zIF!*he3lj`}PaPQ%x z1&w#Ht@Oc^YN`H_uk1^=r^DqdSq@c~*&D)`85GrZc6% z>{f0&>}@hT9D$u1(^=8!%J)4k4E6-IwW~|JdtF;k(xcPM-PdAm``zZ?;samzjPHrY z`P6ja?A!JqUfF&&>*$TC6Gz5{naJ_J-Kfo_GU@ec=j5&D>A=Zu=S0$zdZ7=#%q?tM zbM19sy~=($wRd`D;^|KJ;cHv}YfrZ-X&7;qXJ$Ij_)x8QHv7uDwwxF(J)Suq+wx$lhj^1wWEdC~U;XggIjs+VFjSW0kc4K+}ymCpqYddsXM_$i%Zty$vo=cs| zQJc8TX6F1shtPjrb(Cm)<8yx&J8^`U)yoNA=t(H)J71VxD@pfh3RStuw}X!L$}Rsp zS9;~?+0$&HQ0vb0)?X~X`f6c+@#x*!MgKij;j%$9=6v_E0TmJL%^D*rYRhGOe>N+UR3wsW0;oH0n({B@17yL!|5 zBy)4W>#m{M;N9d)Of7ECsM(6F#!?|(#qm0u%2*N&_+wE!#3e!|$ta3rqL>%GadB7_ goldenFilesAndEncodings() { .flatMap( e -> Stream.of(true, false) - .map( + .flatMap( vectorized -> - Arguments.of( - encoding, e.getKey(), e.getValue(), vectorized)))); + Stream.of(Arguments.of( + encoding, e.getKey(), e.getValue(), vectorized), + Arguments.of( + encoding, e.getKey() + "_with_nulls", e.getValue(), vectorized))))); } private File resourceUrlToLocalFile(URL url) throws IOException, URISyntaxException { diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 9747e993c6a6..c7dcc625b381 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -498,10 +498,12 @@ static Stream goldenFilesAndEncodings() { .flatMap( e -> Stream.of(true, false) - .map( + .flatMap( vectorized -> - Arguments.of( - encoding, e.getKey(), e.getValue(), vectorized)))); + Stream.of(Arguments.of( + encoding, e.getKey(), e.getValue(), vectorized), + Arguments.of( + encoding, e.getKey() + "_with_nulls", e.getValue(), vectorized))))); } private File resourceUrlToLocalFile(URL url) throws IOException, URISyntaxException { From 8fec7e3d7a3a723769ca1e6d0e5ea6d26a1c5748 Mon Sep 17 00:00:00 2001 From: Becker Ewing Date: Fri, 5 Dec 2025 14:40:37 -0500 Subject: [PATCH 43/47] Correctly extend parquet ValuesReader class in vectorized parquet v2 encoding readers --- .../VectorizedByteStreamSplitValuesReader.java | 10 +++++++++- .../VectorizedDeltaByteArrayValuesReader.java | 13 +++++++++++-- .../VectorizedDeltaLengthByteArrayValuesReader.java | 13 +++++++++++-- .../parquet/TestParquetVectorizedReads.java | 12 ++++++++---- .../parquet/TestParquetVectorizedReads.java | 12 ++++++++---- 5 files changed, 47 insertions(+), 13 deletions(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedByteStreamSplitValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedByteStreamSplitValuesReader.java index d879a098862e..5c3b812446f7 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedByteStreamSplitValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedByteStreamSplitValuesReader.java @@ -24,6 +24,7 @@ import java.nio.ByteOrder; import org.apache.arrow.vector.FieldVector; import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.io.api.Binary; /** @@ -34,7 +35,8 @@ * href="https://parquet.apache.org/docs/file-format/data-pages/encodings/#byte-stream-split-byte_stream_split--9"> * Parquet format encodings: BYTE_STREAM_SPLIT */ -public class VectorizedByteStreamSplitValuesReader implements VectorizedValuesReader { +public class VectorizedByteStreamSplitValuesReader extends ValuesReader + implements VectorizedValuesReader { private int totalBytesInStream; private ByteBufferInputStream in; @@ -170,4 +172,10 @@ public void readLongs(int total, FieldVector vec, int rowId) { public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowValidityVector) { throw new UnsupportedOperationException("readBinary is not supported"); } + + /** The Iceberg reader currently does not do skipping */ + @Override + public void skip() { + throw new UnsupportedOperationException("skip is not supported"); + } } diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java index 1c8834fee693..d2834493ce27 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaByteArrayValuesReader.java @@ -24,6 +24,7 @@ import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.FixedWidthVector; import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.io.api.Binary; /** @@ -34,7 +35,8 @@ * href="https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-strings-delta_byte_array--7"> * Parquet format encodings: DELTA_BYTE_ARRAY */ -public class VectorizedDeltaByteArrayValuesReader implements VectorizedValuesReader { +public class VectorizedDeltaByteArrayValuesReader extends ValuesReader + implements VectorizedValuesReader { private final VectorizedDeltaEncodedValuesReader prefixLengthReader; private final VectorizedDeltaLengthByteArrayValuesReader suffixReader; @@ -51,7 +53,8 @@ public VectorizedDeltaByteArrayValuesReader() { @Override public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { prefixLengthReader.initFromPage(valueCount, in); - // actual number of elements in the page may be less than the passed valueCount here due to nulls + // actual number of elements in the page may be less than the passed valueCount here due to + // nulls prefixLengths = prefixLengthReader.readIntegers(prefixLengthReader.getTotalValueCount(), 0); suffixReader.initFromPage(valueCount, in); previous = Binary.EMPTY; @@ -161,6 +164,12 @@ public void readDoubles(int total, FieldVector vec, int rowId) { throw new UnsupportedOperationException("readDoubles is not supported"); } + /** The Iceberg reader currently does not do skipping */ + @Override + public void skip() { + throw new UnsupportedOperationException("skip is not supported"); + } + /** A functional interface to write binary values into a FieldVector */ @FunctionalInterface interface BinaryOutputWriter { diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java index b99ac73303d6..2b833717ad16 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.FieldVector; import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.io.api.Binary; @@ -36,7 +37,8 @@ * href="https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-length-byte-array-delta_length_byte_array--6"> * Parquet format encodings: DELTA_LENGTH_BYTE_ARRAY */ -public class VectorizedDeltaLengthByteArrayValuesReader implements VectorizedValuesReader { +public class VectorizedDeltaLengthByteArrayValuesReader extends ValuesReader + implements VectorizedValuesReader { private final VectorizedDeltaEncodedValuesReader lengthReader; @@ -52,7 +54,8 @@ public class VectorizedDeltaLengthByteArrayValuesReader implements VectorizedVal @Override public void initFromPage(int valueCount, ByteBufferInputStream inputStream) throws IOException { lengthReader.initFromPage(valueCount, inputStream); - // actual number of elements in the page may be less than the passed valueCount here due to nulls + // actual number of elements in the page may be less than the passed valueCount here due to + // nulls lengths = lengthReader.readIntegers(lengthReader.getTotalValueCount(), 0); in = inputStream.remainingStream(); @@ -176,6 +179,12 @@ public void readDoubles(int total, FieldVector vec, int rowId) { throw new UnsupportedOperationException("readDoubles is not supported"); } + /** The Iceberg reader currently does not do skipping */ + @Override + public void skip() { + throw new UnsupportedOperationException("skip is not supported"); + } + /** A functional interface to write binary values into a FieldVector */ @FunctionalInterface interface BinaryOutputWriter { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 285a0d3eb223..497089afd353 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -514,10 +514,14 @@ static Stream goldenFilesAndEncodings() { Stream.of(true, false) .flatMap( vectorized -> - Stream.of(Arguments.of( - encoding, e.getKey(), e.getValue(), vectorized), - Arguments.of( - encoding, e.getKey() + "_with_nulls", e.getValue(), vectorized))))); + Stream.of( + Arguments.of( + encoding, e.getKey(), e.getValue(), vectorized), + Arguments.of( + encoding, + e.getKey() + "_with_nulls", + e.getValue(), + vectorized))))); } private File resourceUrlToLocalFile(URL url) throws IOException, URISyntaxException { diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index c7dcc625b381..43c9e227ee8a 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -500,10 +500,14 @@ static Stream goldenFilesAndEncodings() { Stream.of(true, false) .flatMap( vectorized -> - Stream.of(Arguments.of( - encoding, e.getKey(), e.getValue(), vectorized), - Arguments.of( - encoding, e.getKey() + "_with_nulls", e.getValue(), vectorized))))); + Stream.of( + Arguments.of( + encoding, e.getKey(), e.getValue(), vectorized), + Arguments.of( + encoding, + e.getKey() + "_with_nulls", + e.getValue(), + vectorized))))); } private File resourceUrlToLocalFile(URL url) throws IOException, URISyntaxException { From 807253aaf8012f19c42eb5bae73d16769193792c Mon Sep 17 00:00:00 2001 From: Becker Ewing Date: Fri, 5 Dec 2025 15:06:24 -0500 Subject: [PATCH 44/47] Support 0-length byte arrays in vectorized delta length byte array parquet reader --- .../parquet/VectorizedDeltaLengthByteArrayValuesReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java index 2b833717ad16..4afd2b9b58b2 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDeltaLengthByteArrayValuesReader.java @@ -101,7 +101,7 @@ private void readValues( for (int i = 0; i < total; i++) { int length = getLength.applyAsInt(rowId + i); try { - if (length <= 0) { + if (length < 0) { throw new IllegalStateException("Invalid length: " + length); } buffer = in.slice(length); From e25cd56a810f2e80aeac21643ca379590bc56cc4 Mon Sep 17 00:00:00 2001 From: Becker Ewing Date: Mon, 8 Dec 2025 15:52:10 -0500 Subject: [PATCH 45/47] Add vectorized parquet support for RLE encoded boolean data pages - Fix a couple of tests as well as this now completes vectorized parquet v2 read support - Add golden file tests for booleans encoded in RLE format --- LICENSE | 2 +- .../parquet/VectorizedPageIterator.java | 9 ++ ...edRunLengthEncodedParquetValuesReader.java | 117 ++++++++++++++++++ .../resources/encodings/RLE/boolean.parquet | Bin 0 -> 521 bytes .../encodings/RLE/boolean_with_nulls.parquet | Bin 0 -> 623 bytes .../TestParquetVectorizedReads.java | 26 +--- .../parquet/TestParquetVectorizedReads.java | 28 +---- .../parquet/TestParquetVectorizedReads.java | 28 +---- 8 files changed, 134 insertions(+), 76 deletions(-) create mode 100644 arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedRunLengthEncodedParquetValuesReader.java create mode 100644 parquet/src/testFixtures/resources/encodings/RLE/boolean.parquet create mode 100644 parquet/src/testFixtures/resources/encodings/RLE/boolean_with_nulls.parquet diff --git a/LICENSE b/LICENSE index 5dfde249ee8d..f0b85316cd0b 100644 --- a/LICENSE +++ b/LICENSE @@ -229,7 +229,7 @@ This product includes code from Apache Parquet. * DynConstructors.java * IOUtil.java readFully and tests * ByteBufferInputStream implementations and tests -* ByteStreamSplitValuesReader.java +* ByteStreamSplitValuesReader implementation Copyright: 2014-2017 The Apache Software Foundation. Home page: https://parquet.apache.org/ diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java index 4a72f1b98ebc..b3518e58cbda 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedPageIterator.java @@ -34,6 +34,7 @@ import org.apache.parquet.column.values.RequiresPreviousReader; import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.io.ParquetDecodingException; +import org.apache.parquet.schema.PrimitiveType; public class VectorizedPageIterator extends BasePageIterator { private final boolean setArrowValidityVector; @@ -109,6 +110,14 @@ protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, i case BYTE_STREAM_SPLIT: valuesReader = new VectorizedByteStreamSplitValuesReader(); break; + case RLE: + if (desc.getPrimitiveType().getPrimitiveTypeName() + == PrimitiveType.PrimitiveTypeName.BOOLEAN) { + valuesReader = + new VectorizedRunLengthEncodedParquetValuesReader(setArrowValidityVector); + break; + } + // fall through default: throw new UnsupportedOperationException( "Cannot support vectorized reads for column " diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedRunLengthEncodedParquetValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedRunLengthEncodedParquetValuesReader.java new file mode 100644 index 000000000000..f324aa37efbf --- /dev/null +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedRunLengthEncodedParquetValuesReader.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.arrow.vectorized.parquet; + +import org.apache.arrow.vector.FieldVector; +import org.apache.parquet.io.api.Binary; + +/** + * A {@link VectorizedValuesReader} implementation for the encoding type Run Length Encoding / RLE. + * + * @see + * Parquet format encodings: RLE + */ +public class VectorizedRunLengthEncodedParquetValuesReader extends BaseVectorizedParquetValuesReader + implements VectorizedValuesReader { + + // Since we can only read booleans, bit-width is always 1 + private static final int BOOLEAN_BIT_WIDTH = 1; + // Since this can only be used in the context of a data page, the definition level can be set to + // anything, and it doesn't really matter + private static final int IRRELEVANT_MAX_DEFINITION_LEVEL = 1; + // For boolean values in data page v1 & v2, length is always prepended to the encoded data + // See + // https://parquet.apache.org/docs/file-format/data-pages/encodings/#run-length-encoding--bit-packing-hybrid-rle--3 + private static final boolean ALWAYS_READ_LENGTH = true; + + public VectorizedRunLengthEncodedParquetValuesReader(boolean setArrowValidityVector) { + super( + BOOLEAN_BIT_WIDTH, + IRRELEVANT_MAX_DEFINITION_LEVEL, + ALWAYS_READ_LENGTH, + setArrowValidityVector); + } + + /** RLE only supports BOOLEAN as a data page encoding */ + @Override + public byte readByte() { + throw new UnsupportedOperationException("readByte is not supported"); + } + + /** RLE only supports BOOLEAN as a data page encoding */ + @Override + public short readShort() { + throw new UnsupportedOperationException("readShort is not supported"); + } + + /** RLE only supports BOOLEAN as a data page encoding */ + @Override + public long readLong() { + throw new UnsupportedOperationException("readLong is not supported"); + } + + /** RLE only supports BOOLEAN as a data page encoding */ + @Override + public float readFloat() { + throw new UnsupportedOperationException("readFloat is not supported"); + } + + /** RLE only supports BOOLEAN as a data page encoding */ + @Override + public double readDouble() { + throw new UnsupportedOperationException("readDouble is not supported"); + } + + /** RLE only supports BOOLEAN as a data page encoding */ + @Override + public Binary readBinary(int len) { + throw new UnsupportedOperationException("readBinary is not supported"); + } + + /** RLE only supports BOOLEAN as a data page encoding */ + @Override + public void readIntegers(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readIntegers is not supported"); + } + + /** RLE only supports BOOLEAN as a data page encoding */ + @Override + public void readLongs(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readLongs is not supported"); + } + + /** RLE only supports BOOLEAN as a data page encoding */ + @Override + public void readFloats(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readFloats is not supported"); + } + + /** RLE only supports BOOLEAN as a data page encoding */ + @Override + public void readDoubles(int total, FieldVector vec, int rowId) { + throw new UnsupportedOperationException("readDoubles is not supported"); + } + + /** RLE only supports BOOLEAN as a data page encoding */ + @Override + public void readBinary(int total, FieldVector vec, int rowId, boolean setArrowValidityVector) { + throw new UnsupportedOperationException("readBinary is not supported"); + } +} diff --git a/parquet/src/testFixtures/resources/encodings/RLE/boolean.parquet b/parquet/src/testFixtures/resources/encodings/RLE/boolean.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bbc11933919f4f41bc4b92512d06dc78e6b1e5c5 GIT binary patch literal 521 zcmWG=3^EjD5S_vVL^`4u_(j=3P)350QG$^{j6nlT3kotYFfar01%AeQAXR@&Oxv@8 z#ZDqy+xnUB<-2DyS90Haq#2=M`8i;VRGg#rpS+u2qV8Q^%9|2&y;Cop>1~yAKubTD zV9buv^SxVM)TXV^nviku@@L)!_4>2DPb-U0>6s?~{Nl@&Z3mxp%|1QH#C7}D<{O{2 zy!^dh1*^?9OfzZS{iRgmr7`E$e5PPg7D*jb1_`#}+LY5C|q69laeL%N?E49o-xq(;c1D!Jfgr^Y;OY)m=>!%BGa=$2tDUkVBZ4wu ze4rMvJcEQnL1Iy1X=;gXazTM^Vo_0kxk6cLQE_H|o`R8)o`If$q>K#EpT3L?3<1DI F006r+la~Mh literal 0 HcmV?d00001 diff --git a/parquet/src/testFixtures/resources/encodings/RLE/boolean_with_nulls.parquet b/parquet/src/testFixtures/resources/encodings/RLE/boolean_with_nulls.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6eed9b602c68041b8070fa904a6930c5c7ae1ac3 GIT binary patch literal 623 zcmWG=3^EjD5WU3=L^`4u_(j=3P)350QG$^{Yyz_em?0?0z`#%s!uP7*@BL0bQA>&D zz2DP7PT0Lkv!SVdg<9_&uiIsFb*~h*E;6$8N{zX2eEX$cuj5v;=|nXj*V$3Z=w-vo z(S7%vTHEw>qD|b_D;fXlz1`X8EvJ=O^RB}8%Hd5Lb!K{1+-?7}PSJ0w?W9>9K9bw3 z`c{~`d}9t1Ws%e|WsqPiPR>ZpO%!Ef5M@whl3++V*A~g;h%m9=Gss@HF6N8N=o21$V<^w!H&w$il;8)`S(hQO^5 8 bytes) - Schema schema = - new Schema( - optional(102, "float_data", Types.FloatType.get()), - optional(103, "double_data", Types.DoubleType.get()), - optional(104, "decimal_data", Types.DecimalType.of(25, 5))); + Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); OutputFile outputFile = new InMemoryOutputFile(); Iterable data = @@ -311,26 +307,6 @@ public void testSupportedReadsForParquetV2() throws Exception { assertRecordsMatch(schema, 30000, data, outputFile.toInputFile(), false, BATCH_SIZE); } - @Test - public void testUnsupportedReadsForParquetV2() throws Exception { - // Longs, ints, string types etc use delta encoding and which are not supported for vectorized - // reads - Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); - OutputFile outputFile = new InMemoryOutputFile(); - Iterable data = - generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); - try (FileAppender writer = getParquetV2Writer(schema, outputFile)) { - writer.addAll(data); - } - assertThatThrownBy( - () -> - assertRecordsMatch( - schema, 30000, data, outputFile.toInputFile(), false, BATCH_SIZE)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageStartingWith("Cannot support vectorized reads for column") - .hasMessageEndingWith("Disable vectorized reads to read this table/file"); - } - @Test public void testUuidReads() throws Exception { // Just one row to maintain dictionary encoding diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 9f3db5761242..0fc7c7d8d1e2 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -87,7 +87,8 @@ public class TestParquetVectorizedReads extends AvroDataTestBase { "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY", "DELTA_BYTE_ARRAY", - "BYTE_STREAM_SPLIT"); + "BYTE_STREAM_SPLIT", + "RLE"); private static final Map GOLDEN_FILE_TYPES = ImmutableMap.of( "string", Types.StringType.get(), @@ -411,13 +412,7 @@ public void testSupportedReadsForParquetV2() throws Exception { // Float and double column types are written using plain encoding with Parquet V2, // also Parquet V2 will dictionary encode decimals that use fixed length binary // (i.e. decimals > 8 bytes). Int and long types use DELTA_BINARY_PACKED. - Schema schema = - new Schema( - optional(102, "float_data", Types.FloatType.get()), - optional(103, "double_data", Types.DoubleType.get()), - optional(104, "decimal_data", Types.DecimalType.of(25, 5)), - optional(105, "int_data", Types.IntegerType.get()), - optional(106, "long_data", Types.LongType.get())); + Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); File dataFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); @@ -429,23 +424,6 @@ public void testSupportedReadsForParquetV2() throws Exception { assertRecordsMatch(schema, 30000, data, dataFile, false, BATCH_SIZE); } - @Test - public void testUnsupportedReadsForParquetV2() throws Exception { - // Some types use delta encoding and which are not supported for vectorized reads - Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); - File dataFile = File.createTempFile("junit", null, temp.toFile()); - assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); - Iterable data = - generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); - try (FileAppender writer = getParquetV2Writer(schema, dataFile)) { - writer.addAll(data); - } - assertThatThrownBy(() -> assertRecordsMatch(schema, 30000, data, dataFile, false, BATCH_SIZE)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageStartingWith("Cannot support vectorized reads for column") - .hasMessageEndingWith("Disable vectorized reads to read this table/file"); - } - @Test public void testUuidReads() throws Exception { // Just one row to maintain dictionary encoding diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 0a83616488be..165fbab60322 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -87,7 +87,8 @@ public class TestParquetVectorizedReads extends AvroDataTestBase { "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY", "DELTA_BYTE_ARRAY", - "BYTE_STREAM_SPLIT"); + "BYTE_STREAM_SPLIT", + "RLE"); private static final Map GOLDEN_FILE_TYPES = ImmutableMap.of( "string", Types.StringType.get(), @@ -411,13 +412,7 @@ public void testSupportedReadsForParquetV2() throws Exception { // Float and double column types are written using plain encoding with Parquet V2, // also Parquet V2 will dictionary encode decimals that use fixed length binary // (i.e. decimals > 8 bytes). Int and long types use DELTA_BINARY_PACKED. - Schema schema = - new Schema( - optional(102, "float_data", Types.FloatType.get()), - optional(103, "double_data", Types.DoubleType.get()), - optional(104, "decimal_data", Types.DecimalType.of(25, 5)), - optional(105, "int_data", Types.IntegerType.get()), - optional(106, "long_data", Types.LongType.get())); + Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); File dataFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); @@ -429,23 +424,6 @@ public void testSupportedReadsForParquetV2() throws Exception { assertRecordsMatch(schema, 30000, data, dataFile, false, BATCH_SIZE); } - @Test - public void testUnsupportedReadsForParquetV2() throws Exception { - // Some types use delta encoding and which are not supported for vectorized reads - Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); - File dataFile = File.createTempFile("junit", null, temp.toFile()); - assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); - Iterable data = - generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); - try (FileAppender writer = getParquetV2Writer(schema, dataFile)) { - writer.addAll(data); - } - assertThatThrownBy(() -> assertRecordsMatch(schema, 30000, data, dataFile, false, BATCH_SIZE)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageStartingWith("Cannot support vectorized reads for column") - .hasMessageEndingWith("Disable vectorized reads to read this table/file"); - } - @Test public void testUuidReads() throws Exception { // Just one row to maintain dictionary encoding From 6e07e0c08b5261342563a0005bd44deb89960c8a Mon Sep 17 00:00:00 2001 From: Becker Ewing Date: Tue, 20 Jan 2026 22:09:15 -0500 Subject: [PATCH 46/47] Add test to verify fall through for reading non-boolean rle encoded data pages - https://github.com/apache/iceberg/pull/14853#discussion_r2699781090 --- .../TestParquetVectorizedReads.java | 34 +++++++++++++++++++ .../parquet/TestParquetVectorizedReads.java | 34 +++++++++++++++++++ .../parquet/TestParquetVectorizedReads.java | 34 +++++++++++++++++++ 3 files changed, 102 insertions(+) diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java index d7f7eedc7ff7..68aaf83ae0a7 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java @@ -20,15 +20,18 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.apache.parquet.schema.Types.primitive; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.assertj.core.api.Assumptions.assumeThat; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.Iterator; import java.util.function.Consumer; import org.apache.arrow.memory.BufferAllocator; import org.apache.avro.generic.GenericData; +import org.apache.iceberg.arrow.vectorized.parquet.VectorizedPageIterator; import org.apache.iceberg.Schema; import org.apache.iceberg.arrow.ArrowAllocation; import org.apache.iceberg.inmemory.InMemoryOutputFile; @@ -48,9 +51,13 @@ import org.apache.iceberg.spark.data.vectorized.VectorizedSparkParquetReaders; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.column.Encoding; import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type; import org.apache.spark.sql.vectorized.ColumnarBatch; import org.junit.jupiter.api.Test; @@ -321,6 +328,33 @@ public void testUuidReads() throws Exception { assertRecordsMatch(schema, numRows, data, dataFile.toInputFile(), false, BATCH_SIZE); } + @Test + public void testRLEEncodingOnlySupportsBooleanDataPage() { + MessageType schema = + new MessageType( + "test", + primitive(PrimitiveTypeName.INT32, Type.Repetition.OPTIONAL).id(1).named("int_col")); + ColumnDescriptor intColumnDesc = schema.getColumnDescription(new String[] {"int_col"}); + ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.allocate(0)); + + String expectedMessage = + "Cannot support vectorized reads for column " + + intColumnDesc + + " with encoding " + + Encoding.RLE + + ". Disable vectorized reads to read this table/file"; + + assertThatThrownBy( + () -> + new VectorizedPageIterator(intColumnDesc, "parquet-mr", false) { + { + initDataReader(Encoding.RLE, stream, 0); + } + }) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage(expectedMessage); + } + protected void assertNoLeak(String testName, Consumer testFunction) { BufferAllocator allocator = ArrowAllocation.rootAllocator().newChildAllocator(testName, 0, Long.MAX_VALUE); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 0fc7c7d8d1e2..0271db9e4927 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -21,6 +21,7 @@ import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.apache.parquet.schema.Types.primitive; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.assertj.core.api.Assumptions.assumeThat; @@ -30,6 +31,7 @@ import java.io.InputStream; import java.net.URISyntaxException; import java.net.URL; +import java.nio.ByteBuffer; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Iterator; @@ -38,6 +40,7 @@ import java.util.function.Consumer; import java.util.stream.Stream; import org.apache.arrow.memory.BufferAllocator; +import org.apache.iceberg.arrow.vectorized.parquet.VectorizedPageIterator; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.arrow.ArrowAllocation; @@ -64,9 +67,13 @@ import org.apache.iceberg.types.Type.PrimitiveType; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.column.Encoding; import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.vectorized.ColumnarBatch; @@ -439,6 +446,33 @@ public void testUuidReads() throws Exception { assertRecordsMatch(schema, numRows, data, dataFile, false, BATCH_SIZE); } + @Test + public void testRLEEncodingOnlySupportsBooleanDataPage() { + MessageType schema = + new MessageType( + "test", + primitive(PrimitiveTypeName.INT32, Type.Repetition.OPTIONAL).id(1).named("int_col")); + ColumnDescriptor intColumnDesc = schema.getColumnDescription(new String[] {"int_col"}); + ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.allocate(0)); + + String expectedMessage = + "Cannot support vectorized reads for column " + + intColumnDesc + + " with encoding " + + Encoding.RLE + + ". Disable vectorized reads to read this table/file"; + + assertThatThrownBy( + () -> + new VectorizedPageIterator(intColumnDesc, "parquet-mr", false) { + { + initDataReader(Encoding.RLE, stream, 0); + } + }) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage(expectedMessage); + } + protected void assertNoLeak(String testName, Consumer testFunction) { BufferAllocator allocator = ArrowAllocation.rootAllocator().newChildAllocator(testName, 0, Long.MAX_VALUE); diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 165fbab60322..a0cf877478cf 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -21,6 +21,7 @@ import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.apache.parquet.schema.Types.primitive; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.assertj.core.api.Assumptions.assumeThat; @@ -30,6 +31,7 @@ import java.io.InputStream; import java.net.URISyntaxException; import java.net.URL; +import java.nio.ByteBuffer; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Iterator; @@ -41,6 +43,7 @@ import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.arrow.ArrowAllocation; +import org.apache.iceberg.arrow.vectorized.parquet.VectorizedPageIterator; import org.apache.iceberg.data.RandomGenericData; import org.apache.iceberg.data.Record; import org.apache.iceberg.data.parquet.GenericParquetReaders; @@ -64,9 +67,13 @@ import org.apache.iceberg.types.Type.PrimitiveType; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.column.Encoding; import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.vectorized.ColumnarBatch; @@ -439,6 +446,33 @@ public void testUuidReads() throws Exception { assertRecordsMatch(schema, numRows, data, dataFile, false, BATCH_SIZE); } + @Test + public void testRLEEncodingOnlySupportsBooleanDataPage() { + MessageType schema = + new MessageType( + "test", + primitive(PrimitiveTypeName.INT32, Type.Repetition.OPTIONAL).id(1).named("int_col")); + ColumnDescriptor intColumnDesc = schema.getColumnDescription(new String[] {"int_col"}); + ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.allocate(0)); + + String expectedMessage = + "Cannot support vectorized reads for column " + + intColumnDesc + + " with encoding " + + Encoding.RLE + + ". Disable vectorized reads to read this table/file"; + + assertThatThrownBy( + () -> + new VectorizedPageIterator(intColumnDesc, "parquet-mr", false) { + { + initDataReader(Encoding.RLE, stream, 0); + } + }) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage(expectedMessage); + } + private void assertIdenticalFileContents( File actual, File expected, Schema schema, boolean vectorized) throws IOException { try (CloseableIterable expectedIterator = From b5d7cbbdb44bc3714a89615d46c70c9d0b44a4d1 Mon Sep 17 00:00:00 2001 From: Becker Ewing Date: Tue, 20 Jan 2026 22:16:15 -0500 Subject: [PATCH 47/47] Run spotless for Spark 3.5 & 3.4 --- .../data/parquet/vectorized/TestParquetVectorizedReads.java | 2 +- .../data/vectorized/parquet/TestParquetVectorizedReads.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java index 68aaf83ae0a7..d428fae70718 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java @@ -31,9 +31,9 @@ import java.util.function.Consumer; import org.apache.arrow.memory.BufferAllocator; import org.apache.avro.generic.GenericData; -import org.apache.iceberg.arrow.vectorized.parquet.VectorizedPageIterator; import org.apache.iceberg.Schema; import org.apache.iceberg.arrow.ArrowAllocation; +import org.apache.iceberg.arrow.vectorized.parquet.VectorizedPageIterator; import org.apache.iceberg.inmemory.InMemoryOutputFile; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileAppender; diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java index 0271db9e4927..d0d0aabaf532 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java @@ -40,10 +40,10 @@ import java.util.function.Consumer; import java.util.stream.Stream; import org.apache.arrow.memory.BufferAllocator; -import org.apache.iceberg.arrow.vectorized.parquet.VectorizedPageIterator; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.arrow.ArrowAllocation; +import org.apache.iceberg.arrow.vectorized.parquet.VectorizedPageIterator; import org.apache.iceberg.data.RandomGenericData; import org.apache.iceberg.data.Record; import org.apache.iceberg.data.parquet.GenericParquetReaders;