From c8b646701047796e51858a497545228302b1fe53 Mon Sep 17 00:00:00 2001 From: siddharth Date: Wed, 6 Dec 2017 09:41:11 -0800 Subject: [PATCH 01/13] Cherry-pick ARROW-1877 --- .../arrow/vector/util/JsonStringArrayList.java | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java index 480bd76d445..b6db29a7fc6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java @@ -19,7 +19,6 @@ package org.apache.arrow.vector.util; import java.util.ArrayList; -import java.util.List; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; @@ -40,21 +39,6 @@ public JsonStringArrayList(int size) { super(size); } - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof List)) { - return false; - } - List other = (List) obj; - return this.size() == other.size() && this.containsAll(other); - } - @Override public final String toString() { try { From 41b11bfacad92a5c7117ddf62dd94f8aa8426e6a Mon Sep 17 00:00:00 2001 From: siddharth Date: Thu, 7 Dec 2017 13:57:30 -0800 Subject: [PATCH 02/13] Add Decimal Vector APIs to write Big Endian data --- .../templates/AbstractFieldWriter.java | 4 ++ .../AbstractPromotableFieldWriter.java | 8 ++++ .../codegen/templates/ComplexWriters.java | 7 +++ .../arrow/vector/NullableDecimalVector.java | 48 +++++++++++++++++++ 4 files changed, 67 insertions(+) diff --git a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java index 853f67fd0dd..fce6876025a 100644 --- a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -67,6 +67,10 @@ public void write(${name}Holder holder) { public void write${minor.class}(${friendlyType} value) { fail("${name}"); } + + public void writeBigEndianBytesToDecimal(byte[] value) { + fail("${name}"); + } diff --git a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java index 228c2c531f9..7f4a13d4f06 100644 --- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java @@ -16,6 +16,8 @@ * limitations under the License. */ +import io.netty.buffer.ArrowBuf; +import org.apache.arrow.vector.types.Types; import org.apache.drill.common.types.TypeProtos.MinorType; <@pp.dropOutputFile /> @@ -82,6 +84,12 @@ public void write(${name}Holder holder) { getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, ); } + <#if minor.class == "Decimal"> + public void writeBigEndianBytesToDecimal(byte[] value) { + getWriter(Types.MinorType.DECIMAL).writeBigEndianBytesToDecimal(value); + } + + public void writeNull() { } diff --git a/java/vector/src/main/codegen/templates/ComplexWriters.java b/java/vector/src/main/codegen/templates/ComplexWriters.java index 406bbb39c7f..8cad12ac318 100644 --- a/java/vector/src/main/codegen/templates/ComplexWriters.java +++ b/java/vector/src/main/codegen/templates/ComplexWriters.java @@ -120,6 +120,11 @@ public void write(Nullable${minor.class}Holder h) { vector.setSafe(idx(), value); vector.setValueCount(idx()+1); } + + public void writeBigEndianBytesToDecimal(byte[] value) { + vector.setBigEndianSafe(idx(), value); + vector.setValueCount(idx()+1); + } <#if mode == "Nullable"> @@ -148,6 +153,8 @@ public interface ${eName}Writer extends BaseWriter { <#if minor.class == "Decimal"> public void write${minor.class}(${friendlyType} value); + + public void writeBigEndianBytesToDecimal(byte[] value); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java index dcc551094ae..0d0a7c0ec13 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector; +import com.google.common.base.Preconditions; import io.netty.buffer.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.impl.DecimalReaderImpl; @@ -199,6 +200,43 @@ public void set(int index, ArrowBuf buffer) { valueBuffer.setBytes(index * TYPE_WIDTH, buffer, 0, TYPE_WIDTH); } + /** + * Set the decimal element at given index to the provided array of bytes. + * Decimal is now implemented as Little Endian. This API allows the user + * to pass a decimal value in the form of byte array in BE byte order. + * + * Consumers of Arrow code can use this API instead of first swapping + * the source bytes (doing a write and read) and then finally writing to + * ArrowBuf of decimal vector. + * + * This method takes care of adding the necessary padding if the length + * of byte array is less then 16 (length of decimal type). + * + * @param index position of element + * @param value array of bytes containing decimal in big endian byte order. + */ + public void setBigEndian(int index, byte[] value) { + assert value.length <= TYPE_WIDTH; + BitVectorHelper.setValidityBitToOne(validityBuffer, index); + final int length = value.length; + int startIndex = index * TYPE_WIDTH; + if (length == TYPE_WIDTH) { + for (int i = TYPE_WIDTH - 1; i >= 3; i-=4) { + valueBuffer.setByte(startIndex, value[i]); + valueBuffer.setByte(startIndex + 1, value[i-1]); + valueBuffer.setByte(startIndex + 2, value[i-2]); + valueBuffer.setByte(startIndex + 3, value[i-3]); + startIndex += 4; + } + } else { + for (int i = length - 1; i >= 0; i--) { + valueBuffer.setByte(startIndex, value[i]); + startIndex++; + } + valueBuffer.setZero(startIndex, TYPE_WIDTH - length); + } + } + /** * Set the element at the given index to the given value. * @@ -266,6 +304,16 @@ public void setSafe(int index, ArrowBuf buffer) { set(index, buffer); } + /** + * Same as {@link #setBigEndian(int, byte[])} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. + */ + public void setBigEndianSafe(int index, byte[] value) { + handleSafe(index); + setBigEndian(index, value); + } + /** * Same as {@link #set(int, int, ArrowBuf)} except that it handles the * case when index is greater than or equal to existing From 9d84f84543cf51d362293a7bd11094ebc8b8ee68 Mon Sep 17 00:00:00 2001 From: siddharth Date: Fri, 22 Dec 2017 11:24:43 -0800 Subject: [PATCH 03/13] ARROW-1943: [JAVA] handle setInitialCapacity for deeply nested lists The current implementation of setInitialCapacity() uses a factor of 5 for every level we go into list: So if the schema is LIST (LIST (LIST (LIST (LIST (LIST (LIST (BIGINT)))))) and we start with an initial capacity of 128, we end up throwing OversizedAllocationException from the BigIntVector because at every level we increased the capacity by 5 and by the time we reached inner scalar that actually stores the data, we were well over max size limit per vector (1MB). We saw this problem downstream when we failed to read deeply nested JSON data. The potential fix is to use the factor of 5 only when we are down to the leaf vector. As the depth increases and we are still working with complex/list, we don't use the factor of 5. cc @jacques-n , @BryanCutler , @icexelloss Author: siddharth Closes #1439 from siddharthteotia/ARROW-1943 and squashes the following commits: d0adbade [siddharth] unit tests e2f21a8b [siddharth] fix imports d103436b [siddharth] ARROW-1943: handle setInitialCapacity for deeply nested lists --- .../complex/BaseRepeatedValueVector.java | 8 +- .../apache/arrow/vector/TestListVector.java | 128 ++++++++++++++++++ 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index 4648d078949..a909828602a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -29,6 +29,8 @@ import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.BaseNullableVariableWidthVector; +import org.apache.arrow.vector.BaseNullableFixedWidthVector; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; @@ -134,7 +136,11 @@ public FieldVector getDataVector() { @Override public void setInitialCapacity(int numRecords) { offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH; - vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); + if (vector instanceof BaseNullableFixedWidthVector || vector instanceof BaseNullableVariableWidthVector) { + vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); + } else { + vector.setInitialCapacity(numRecords); + } } @Override diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index f6aa86a3008..ce9fb444255 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -559,6 +559,134 @@ public void testNestedListVector() throws Exception { } } + @Test + public void testNestedListVector1() throws Exception { + try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { + + MinorType listType = MinorType.LIST; + MinorType scalarType = MinorType.BIGINT; + + listVector.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList1 = (ListVector)listVector.getDataVector(); + innerList1.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList2 = (ListVector)innerList1.getDataVector(); + innerList2.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList3 = (ListVector)innerList2.getDataVector(); + innerList3.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList4 = (ListVector)innerList3.getDataVector(); + innerList4.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList5 = (ListVector)innerList4.getDataVector(); + innerList5.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList6 = (ListVector)innerList5.getDataVector(); + innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); + + listVector.setInitialCapacity(128); + } + } + + @Test + public void testNestedListVector2() throws Exception { + try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { + listVector.setInitialCapacity(1); + UnionListWriter listWriter = listVector.getWriter(); + /* allocate memory */ + listWriter.allocate(); + + /* write one or more inner lists at index 0 */ + listWriter.setPosition(0); + listWriter.startList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(50); + listWriter.list().bigInt().writeBigInt(100); + listWriter.list().bigInt().writeBigInt(200); + listWriter.list().endList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(75); + listWriter.list().bigInt().writeBigInt(125); + listWriter.list().endList(); + + listWriter.endList(); + + /* write one or more inner lists at index 1 */ + listWriter.setPosition(1); + listWriter.startList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(15); + listWriter.list().bigInt().writeBigInt(20); + listWriter.list().endList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(25); + listWriter.list().bigInt().writeBigInt(30); + listWriter.list().bigInt().writeBigInt(35); + listWriter.list().endList(); + + listWriter.endList(); + + assertEquals(2, listVector.getLastSet()); + + listVector.setValueCount(2); + + assertEquals(2, listVector.getValueCount()); + + /* get listVector value at index 0 -- the value itself is a listvector */ + Object result = listVector.getObject(0); + ArrayList> resultSet = (ArrayList>) result; + ArrayList list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of second inner list */ + + list = resultSet.get(0); + assertEquals(new Long(50), list.get(0)); + assertEquals(new Long(100), list.get(1)); + assertEquals(new Long(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(new Long(75), list.get(0)); + assertEquals(new Long(125), list.get(1)); + + /* get listVector value at index 1 -- the value itself is a listvector */ + result = listVector.getObject(1); + resultSet = (ArrayList>) result; + + assertEquals(2, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(2, resultSet.get(0).size()); /* size of first inner list */ + assertEquals(3, resultSet.get(1).size()); /* size of second inner list */ + + list = resultSet.get(0); + assertEquals(new Long(15), list.get(0)); + assertEquals(new Long(20), list.get(1)); + + list = resultSet.get(1); + assertEquals(new Long(25), list.get(0)); + assertEquals(new Long(30), list.get(1)); + assertEquals(new Long(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listVector.isNull(0)); + assertFalse(listVector.isNull(1)); + + /* check underlying offsets */ + final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + /* listVector has 2 lists at index 0 and 3 lists at index 1 */ + assertEquals(0, offsetBuffer.getInt(0 * ListVector.OFFSET_WIDTH)); + assertEquals(2, offsetBuffer.getInt(1 * ListVector.OFFSET_WIDTH)); + assertEquals(4, offsetBuffer.getInt(2 * ListVector.OFFSET_WIDTH)); + } + } + @Test public void testGetBufferAddress() throws Exception { try (ListVector listVector = ListVector.empty("vector", allocator)) { From 707af38787c24ef2403ae2c3f7f6aa5422bdf1b2 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Thu, 7 Dec 2017 20:20:07 -0500 Subject: [PATCH 04/13] ARROW-1864: [Java] Upgrade Netty to 4.1.17 Upgrade Netty to 4.1.17 since the Netty community will deprecate 4.0.x soon. This PR includes the following changes: - Bump Netty version. - Implement new ByteBuf APIs added in Netty 4.1.x: a bunch of get/setXXXLE methods. They are the opposite of get/setXXX method regarding byte order. E.g., as ArrowBuf is little endian, `setInt` will put an `int` to the buffer in little endian byte order, while `setIntLE` will put `int` in big byte endian order. The method naming seems confusing anyway, and I opened a Netty issue: https://github.com/netty/netty/issues/7465. The user can call these new methods to get or set multi-byte integers in big endian byte order. - Make ArrowByteBufAllocator overwrite AbstractByteBufAllocator. Author: Shixiong Zhu Closes #1376 from zsxwing/ARROW-1864 and squashes the following commits: 96a93e18 [Shixiong Zhu] extend AbstractByteBufAllocator; add javadoc for new methods bb973335 [Shixiong Zhu] Add comment for calculateNewCapacity 555f88ae [Shixiong Zhu] Add methods back 5e09cca6 [Shixiong Zhu] Upgrade Netty to 4.1.x --- .../main/java/io/netty/buffer/ArrowBuf.java | 199 +++++++++++++++++- .../netty/buffer/MutableWrappedByteBuf.java | 116 +++++++++- .../arrow/memory/ArrowByteBufAllocator.java | 15 +- java/pom.xml | 2 +- .../arrow/vector/util/MapWithOrdinal.java | 4 +- 5 files changed, 320 insertions(+), 16 deletions(-) diff --git a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java index e2bbe35480b..23f5d65fbb5 100644 --- a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java +++ b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java @@ -23,6 +23,7 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.FileChannel; import java.nio.channels.GatheringByteChannel; import java.nio.channels.ScatteringByteChannel; import java.nio.charset.Charset; @@ -493,6 +494,16 @@ public ArrowBuf retain() { return retain(1); } + @Override + public ByteBuf touch() { + return this; + } + + @Override + public ByteBuf touch(Object hint) { + return this; + } + @Override public long getLong(int index) { chk(index, 8); @@ -505,6 +516,17 @@ public float getFloat(int index) { return Float.intBitsToFloat(getInt(index)); } + /** + * Gets a 64-bit long integer at the specified absolute {@code index} in + * this buffer in Big Endian Byte Order. + */ + @Override + public long getLongLE(int index) { + chk(index, 8); + final long v = PlatformDependent.getLong(addr(index)); + return Long.reverseBytes(v); + } + @Override public double getDouble(int index) { return Double.longBitsToDouble(getLong(index)); @@ -527,6 +549,17 @@ public int getInt(int index) { return v; } + /** + * Gets a 32-bit integer at the specified absolute {@code index} in + * this buffer in Big Endian Byte Order. + */ + @Override + public int getIntLE(int index) { + chk(index, 4); + final int v = PlatformDependent.getInt(addr(index)); + return Integer.reverseBytes(v); + } + @Override public int getUnsignedShort(int index) { return getShort(index) & 0xFFFF; @@ -535,10 +568,44 @@ public int getUnsignedShort(int index) { @Override public short getShort(int index) { chk(index, 2); - short v = PlatformDependent.getShort(addr(index)); + final short v = PlatformDependent.getShort(addr(index)); return v; } + /** + * Gets a 16-bit short integer at the specified absolute {@code index} in + * this buffer in Big Endian Byte Order. + */ + @Override + public short getShortLE(int index) { + final short v = PlatformDependent.getShort(addr(index)); + return Short.reverseBytes(v); + } + + /** + * Gets an unsigned 24-bit medium integer at the specified absolute + * {@code index} in this buffer. + */ + @Override + public int getUnsignedMedium(int index) { + chk(index, 3); + final long addr = addr(index); + return (PlatformDependent.getByte(addr) & 0xff) << 16 | + (PlatformDependent.getShort(addr + 1) & 0xffff); + } + + /** + * Gets an unsigned 24-bit medium integer at the specified absolute {@code index} in + * this buffer in Big Endian Byte Order. + */ + @Override + public int getUnsignedMediumLE(int index) { + chk(index, 3); + final long addr = addr(index); + return (PlatformDependent.getByte(addr) & 0xff) | + (Short.reverseBytes(PlatformDependent.getShort(addr + 1)) & 0xffff) << 8; + } + @Override public ArrowBuf setShort(int index, int value) { chk(index, 2); @@ -546,6 +613,44 @@ public ArrowBuf setShort(int index, int value) { return this; } + /** + * Sets the specified 16-bit short integer at the specified absolute {@code index} + * in this buffer with Big Endian byte order. + */ + @Override + public ByteBuf setShortLE(int index, int value) { + chk(index, 2); + PlatformDependent.putShort(addr(index), Short.reverseBytes((short) value)); + return this; + } + + /** + * Sets the specified 24-bit medium integer at the specified absolute + * {@code index} in this buffer. + */ + @Override + public ByteBuf setMedium(int index, int value) { + chk(index, 3); + final long addr = addr(index); + PlatformDependent.putByte(addr, (byte) (value >>> 16)); + PlatformDependent.putShort(addr + 1, (short) value); + return this; + } + + + /** + * Sets the specified 24-bit medium integer at the specified absolute {@code index} + * in this buffer with Big Endian byte order. + */ + @Override + public ByteBuf setMediumLE(int index, int value) { + chk(index, 3); + final long addr = addr(index); + PlatformDependent.putByte(addr, (byte) value); + PlatformDependent.putShort(addr + 1, Short.reverseBytes((short) (value >>> 8))); + return this; + } + @Override public ArrowBuf setInt(int index, int value) { chk(index, 4); @@ -553,6 +658,17 @@ public ArrowBuf setInt(int index, int value) { return this; } + /** + * Sets the specified 32-bit integer at the specified absolute {@code index} + * in this buffer with Big Endian byte order. + */ + @Override + public ByteBuf setIntLE(int index, int value) { + chk(index, 4); + PlatformDependent.putInt(addr(index), Integer.reverseBytes(value)); + return this; + } + @Override public ArrowBuf setLong(int index, long value) { chk(index, 8); @@ -560,6 +676,17 @@ public ArrowBuf setLong(int index, long value) { return this; } + /** + * Sets the specified 64-bit long integer at the specified absolute {@code index} + * in this buffer with Big Endian byte order. + */ + @Override + public ByteBuf setLongLE(int index, long value) { + chk(index, 8); + PlatformDependent.putLong(addr(index), Long.reverseBytes(value)); + return this; + } + @Override public ArrowBuf setChar(int index, int value) { chk(index, 2); @@ -668,16 +795,46 @@ protected short _getShort(int index) { return getShort(index); } + /** @see {@link #getShortLE(int)} */ + @Override + protected short _getShortLE(int index) { + return getShortLE(index); + } + @Override protected int _getInt(int index) { return getInt(index); } + /** @see {@link #getIntLE(int)} */ + @Override + protected int _getIntLE(int index) { + return getIntLE(index); + } + + /** @see {@link #getUnsignedMedium(int)} */ + @Override + protected int _getUnsignedMedium(int index) { + return getUnsignedMedium(index); + } + + /** @see {@link #getUnsignedMediumLE(int)} */ + @Override + protected int _getUnsignedMediumLE(int index) { + return getUnsignedMediumLE(index); + } + @Override protected long _getLong(int index) { return getLong(index); } + /** @see {@link #getLongLE(int)} */ + @Override + protected long _getLongLE(int index) { + return getLongLE(index); + } + @Override protected void _setByte(int index, int value) { setByte(index, value); @@ -688,21 +845,45 @@ protected void _setShort(int index, int value) { setShort(index, value); } + /** @see {@link #setShortLE(int, int)} */ + @Override + protected void _setShortLE(int index, int value) { + setShortLE(index, value); + } + @Override protected void _setMedium(int index, int value) { setMedium(index, value); } + /** @see {@link #setMediumLE(int, int)} */ + @Override + protected void _setMediumLE(int index, int value) { + setMediumLE(index, value); + } + @Override protected void _setInt(int index, int value) { setInt(index, value); } + /** @see {@link #setIntLE(int, int)} */ + @Override + protected void _setIntLE(int index, int value) { + setIntLE(index, value); + } + @Override protected void _setLong(int index, long value) { setLong(index, value); } + /** @see {@link #setLongLE(int, long)} */ + @Override + public void _setLongLE(int index, long value) { + setLongLE(index, value); + } + @Override public ArrowBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) { udle.getBytes(index + offset, dst, dstIndex, length); @@ -716,16 +897,13 @@ public ArrowBuf getBytes(int index, OutputStream out, int length) throws IOExcep } @Override - protected int _getUnsignedMedium(int index) { - final long addr = addr(index); - return (PlatformDependent.getByte(addr) & 0xff) << 16 | - (PlatformDependent.getByte(addr + 1) & 0xff) << 8 | - PlatformDependent.getByte(addr + 2) & 0xff; + public int getBytes(int index, GatheringByteChannel out, int length) throws IOException { + return udle.getBytes(index + offset, out, length); } @Override - public int getBytes(int index, GatheringByteChannel out, int length) throws IOException { - return udle.getBytes(index + offset, out, length); + public int getBytes(int index, FileChannel out, long position, int length) throws IOException { + return udle.getBytes(index + offset, out, position, length); } @Override @@ -776,6 +954,11 @@ public int setBytes(int index, ScatteringByteChannel in, int length) throws IOEx return udle.setBytes(index + offset, in, length); } + @Override + public int setBytes(int index, FileChannel in, long position, int length) throws IOException { + return udle.setBytes(index + offset, in, position, length); + } + @Override public byte getByte(int index) { chk(index, 1); diff --git a/java/memory/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java b/java/memory/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java index a5683adccbc..f0bc84cdc2d 100644 --- a/java/memory/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java +++ b/java/memory/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java @@ -23,9 +23,12 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.FileChannel; import java.nio.channels.GatheringByteChannel; import java.nio.channels.ScatteringByteChannel; +import io.netty.util.ByteProcessor; + /** * This is basically a complete copy of DuplicatedByteBuf. We copy because we want to override * some behaviors and make @@ -128,6 +131,16 @@ protected short _getShort(int index) { return buffer.getShort(index); } + @Override + public short getShortLE(int index) { + return buffer.getShortLE(index); + } + + @Override + protected short _getShortLE(int index) { + return buffer.getShortLE(index); + } + @Override public int getUnsignedMedium(int index) { return _getUnsignedMedium(index); @@ -138,6 +151,16 @@ protected int _getUnsignedMedium(int index) { return buffer.getUnsignedMedium(index); } + @Override + public int getUnsignedMediumLE(int index) { + return buffer.getUnsignedMediumLE(index); + } + + @Override + protected int _getUnsignedMediumLE(int index) { + return buffer.getUnsignedMediumLE(index); + } + @Override public int getInt(int index) { return _getInt(index); @@ -148,6 +171,16 @@ protected int _getInt(int index) { return buffer.getInt(index); } + @Override + public int getIntLE(int index) { + return buffer.getIntLE(index); + } + + @Override + protected int _getIntLE(int index) { + return buffer.getIntLE(index); + } + @Override public long getLong(int index) { return _getLong(index); @@ -158,6 +191,16 @@ protected long _getLong(int index) { return buffer.getLong(index); } + @Override + public long getLongLE(int index) { + return buffer.getLongLE(index); + } + + @Override + protected long _getLongLE(int index) { + return buffer.getLongLE(index); + } + @Override public abstract ByteBuf copy(int index, int length); @@ -206,6 +249,17 @@ protected void _setShort(int index, int value) { buffer.setShort(index, value); } + @Override + public ByteBuf setShortLE(int index, int value) { + buffer.setShortLE(index, value); + return this; + } + + @Override + protected void _setShortLE(int index, int value) { + buffer.setShortLE(index, value); + } + @Override public ByteBuf setMedium(int index, int value) { _setMedium(index, value); @@ -217,6 +271,17 @@ protected void _setMedium(int index, int value) { buffer.setMedium(index, value); } + @Override + public ByteBuf setMediumLE(int index, int value) { + buffer.setMediumLE(index, value); + return this; + } + + @Override + protected void _setMediumLE(int index, int value) { + buffer.setMediumLE(index, value); + } + @Override public ByteBuf setInt(int index, int value) { _setInt(index, value); @@ -228,6 +293,17 @@ protected void _setInt(int index, int value) { buffer.setInt(index, value); } + @Override + public ByteBuf setIntLE(int index, int value) { + buffer.setIntLE(index, value); + return this; + } + + @Override + protected void _setIntLE(int index, int value) { + buffer.setIntLE(index, value); + } + @Override public ByteBuf setLong(int index, long value) { _setLong(index, value); @@ -239,6 +315,17 @@ protected void _setLong(int index, long value) { buffer.setLong(index, value); } + @Override + public ByteBuf setLongLE(int index, long value) { + buffer.setLongLE(index, value); + return this; + } + + @Override + protected void _setLongLE(int index, long value) { + buffer.setLongLE(index, value); + } + @Override public ByteBuf setBytes(int index, byte[] src, int srcIndex, int length) { buffer.setBytes(index, src, srcIndex, length); @@ -257,6 +344,12 @@ public ByteBuf setBytes(int index, ByteBuffer src) { return this; } + @Override + public int setBytes(int index, FileChannel in, long position, int length) + throws IOException { + return buffer.setBytes(index, in, position, length); + } + @Override public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException { @@ -282,6 +375,13 @@ public int setBytes(int index, ScatteringByteChannel in, int length) return buffer.setBytes(index, in, length); } + + @Override + public int getBytes(int index, FileChannel out, long position, int length) + throws IOException { + return buffer.getBytes(index, out, position, length); + } + @Override public int nioBufferCount() { return buffer.nioBufferCount(); @@ -298,12 +398,12 @@ public ByteBuffer internalNioBuffer(int index, int length) { } @Override - public int forEachByte(int index, int length, ByteBufProcessor processor) { + public int forEachByte(int index, int length, ByteProcessor processor) { return buffer.forEachByte(index, length, processor); } @Override - public int forEachByteDesc(int index, int length, ByteBufProcessor processor) { + public int forEachByteDesc(int index, int length, ByteProcessor processor) { return buffer.forEachByteDesc(index, length, processor); } @@ -312,6 +412,18 @@ public final int refCnt() { return unwrap().refCnt(); } + @Override + public final ByteBuf touch() { + unwrap().touch(); + return this; + } + + @Override + public final ByteBuf touch(Object hint) { + unwrap().touch(hint); + return this; + } + @Override public final ByteBuf retain() { unwrap().retain(); diff --git a/java/memory/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java b/java/memory/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java index b8b5283423c..94102992139 100644 --- a/java/memory/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java +++ b/java/memory/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java @@ -18,8 +18,8 @@ package org.apache.arrow.memory; +import io.netty.buffer.AbstractByteBufAllocator; import io.netty.buffer.ByteBuf; -import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.CompositeByteBuf; import io.netty.buffer.ExpandableByteBuf; @@ -32,7 +32,7 @@ * otherwise non-expandable * ArrowBufs to be expandable. */ -public class ArrowByteBufAllocator implements ByteBufAllocator { +public class ArrowByteBufAllocator extends AbstractByteBufAllocator { private static final int DEFAULT_BUFFER_SIZE = 4096; private static final int DEFAULT_MAX_COMPOSITE_COMPONENTS = 16; @@ -142,8 +142,17 @@ public CompositeByteBuf compositeHeapBuffer(int maxNumComponents) { throw fail(); } + @Override + protected ByteBuf newHeapBuffer(int initialCapacity, int maxCapacity) { + throw fail(); + } + + @Override + protected ByteBuf newDirectBuffer(int initialCapacity, int maxCapacity) { + return buffer(initialCapacity, maxCapacity); + } + private RuntimeException fail() { throw new UnsupportedOperationException("Allocator doesn't support heap-based memory."); } - } diff --git a/java/pom.xml b/java/pom.xml index 0a0f2e0ce8f..384ef56882f 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -32,7 +32,7 @@ 4.11 1.7.25 18.0 - 4.0.49.Final + 4.1.17.Final 2.7.9 2.7.1 1.2.0-3f79e055 diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java index 6d3b390379a..b863fa8af86 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java @@ -134,9 +134,9 @@ public Set keySet() { @Override public Collection values() { - return Lists.newArrayList(Iterables.transform(secondary.entries(), new Function, V>() { + return Lists.newArrayList(Iterables.transform(secondary.entries(), new Function, V>() { @Override - public V apply(IntObjectMap.Entry entry) { + public V apply(IntObjectMap.PrimitiveEntry entry) { return Preconditions.checkNotNull(entry).value(); } })); From 4a661ed7f5a4489c9adc40cb7dbc39dff291a14d Mon Sep 17 00:00:00 2001 From: siddharth Date: Mon, 15 Jan 2018 13:52:39 -0800 Subject: [PATCH 05/13] Add getReservation to Accountant --- .../main/java/org/apache/arrow/memory/Accountant.java | 9 +++++++++ .../java/org/apache/arrow/memory/BufferAllocator.java | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/java/memory/src/main/java/org/apache/arrow/memory/Accountant.java b/java/memory/src/main/java/org/apache/arrow/memory/Accountant.java index 5bd6b9fe379..ed2184aadab 100644 --- a/java/memory/src/main/java/org/apache/arrow/memory/Accountant.java +++ b/java/memory/src/main/java/org/apache/arrow/memory/Accountant.java @@ -219,6 +219,15 @@ public long getLimit() { return allocationLimit.get(); } + /** + * Return the initial reservation. + * + * @return reservation in bytes. + */ + public long getInitReservation() { + return reservation; + } + /** * Set the maximum amount of memory that can be allocated in the this Accountant before failing * an allocation. diff --git a/java/memory/src/main/java/org/apache/arrow/memory/BufferAllocator.java b/java/memory/src/main/java/org/apache/arrow/memory/BufferAllocator.java index b23a6e4bd85..a5da50e6278 100644 --- a/java/memory/src/main/java/org/apache/arrow/memory/BufferAllocator.java +++ b/java/memory/src/main/java/org/apache/arrow/memory/BufferAllocator.java @@ -91,6 +91,13 @@ public interface BufferAllocator extends AutoCloseable { */ public long getLimit(); + /** + * Return the initial reservation. + * + * @return reservation in bytes. + */ + public long getInitReservation(); + /** * Set the maximum amount of memory this allocator is allowed to allocate. * From 9e828eefbf7bc22fd4aa669a2c02c1870f7cbd75 Mon Sep 17 00:00:00 2001 From: siddharth Date: Tue, 23 Jan 2018 15:13:13 -0800 Subject: [PATCH 06/13] ARROW-2019: [JAVA] Control the memory allocated for inner vector in LIST --- .../BaseNullableVariableWidthVector.java | 33 ++++++++++ .../complex/BaseRepeatedValueVector.java | 32 ++++++++++ .../arrow/vector/complex/ListVector.java | 60 ++++++++++++++++--- .../apache/arrow/vector/TestListVector.java | 59 ++++++++++++++++++ .../apache/arrow/vector/TestValueVector.java | 26 ++++++++ .../arrow/vector/TestVectorReAlloc.java | 6 +- 6 files changed, 206 insertions(+), 10 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java index edf4987de57..fee573ccd08 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java @@ -181,6 +181,39 @@ public void setInitialCapacity(int valueCount) { offsetAllocationSizeInBytes = (valueCount + 1) * OFFSET_WIDTH; } + /** + * Sets the desired value capacity for the vector. This function doesn't + * allocate any memory for the vector. + * @param valueCount desired number of elements in the vector + * @param density average number of bytes per variable width element + */ + public void setInitialCapacity(int valueCount, double density) { + final long size = (long) (valueCount * density); + if (size > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); + } + valueAllocationSizeInBytes = (int) size; + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(valueCount); + /* to track the end offset of last data element in vector, we need + * an additional slot in offset buffer. + */ + offsetAllocationSizeInBytes = (valueCount + 1) * OFFSET_WIDTH; + } + + /** + * Get the density of this ListVector + * @return density + */ + public double getDensity() { + if (valueCount == 0) { + return 0.0D; + } + final int startOffset = offsetBuffer.getInt(0); + final int endOffset = offsetBuffer.getInt(valueCount * OFFSET_WIDTH); + final double totalListSize = endOffset - startOffset; + return totalListSize/valueCount; + } + /** * Get the current value capacity for the vector * @return number of elements that vector can hold. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index a909828602a..c159d742701 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -143,6 +143,38 @@ public void setInitialCapacity(int numRecords) { } } + /** + * Specialized version of setInitialCapacity() for ListVector. This is + * used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. This is + * very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. In + * such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param density density of ListVector. Density is the average size of + * list per position in the List vector. For example, a + * density value of 10 implies each position in the list + * vector has a list of 10 values. + * A density value of 0.1 implies out of 10 positions in + * the list vector, 1 position has a list of size 1 and + * remaining positions are null (no lists) or empty lists. + * This helps in tightly controlling the memory we provision + * for inner data vector. + */ + public void setInitialCapacity(int numRecords, double density) { + if ((numRecords * density) >= 2_000_000_000) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); + } + offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH; + final int innerValueCapacity = (int)(numRecords * density); + vector.setInitialCapacity(innerValueCapacity); + } + @Override public int getValueCapacity() { final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index afe86a692c3..8e96f6cbc06 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -31,12 +31,7 @@ import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BufferBacked; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.*; import org.apache.arrow.vector.complex.impl.ComplexCopier; import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.complex.impl.UnionListWriter; @@ -102,6 +97,57 @@ public void initializeChildrenFromFields(List children) { addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); } + @Override + public void setInitialCapacity(int numRecords) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialCapacity(numRecords); + } + + /** + * Specialized version of setInitialCapacity() for ListVector. This is + * used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. This is + * very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. In + * such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param density density of ListVector. Density is the average size of + * list per position in the List vector. For example, a + * density value of 10 implies each position in the list + * vector has a list of 10 values. + * A density value of 0.1 implies out of 10 positions in + * the list vector, 1 position has a list of size 1 and + * remaining positions are null (no lists). This helps + * in tightly controlling the memory we provision for + * inner data vector. + * remaining positions are null (no lists) or empty lists. + * This helps in tightly controlling the memory we provision + * for inner data vector. + */ + public void setInitialCapacity(int numRecords, double density) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialCapacity(numRecords, density); + } + + /** + * Get the density of this ListVector + * @return density + */ + public double getDensity() { + if (valueCount == 0) { + return 0.0D; + } + final int startOffset = offsetBuffer.getInt(0); + final int endOffset = offsetBuffer.getInt(valueCount * OFFSET_WIDTH); + final double totalListSize = endOffset - startOffset; + return totalListSize/valueCount; + } + @Override public List getChildrenFromFields() { return singletonList(getDataVector()); @@ -616,7 +662,7 @@ public int getNullCount() { */ @Override public int getValueCapacity() { - return Math.min(getValidityBufferValueCapacity(), super.getValueCapacity()); + return getValidityAndOffsetValueCapacity(); } private int getValidityAndOffsetValueCapacity() { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index ce9fb444255..20c63a1b2dc 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -112,6 +112,9 @@ public void testCopyFrom() throws Exception { result = outVector.getObject(2); resultSet = (ArrayList) result; assertEquals(0, resultSet.size()); + + /* 3+0+0/3 */ + assertEquals(1.0D, inVector.getDensity(), 0); } } @@ -209,6 +212,9 @@ public void testSetLastSetUsage() throws Exception { listVector.setLastSet(3); listVector.setValueCount(10); + /* (3+2+3)/10 */ + assertEquals(0.8D, listVector.getDensity(), 0); + index = 0; offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); assertEquals(Integer.toString(0), Integer.toString(offset)); @@ -709,6 +715,8 @@ public void testGetBufferAddress() throws Exception { listWriter.bigInt().writeBigInt(300); listWriter.endList(); + listVector.setValueCount(2); + /* check listVector contents */ Object result = listVector.getObject(0); ArrayList resultSet = (ArrayList) result; @@ -739,6 +747,9 @@ public void testGetBufferAddress() throws Exception { assertEquals(2, buffers.size()); assertEquals(bitAddress, buffers.get(0).memoryAddress()); assertEquals(offsetAddress, buffers.get(1).memoryAddress()); + + /* (3+2)/2 */ + assertEquals(2.5, listVector.getDensity(), 0); } } @@ -753,4 +764,52 @@ public void testConsistentChildName() throws Exception { assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); } } + + @Test + public void testSetInitialCapacity() { + try (final ListVector vector = ListVector.empty("", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + + /** + * use the default multiplier of 5, + * 512 * 5 => 2560 * 4 => 10240 bytes => 16KB => 4096 value capacity. + */ + vector.setInitialCapacity(512); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertEquals(4096, vector.getDataVector().getValueCapacity()); + + /* use density as 4 */ + vector.setInitialCapacity(512, 4); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertEquals(512*4, vector.getDataVector().getValueCapacity()); + + /** + * inner value capacity we pass to data vector is 512 * 0.1 => 51 + * For an int vector this is 204 bytes of memory for data buffer + * and 7 bytes for validity buffer. + * and with power of 2 allocation, we allocate 256 bytes and 8 bytes + * for the data buffer and validity buffer of the inner vector. Thus + * value capacity of inner vector is 64 + */ + vector.setInitialCapacity(512, 0.1); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertEquals(64, vector.getDataVector().getValueCapacity()); + + /** + * inner value capacity we pass to data vector is 512 * 0.01 => 5 + * For an int vector this is 20 bytes of memory for data buffer + * and 1 byte for validity buffer. + * and with power of 2 allocation, we allocate 32 bytes and 1 bytes + * for the data buffer and validity buffer of the inner vector. Thus + * value capacity of inner vector is 8 + */ + vector.setInitialCapacity(512, 0.01); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertEquals(8, vector.getDataVector().getValueCapacity()); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index c7ee202f946..be2946262a4 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -1925,4 +1925,30 @@ public static void setBytes(int index, byte[] bytes, NullableVarCharVector vecto vector.offsetBuffer.setInt((index + 1) * vector.OFFSET_WIDTH, currentOffset + bytes.length); vector.valueBuffer.setBytes(currentOffset, bytes, 0, bytes.length); } + + @Test /* VarCharVector */ + public void testSetInitialCapacity() { + try (final NullableVarCharVector vector = new NullableVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + /* use the default 8 data bytes on average per element */ + vector.setInitialCapacity(4096); + vector.allocateNew(); + assertEquals(4096, vector.getValueCapacity()); + assertEquals(4096 * 8, vector.getDataBuffer().capacity()); + + vector.setInitialCapacity(4096, 1); + vector.allocateNew(); + assertEquals(4096, vector.getValueCapacity()); + assertEquals(4096, vector.getDataBuffer().capacity()); + + vector.setInitialCapacity(4096, 0.1); + vector.allocateNew(); + assertEquals(4096, vector.getValueCapacity()); + assertEquals(512, vector.getDataBuffer().capacity()); + + vector.setInitialCapacity(4096, 0.01); + vector.allocateNew(); + assertEquals(4096, vector.getValueCapacity()); + assertEquals(64, vector.getDataBuffer().capacity()); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java index 293ffbfe192..3807d978aef 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java @@ -105,17 +105,17 @@ public void testListType() { vector.setInitialCapacity(512); vector.allocateNew(); - assertEquals(1023, vector.getValueCapacity()); + assertEquals(512, vector.getValueCapacity()); try { - vector.getOffsetVector().getAccessor().get(2014); + vector.getOffsetBuffer().getInt(2014 * 4); Assert.fail("Expected out of bounds exception"); } catch (Exception e) { // ok } vector.reAlloc(); - assertEquals(2047, vector.getValueCapacity()); // note: size - 1 + assertEquals(1024, vector.getValueCapacity()); assertEquals(0, vector.getOffsetBuffer().getInt(2014 * ListVector.OFFSET_WIDTH)); } } From 182e67ae3b7342c0d73848640b3713e614715fc5 Mon Sep 17 00:00:00 2001 From: siddharth Date: Tue, 23 Jan 2018 15:13:13 -0800 Subject: [PATCH 07/13] ARROW-2019: [JAVA] Control the memory allocated for inner vector in LIST and handle 0 initial capacity in all vectors. --- java/vector/src/main/codegen/templates/UnionVector.java | 1 + .../apache/arrow/vector/BaseNullableFixedWidthVector.java | 1 + .../apache/arrow/vector/BaseNullableVariableWidthVector.java | 2 ++ .../apache/arrow/vector/complex/BaseRepeatedValueVector.java | 5 +++-- .../org/apache/arrow/vector/complex/FixedSizeListVector.java | 1 + .../java/org/apache/arrow/vector/complex/ListVector.java | 1 + .../org/apache/arrow/vector/complex/NullableMapVector.java | 1 + 7 files changed, 10 insertions(+), 2 deletions(-) diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index e44edbd47b6..354afd26acc 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -283,6 +283,7 @@ private void reallocTypeBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableFixedWidthVector.java index 209758e4ece..7bfdff52719 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableFixedWidthVector.java @@ -455,6 +455,7 @@ private ArrowBuf reallocBufferHelper(ArrowBuf buffer, final boolean dataBuffer) long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java index fee573ccd08..5ab884831e7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java @@ -497,6 +497,7 @@ public void reallocDataBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); @@ -549,6 +550,7 @@ private ArrowBuf reallocBufferHelper(ArrowBuf buffer, final boolean offsetBuffer long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index c159d742701..fef2cf3c98b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -108,6 +108,7 @@ protected void reallocOffsetBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); @@ -136,10 +137,10 @@ public FieldVector getDataVector() { @Override public void setInitialCapacity(int numRecords) { offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH; - if (vector instanceof BaseNullableFixedWidthVector || vector instanceof BaseNullableVariableWidthVector) { + if (vector instanceof BaseNullableVariableWidthVector || vector instanceof BaseNullableFixedWidthVector) { vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); } else { - vector.setInitialCapacity(numRecords); + vector.setInitialCapacity(numRecords); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index 6713b1c7871..408ee930321 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -222,6 +222,7 @@ private void reallocValidityBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 8e96f6cbc06..8a46465253a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -290,6 +290,7 @@ private void reallocValidityBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java index f95302f55f8..2709ab068c1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java @@ -394,6 +394,7 @@ private void reallocValidityBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); From 80bc33cda33a60b5a16171c44a2a976151d5d18a Mon Sep 17 00:00:00 2001 From: Jacques Nadeau Date: Fri, 26 Jan 2018 05:51:57 -0800 Subject: [PATCH 08/13] Ensure density driven initial capacity is never less than 1. --- .../arrow/vector/BaseNullableVariableWidthVector.java | 7 +++++-- .../arrow/vector/complex/BaseRepeatedValueVector.java | 7 ++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java index 5ab884831e7..0ff3959d4bf 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java @@ -24,7 +24,6 @@ import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; @@ -188,10 +187,14 @@ public void setInitialCapacity(int valueCount) { * @param density average number of bytes per variable width element */ public void setInitialCapacity(int valueCount, double density) { - final long size = (long) (valueCount * density); + long size = (long) (valueCount * density); if (size > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); } + + if(size == 0) { + size = 1; + } valueAllocationSizeInBytes = (int) size; validityAllocationSizeInBytes = getValidityBufferSizeFromCount(valueCount); /* to track the end offset of last data element in vector, we need diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index fef2cf3c98b..1cd9b718523 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -172,7 +172,12 @@ public void setInitialCapacity(int numRecords, double density) { throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); } offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH; - final int innerValueCapacity = (int)(numRecords * density); + int innerValueCapacity = (int)(numRecords * density); + + if(innerValueCapacity == 0) { + innerValueCapacity = 1; + } + vector.setInitialCapacity(innerValueCapacity); } From b46ba11baa8d969d723a6cb822527f5a48484877 Mon Sep 17 00:00:00 2001 From: Jacques Nadeau Date: Fri, 26 Jan 2018 07:57:42 -0800 Subject: [PATCH 09/13] propagate density awareness throughout the vector tree. --- .../templates/VariableLengthVectors.java | 12 +++++++ .../BaseNullableVariableWidthVector.java | 1 + .../arrow/vector/DensityAwareVector.java | 32 +++++++++++++++++++ .../arrow/vector/VariableWidthVector.java | 2 +- .../complex/AbstractContainerVector.java | 3 +- .../complex/BaseRepeatedValueVector.java | 8 ++++- .../arrow/vector/complex/ListVector.java | 1 + .../arrow/vector/complex/MapVector.java | 12 ++++++- .../vector/complex/NullableMapVector.java | 8 +++-- .../vector/complex/RepeatedValueVector.java | 3 +- 10 files changed, 75 insertions(+), 7 deletions(-) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java diff --git a/java/vector/src/main/codegen/templates/VariableLengthVectors.java b/java/vector/src/main/codegen/templates/VariableLengthVectors.java index 3934e74f11b..4d0342b8db5 100644 --- a/java/vector/src/main/codegen/templates/VariableLengthVectors.java +++ b/java/vector/src/main/codegen/templates/VariableLengthVectors.java @@ -18,6 +18,7 @@ import java.lang.Override; +import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.drill.exec.exception.OutOfMemoryException; import org.apache.drill.exec.vector.BaseDataValueVector; import org.apache.drill.exec.vector.BaseValueVector; @@ -301,7 +302,18 @@ public void setInitialCapacity(final int valueCount) { allocationSizeInBytes = (int)size; offsetVector.setInitialCapacity(valueCount + 1); } + + @Override + public void setInitialCapacity(int valueCount, double density) { + long size = (long) (valueCount * density); + if (size > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed allocation size"); + } + allocationSizeInBytes = (int)size; + offsetVector.setInitialCapacity(valueCount + 1); + } + @Override public void allocateNew() { if(!allocateNewSafe()){ diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java index 0ff3959d4bf..fecbe9879eb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java @@ -186,6 +186,7 @@ public void setInitialCapacity(int valueCount) { * @param valueCount desired number of elements in the vector * @param density average number of bytes per variable width element */ + @Override public void setInitialCapacity(int valueCount, double density) { long size = (long) (valueCount * density); if (size > MAX_ALLOCATION_SIZE) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java new file mode 100644 index 00000000000..9544b23cefd --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +/** + * Vector that support density aware initial capacity settings. + */ +public interface DensityAwareVector { + /** + * Set value with density + * @param valueCount + * @param density + */ + void setInitialCapacity(int valueCount, double density); + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java index 04c00b7c834..7182fa8476c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java @@ -18,7 +18,7 @@ package org.apache.arrow.vector; -public interface VariableWidthVector extends ValueVector { +public interface VariableWidthVector extends ValueVector, DensityAwareVector { /** * Allocate a new memory space for this vector. Must be called prior to using the ValueVector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java index db0ff86df47..c777618fdfb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java @@ -20,6 +20,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.types.Types.MinorType; @@ -33,7 +34,7 @@ * * This class implements common functionality of composite vectors. */ -public abstract class AbstractContainerVector implements ValueVector { +public abstract class AbstractContainerVector implements ValueVector, DensityAwareVector { static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class); protected final String name; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index 1cd9b718523..c4414871eb8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.AddOrGetResult; import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; @@ -167,6 +168,7 @@ public void setInitialCapacity(int numRecords) { * This helps in tightly controlling the memory we provision * for inner data vector. */ + @Override public void setInitialCapacity(int numRecords, double density) { if ((numRecords * density) >= 2_000_000_000) { throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); @@ -178,7 +180,11 @@ public void setInitialCapacity(int numRecords, double density) { innerValueCapacity = 1; } - vector.setInitialCapacity(innerValueCapacity); + if (vector instanceof DensityAwareVector) { + ((DensityAwareVector)vector).setInitialCapacity(innerValueCapacity, density); + } else { + vector.setInitialCapacity(innerValueCapacity); + } } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 8a46465253a..33698ca61a0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -129,6 +129,7 @@ public void setInitialCapacity(int numRecords) { * This helps in tightly controlling the memory we provision * for inner data vector. */ + @Override public void setInitialCapacity(int numRecords, double density) { validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); super.setInitialCapacity(numRecords, density); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 6089a67924f..52395026939 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -28,7 +28,6 @@ import javax.annotation.Nullable; -import com.google.common.base.Preconditions; import com.google.common.collect.Ordering; import com.google.common.primitives.Ints; @@ -101,6 +100,17 @@ public void setInitialCapacity(int numRecords) { } } + @Override + public void setInitialCapacity(int valueCount, double density) { + for (final ValueVector vector : (Iterable) this) { + if (vector instanceof DensityAwareVector) { + ((DensityAwareVector)vector).setInitialCapacity(valueCount, density); + } else { + vector.setInitialCapacity(valueCount); + } + } + } + @Override public int getBufferSize() { if (valueCount == 0 || size() == 0) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java index 2709ab068c1..c1880189712 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java @@ -21,8 +21,6 @@ import static com.google.common.base.Preconditions.checkNotNull; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; import java.util.List; import com.google.common.collect.ObjectArrays; @@ -348,6 +346,12 @@ public void setInitialCapacity(int numRecords) { super.setInitialCapacity(numRecords); } + @Override + public void setInitialCapacity(int numRecords, double density) { + validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords); + super.setInitialCapacity(numRecords, density); + } + @Override public boolean allocateNewSafe() { /* Boolean to keep track if all the memory allocations were successful diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java index 91147c663f2..e107430f0ae 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector.complex; +import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; @@ -28,7 +29,7 @@ * Current design maintains data and offsets vectors. Each cell is stored in the data vector. Repeated vector * uses the offset vector to determine the sequence of cells pertaining to an individual value. */ -public interface RepeatedValueVector extends ValueVector { +public interface RepeatedValueVector extends ValueVector, DensityAwareVector { final static int DEFAULT_REPEAT_PER_RECORD = 5; From eb5897e11dc5df475df3b4e01741790cad235eee Mon Sep 17 00:00:00 2001 From: siddharth Date: Fri, 2 Mar 2018 12:02:39 -0800 Subject: [PATCH 10/13] Fix setValueCount in splitAndTransfer of Variable Width Vector. We need to use the split length as the value count of the target vector. We are incorrectly using the value count of the current vector for the target vector. Thus the latter ends up asking for a realloc when it didn't really need extra memory. --- .../apache/arrow/vector/BaseNullableVariableWidthVector.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java index fecbe9879eb..6c69fc6757c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java @@ -723,9 +723,7 @@ public void splitAndTransferTo(int startIndex, int length, splitAndTransferValidityBuffer(startIndex, length, target); splitAndTransferOffsetBuffer(startIndex, length, target); target.setLastSet(length - 1); - if (this.valueCount > 0) { - target.setValueCount(this.valueCount); - } + target.setValueCount(length); } /* From 301fdc6a45a978fa12d111f37cf495e674261a4b Mon Sep 17 00:00:00 2001 From: siddharth Date: Wed, 14 Mar 2018 23:38:40 -0700 Subject: [PATCH 11/13] Do not allocate memory for inner vectors in nullable map when creating transfer pair --- .../java/org/apache/arrow/vector/complex/NullableMapVector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java index c1880189712..2f938466679 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java @@ -130,7 +130,7 @@ public TransferPair getTransferPair(BufferAllocator allocator) { @Override public TransferPair makeTransferPair(ValueVector to) { - return new NullableMapTransferPair(this, (NullableMapVector) to, true); + return new NullableMapTransferPair(this, (NullableMapVector) to, false); } @Override From a14b263ed442a4a088b1b0e5578993644147a599 Mon Sep 17 00:00:00 2001 From: vkorukanti Date: Thu, 29 Mar 2018 13:39:56 -0700 Subject: [PATCH 12/13] ARROW-2368: Correctly pad negative values in DecimalVector#setBigEndian --- .../arrow/vector/NullableDecimalVector.java | 26 +++++++-- .../arrow/vector/TestDecimalVector.java | 54 +++++++++++++++++++ 2 files changed, 76 insertions(+), 4 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java index 0d0a7c0ec13..69bcd33fc6e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java @@ -216,7 +216,6 @@ public void set(int index, ArrowBuf buffer) { * @param value array of bytes containing decimal in big endian byte order. */ public void setBigEndian(int index, byte[] value) { - assert value.length <= TYPE_WIDTH; BitVectorHelper.setValidityBitToOne(validityBuffer, index); final int length = value.length; int startIndex = index * TYPE_WIDTH; @@ -228,13 +227,32 @@ public void setBigEndian(int index, byte[] value) { valueBuffer.setByte(startIndex + 3, value[i-3]); startIndex += 4; } - } else { + + return; + } + + if (length == 0) { + valueBuffer.setZero(startIndex, TYPE_WIDTH); + return; + } + + if (length < 16) { for (int i = length - 1; i >= 0; i--) { valueBuffer.setByte(startIndex, value[i]); startIndex++; } - valueBuffer.setZero(startIndex, TYPE_WIDTH - length); + + final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00); + final int maxStartIndex = (index + 1) * TYPE_WIDTH; + while (startIndex < maxStartIndex) { + valueBuffer.setByte(startIndex, pad); + startIndex++; + } + + return; } + + throw new IllegalArgumentException("Invalid decimal value length. Valid length in [1 - 16], got " + length); } /** @@ -472,4 +490,4 @@ public void copyValueSafe(int fromIndex, int toIndex) { to.copyFromSafe(fromIndex, toIndex, NullableDecimalVector.this); } } -} \ No newline at end of file +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java index 4d844d6d3ca..be21200d697 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.math.BigDecimal; import java.math.BigInteger; @@ -111,4 +112,57 @@ public void testBigDecimalDifferentScaleAndPrecision() { } } } + + /** + * Test {@link NullableDecimalVector#setBigEndian(int, byte[])} which takes BE layout input and stores in LE layout. + * Cases to cover: value given in byte array in different lengths in range [1-16] and negative values. + */ + @Test + public void decimalBE2LE() { + try (NullableDecimalVector decimalVector = TestUtils.newVector(NullableDecimalVector.class, "decimal", new ArrowType.Decimal(21, 2), allocator);) { + decimalVector.allocateNew(); + + BigInteger[] testBigInts = new BigInteger[] { + new BigInteger("0"), + new BigInteger("-1"), + new BigInteger("23"), + new BigInteger("234234"), + new BigInteger("-234234234"), + new BigInteger("234234234234"), + new BigInteger("-56345345345345"), + new BigInteger("29823462983462893462934679234653456345"), // converts to 16 byte array + new BigInteger("-3894572983475982374598324598234346536"), // converts to 16 byte array + new BigInteger("-345345"), + new BigInteger("754533") + }; + + int insertionIdx = 0; + insertionIdx++; // insert a null + for (BigInteger val : testBigInts) { + decimalVector.setBigEndian(insertionIdx++, val.toByteArray()); + } + insertionIdx++; // insert a null + // insert a zero length buffer + decimalVector.setBigEndian(insertionIdx++, new byte[0]); + + // Try inserting a buffer larger than 16bytes and expect a failure + try { + decimalVector.setBigEndian(insertionIdx, new byte[17]); + fail("above statement should have failed"); + } catch (IllegalArgumentException ex) { + assertTrue(ex.getMessage().equals("Invalid decimal value length. Valid length in [1 - 16], got 17")); + } + decimalVector.setValueCount(insertionIdx); + + // retrieve values and check if they are correct + int outputIdx = 0; + assertTrue(decimalVector.isNull(outputIdx++)); + for (BigInteger expected : testBigInts) { + final BigDecimal actual = decimalVector.getObject(outputIdx++); + assertEquals(expected, actual.unscaledValue()); + } + assertTrue(decimalVector.isNull(outputIdx++)); + assertEquals(BigInteger.valueOf(0), decimalVector.getObject(outputIdx).unscaledValue()); + } + } } From 7c65c8a314251954aa1ac8b48a31773ccef67840 Mon Sep 17 00:00:00 2001 From: Steven Phillips Date: Tue, 17 Apr 2018 17:28:56 -0700 Subject: [PATCH 13/13] DX-11176: Allow transferring between VarChar and VarBinary vectors --- .../org/apache/arrow/vector/BaseNullableVariableWidthVector.java | 1 - 1 file changed, 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java index 6c69fc6757c..db717c48bc1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java @@ -697,7 +697,6 @@ public TransferPair getTransferPair(BufferAllocator allocator) { * @param target destination vector for transfer */ public void transferTo(BaseNullableVariableWidthVector target) { - compareTypes(target, "transferTo"); target.clear(); target.validityBuffer = validityBuffer.transferOwnership(target.allocator).buffer; target.valueBuffer = valueBuffer.transferOwnership(target.allocator).buffer;