diff --git a/.github/workflows/jarbuild.yml b/.github/workflows/jarbuild.yml index ae2981cd6d..286cc8b910 100644 --- a/.github/workflows/jarbuild.yml +++ b/.github/workflows/jarbuild.yml @@ -16,7 +16,7 @@ # under the License. name: JarBuild -on: +on: workflow_dispatch: inputs: arrow_branch: diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index ed075352c9..b44f96c9d0 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -305,15 +305,29 @@ public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long /** Set the reader and writer indexes for the inner buffers. */ private void setReaderAndWriterIndex() { + final long requiredOffsetBufferCapacity = (long) (valueCount + 1) * OFFSET_WIDTH; validityBuffer.readerIndex(0); offsetBuffer.readerIndex(0); if (valueCount == 0) { validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); + ensureEmptyOffsetBufferCapacity(requiredOffsetBufferCapacity); } else { validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); } + // IPC serializers use readerIndex and writerIndex to determine readable bytes. Even when the + // list is empty, the Arrow layout requires the offset buffer to contain offset[0]. + offsetBuffer.writerIndex(requiredOffsetBufferCapacity); + } + + private void ensureEmptyOffsetBufferCapacity(long requiredCapacity) { + if (offsetBuffer.capacity() >= requiredCapacity) { + return; + } + long previousOffsetAllocationSizeInBytes = offsetAllocationSizeInBytes; + ArrowBuf oldOffsetBuffer = offsetBuffer; + offsetBuffer = allocateOffsetBuffer(requiredCapacity); + offsetAllocationSizeInBytes = previousOffsetAllocationSizeInBytes; + oldOffsetBuffer.getReferenceManager().release(); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 3daeb6d77b..84b1e2b882 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -263,15 +263,29 @@ public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long /** Set the reader and writer indexes for the inner buffers. */ private void setReaderAndWriterIndex() { + final long requiredOffsetBufferCapacity = (long) (valueCount + 1) * OFFSET_WIDTH; validityBuffer.readerIndex(0); offsetBuffer.readerIndex(0); if (valueCount == 0) { validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); + ensureEmptyOffsetBufferCapacity(requiredOffsetBufferCapacity); } else { validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); } + // IPC serializers use readerIndex and writerIndex to determine readable bytes. Even when the + // list is empty, the Arrow layout requires the offset buffer to contain offset[0]. + offsetBuffer.writerIndex(requiredOffsetBufferCapacity); + } + + private void ensureEmptyOffsetBufferCapacity(long requiredCapacity) { + if (offsetBuffer.capacity() >= requiredCapacity) { + return; + } + long previousOffsetAllocationSizeInBytes = offsetAllocationSizeInBytes; + ArrowBuf oldOffsetBuffer = offsetBuffer; + offsetBuffer = allocateOffsetBuffer(requiredCapacity); + offsetAllocationSizeInBytes = previousOffsetAllocationSizeInBytes; + oldOffsetBuffer.getReferenceManager().release(); } /** diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index 101d942d2a..1d089c93e3 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -955,6 +955,36 @@ public void testGetBufferSizeFor() { } } + @Test + public void testEmptyLargeListOffsetBuffer() { + try (LargeListVector list = LargeListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.allocateNew(); + list.setValueCount(0); + + assertEmptyLargeListOffsetBuffer(list); + } + } + + @Test + public void testUnallocatedEmptyLargeListOffsetBuffer() { + try (LargeListVector list = LargeListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.setValueCount(0); + + assertEmptyLargeListOffsetBuffer(list); + } + } + + private ArrowBuf assertEmptyLargeListOffsetBuffer(LargeListVector list) { + List buffers = list.getFieldBuffers(); + ArrowBuf offsetBuffer = buffers.get(1); + assertEquals(LargeListVector.OFFSET_WIDTH, offsetBuffer.readableBytes()); + assertTrue(offsetBuffer.capacity() >= LargeListVector.OFFSET_WIDTH); + assertEquals(0L, offsetBuffer.getLong(0)); + return offsetBuffer; + } + @Test public void testIsEmpty() { try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 1d6fa39f9e..f128358394 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1135,6 +1135,36 @@ public void testGetBufferSizeFor() { } } + @Test + public void testEmptyListOffsetBuffer() { + try (ListVector list = ListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.allocateNew(); + list.setValueCount(0); + + assertEmptyListOffsetBuffer(list); + } + } + + @Test + public void testUnallocatedEmptyListOffsetBuffer() { + try (ListVector list = ListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.setValueCount(0); + + assertEmptyListOffsetBuffer(list); + } + } + + private ArrowBuf assertEmptyListOffsetBuffer(ListVector list) { + List buffers = list.getFieldBuffers(); + ArrowBuf offsetBuffer = buffers.get(1); + assertEquals(BaseRepeatedValueVector.OFFSET_WIDTH, offsetBuffer.readableBytes()); + assertTrue(offsetBuffer.capacity() >= BaseRepeatedValueVector.OFFSET_WIDTH); + assertEquals(0, offsetBuffer.getInt(0)); + return offsetBuffer; + } + @Test public void testIsEmpty() { try (final ListVector vector = ListVector.empty("list", allocator)) {