@@ -309,11 +309,14 @@ private void setReaderAndWriterIndex() {
     offsetBuffer.readerIndex(0);
     if (valueCount == 0) {
       validityBuffer.writerIndex(0);
-      offsetBuffer.writerIndex(0);
     } else {
       validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
-      offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
     }
+    // IPC serializer will determine readable bytes based on `readerIndex` and `writerIndex`.
+    // Setting both to 0 means 0 bytes are written to the IPC stream, which will crash IPC readers
+    // in other libraries. According to the Arrow spec, we should still output the offset buffer,
+    // which is [0].
+    offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH);
   }

/**
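The rationale in the new comment hinges on how the IPC layer measures a buffer: the bytes it serializes are the buffer's readableBytes, i.e. writerIndex minus readerIndex. A minimal standalone sketch, not part of this PR (the class name is made up; only the public ArrowBuf and RootAllocator APIs are used), showing why leaving both indices at 0 would emit an empty offset buffer, while advancing writerIndex to (valueCount + 1) * OFFSET_WIDTH emits the mandatory offset[0]:

import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.RootAllocator;

public class ReadableBytesSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator()) {
      // One 8-byte entry, matching LargeListVector.OFFSET_WIDTH.
      ArrowBuf offsets = allocator.buffer(8);
      offsets.setLong(0, 0L); // offset[0] = 0 is required even for an empty vector

      offsets.readerIndex(0);
      offsets.writerIndex(0);
      System.out.println(offsets.readableBytes()); // 0: the buffer would contribute nothing to the stream

      offsets.writerIndex(8); // (valueCount + 1) * OFFSET_WIDTH with valueCount = 0
      System.out.println(offsets.readableBytes()); // 8: offset[0] is now part of the readable region

      offsets.close();
    }
  }
}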
@@ -267,11 +267,14 @@ private void setReaderAndWriterIndex() {
     offsetBuffer.readerIndex(0);
     if (valueCount == 0) {
       validityBuffer.writerIndex(0);
-      offsetBuffer.writerIndex(0);
     } else {
       validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
-      offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
     }
+    // IPC serializer will determine readable bytes based on `readerIndex` and `writerIndex`.
+    // Setting both to 0 means 0 bytes are written to the IPC stream, which will crash IPC readers
+    // in other libraries. According to the Arrow spec, we should still output the offset buffer,
+    // which is [0].
+    offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH);
   }

/**
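One further detail of the replacement line in both hunks: the cast keeps the multiplication (valueCount + 1) * OFFSET_WIDTH in 64-bit arithmetic before the result reaches writerIndex(long). Without the cast, the product is computed as an int and can wrap for very large value counts. A small sketch with illustrative numbers only (the class name, constant, and count are hypothetical, not taken from this PR):

public class OffsetIndexWideningSketch {
  public static void main(String[] args) {
    final int OFFSET_WIDTH = 4;   // ListVector-style 4-byte offsets; LargeListVector uses 8
    int valueCount = 600_000_000; // deliberately large, hypothetical count

    long narrow = (valueCount + 1) * OFFSET_WIDTH;          // evaluated in int math, wraps to -1894967292
    long widened = (long) (valueCount + 1) * OFFSET_WIDTH;  // evaluated in long math: 2400000004

    System.out.println(narrow);
    System.out.println(widened);
  }
}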
@@ -1100,6 +1100,38 @@ public void testCopyValueSafeForExtensionType() throws Exception {
}
}

@Test
public void testNestedEmptyLargeListOffsetBuffer() {
// Test that a nested LargeListVector properly allocates its offset buffer
// even when the nested writers are never invoked. According to the Arrow spec,
// the offset buffer must have N+1 entries; even when N=0, it should contain [0].
try (LargeListVector outerList = LargeListVector.empty("outer", allocator)) {
// Setup LargeList<LargeList<Int>>
outerList.addOrGetVector(FieldType.nullable(MinorType.LARGELIST.getType()));
LargeListVector innerList = (LargeListVector) outerList.getDataVector();
innerList.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
Comment on lines +1108 to +1112 (review comment from a project member):

I looked at these tests again. I think this nested structure is not necessary for the unit tests here. It only matters for our usage on the Spark side (in the ArrowWriters). For the tests here, we just need to make sure that a ListVector/LargeListVector has a meaningful and correct readableBytes value after it is allocated.

Maybe we can simplify these tests.


// Allocate both outer and inner - simulates case where inner is never written to
outerList.allocateNew();
innerList.allocateNew();
outerList.setValueCount(0);
innerList.setValueCount(0);

// Get field buffers - this is what IPC serialization uses
List<ArrowBuf> innerBuffers = innerList.getFieldBuffers();

// Verify inner list offset buffer has at least OFFSET_WIDTH (8) bytes
assertTrue(
innerBuffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH,
"Inner LargeList offset buffer should have at least "
+ LargeListVector.OFFSET_WIDTH
+ " bytes for offset[0]");

// Verify offset[0] = 0
assertEquals(0L, innerList.getOffsetBuffer().getLong(0));
}
}

private void writeIntValues(UnionLargeListWriter writer, int[] values) {
writer.startList();
for (int v : values) {
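Following the reviewer's suggestion above, a simplified version of this test could drop the nested setup and assert directly on a freshly allocated, empty LargeListVector. A possible sketch (the test name is hypothetical; it assumes the allocator field and the imports already present in this test class):

@Test
public void testEmptyLargeListOffsetBufferSimplified() {
  try (LargeListVector list = LargeListVector.empty("empty", allocator)) {
    list.allocateNew();
    list.setValueCount(0);

    // Even with zero values, the offset buffer must expose offset[0] = 0 as readable bytes.
    List<ArrowBuf> buffers = list.getFieldBuffers();
    assertTrue(buffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH);
    assertEquals(0L, list.getOffsetBuffer().getLong(0));
  }
}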
@@ -1379,6 +1379,49 @@ public void testCopyValueSafeForExtensionType() throws Exception {
}
}

@Test
public void testNestedEmptyListOffsetBuffer() {
// Test that a 3-level nested ListVector properly allocates its offset buffers
// even when the nested writers are never invoked. According to the Arrow spec,
// the offset buffer must have N+1 entries; even when N=0, it should contain [0].
try (ListVector level0 = ListVector.empty("level0", allocator)) {
// Setup List<List<List<Int>>> - 3 levels
level0.addOrGetVector(FieldType.nullable(MinorType.LIST.getType()));
ListVector level1 = (ListVector) level0.getDataVector();
level1.addOrGetVector(FieldType.nullable(MinorType.LIST.getType()));
ListVector level2 = (ListVector) level1.getDataVector();
level2.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));

// Allocate all levels - simulates case where nested levels are never written to
level0.allocateNew();
level1.allocateNew();
level2.allocateNew();
level0.setValueCount(0);
level1.setValueCount(0);
level2.setValueCount(0);

// Verify all levels have properly allocated offset buffers
List<ArrowBuf> level1Buffers = level1.getFieldBuffers();
List<ArrowBuf> level2Buffers = level2.getFieldBuffers();

assertTrue(
level1Buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH,
"Level1 offset buffer should have at least "
+ BaseRepeatedValueVector.OFFSET_WIDTH
+ " bytes for offset[0]");

assertTrue(
level2Buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH,
"Level2 offset buffer should have at least "
+ BaseRepeatedValueVector.OFFSET_WIDTH
+ " bytes for offset[0]");

// Verify offset[0] = 0 for all levels
assertEquals(0, level1.getOffsetBuffer().getInt(0));
assertEquals(0, level2.getOffsetBuffer().getInt(0));
}
}

private void writeIntValues(UnionListWriter writer, int[] values) {
writer.startList();
for (int v : values) {
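For reference, the N + 1 rule cited in the test comments is easy to see on a small non-empty vector as well. A standalone sketch, not part of the test class (the class name is made up; it uses the public ListVector and UnionListWriter APIs), that writes the two lists [1, 2] and [3] and then prints the three offset entries 0, 2, 3:

import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListWriter;

public class OffsetEntriesSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator();
         ListVector list = ListVector.empty("demo", allocator)) {
      UnionListWriter writer = list.getWriter();
      writer.allocate();

      writer.startList();            // [1, 2]
      writer.integer().writeInt(1);
      writer.integer().writeInt(2);
      writer.endList();

      writer.startList();            // [3]
      writer.integer().writeInt(3);
      writer.endList();

      list.setValueCount(2);

      // N = 2 values means N + 1 = 3 offset entries: 0, 2, 3.
      for (int i = 0; i <= list.getValueCount(); i++) {
        System.out.println(list.getOffsetBuffer().getInt((long) i * BaseRepeatedValueVector.OFFSET_WIDTH));
      }
    }
  }
}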