Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
GH-3464 Improve DeltaByteArrayWriter.writeBytes to avoid unnecessary allocation and scalar prefix comparison
  • Loading branch information
arouel committed Apr 6, 2026
commit b7839f24d51101257e073216704aaadb740062d5
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.parquet.column.values.deltastrings;

import java.util.Arrays;
import org.apache.parquet.bytes.ByteBufferAllocator;
import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.column.Encoding;
Expand Down Expand Up @@ -88,14 +89,19 @@ public String memUsageString(String prefix) {

/**
 * Writes {@code v} as a delta against the previously written value: the length of the
 * prefix shared with {@code previous} goes to {@code prefixLengthWriter}, and the
 * remaining suffix bytes go to {@code suffixWriter}.
 *
 * @param v the value to encode
 */
@Override
public void writeBytes(Binary v) {
  // getBytesUnsafe() avoids a defensive copy; the array is only read in this method.
  byte[] vb = v.getBytesUnsafe();
  int length = Math.min(previous.length, vb.length);
  // Find the number of matching prefix bytes between this value and the previous one.
  // Arrays.mismatch returns -1 when the two compared ranges are identical.
  int prefixLength = Arrays.mismatch(previous, 0, length, vb, 0, length);
  if (prefixLength < 0) {
    prefixLength = length; // all bytes in the common range matched
  }
  prefixLengthWriter.writeInteger(prefixLength);
  suffixWriter.writeBytes(v.slice(prefixLength, vb.length - prefixLength));
  // Retain an owned copy for prefix comparison with the next value.
  // getBytesUnsafe() may return the backing array directly, so we must copy
  // if the Binary's backing bytes may be reused by the caller.
  previous = v.isBackingBytesReused() ? v.getBytes() : vb;
}
}