Skip to content

Commit a38a85c

Browse files
authored
Improve ByteBufUtil#lastIndexOf (#13942)
Motivation: The performance of `#lastIndexOf` could be enhanced by applying SWAR. Modification: Utilized `SWARUtil` for byte search. Result: Enhanced performance.
1 parent c1d0fd2 commit a38a85c

3 files changed

Lines changed: 231 additions & 4 deletions

File tree

buffer/src/main/java/io/netty/buffer/ByteBufUtil.java

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -714,20 +714,92 @@ public static ByteBuf readBytes(ByteBufAllocator alloc, ByteBuf buffer, int leng
714714
}
715715
}
716716

717-
static int lastIndexOf(AbstractByteBuf buffer, int fromIndex, int toIndex, byte value) {
717+
static int lastIndexOf(final AbstractByteBuf buffer, int fromIndex, final int toIndex, final byte value) {
718718
assert fromIndex > toIndex;
719719
final int capacity = buffer.capacity();
720720
fromIndex = Math.min(fromIndex, capacity);
721-
if (fromIndex < 0 || capacity == 0) {
721+
if (fromIndex <= 0) { // fromIndex is the exclusive upper bound.
722722
return -1;
723723
}
724-
buffer.checkIndex(toIndex, fromIndex - toIndex);
724+
final int length = fromIndex - toIndex;
725+
buffer.checkIndex(toIndex, length);
726+
if (!PlatformDependent.isUnaligned()) {
727+
return linearLastIndexOf(buffer, fromIndex, toIndex, value);
728+
}
729+
final int longCount = length >>> 3;
730+
if (longCount > 0) {
731+
final ByteOrder nativeOrder = ByteOrder.nativeOrder();
732+
final boolean isNative = nativeOrder == buffer.order();
733+
final boolean useLE = nativeOrder == ByteOrder.LITTLE_ENDIAN;
734+
final long pattern = SWARUtil.compilePattern(value);
735+
for (int i = 0, offset = fromIndex - Long.BYTES; i < longCount; i++, offset -= Long.BYTES) {
736+
// use the faster available getLong
737+
final long word = useLE? buffer._getLongLE(offset) : buffer._getLong(offset);
738+
final long result = SWARUtil.applyPattern(word, pattern);
739+
if (result != 0) {
740+
// used the oppoiste endianness since we are looking for the last index.
741+
return offset + Long.BYTES - 1 - SWARUtil.getIndex(result, !isNative);
742+
}
743+
}
744+
}
745+
return unrolledLastIndexOf(buffer, fromIndex - (longCount << 3), length & 7, value);
746+
}
747+
748+
private static int linearLastIndexOf(final AbstractByteBuf buffer, final int fromIndex, final int toIndex,
749+
final byte value) {
725750
for (int i = fromIndex - 1; i >= toIndex; i--) {
726751
if (buffer._getByte(i) == value) {
727752
return i;
728753
}
729754
}
755+
return -1;
756+
}
730757

758+
private static int unrolledLastIndexOf(final AbstractByteBuf buffer, final int fromIndex, final int byteCount,
759+
final byte value) {
760+
assert byteCount >= 0 && byteCount < 8;
761+
if (byteCount == 0) {
762+
return -1;
763+
}
764+
if (buffer._getByte(fromIndex - 1) == value) {
765+
return fromIndex - 1;
766+
}
767+
if (byteCount == 1) {
768+
return -1;
769+
}
770+
if (buffer._getByte(fromIndex - 2) == value) {
771+
return fromIndex - 2;
772+
}
773+
if (byteCount == 2) {
774+
return -1;
775+
}
776+
if (buffer._getByte(fromIndex - 3) == value) {
777+
return fromIndex - 3;
778+
}
779+
if (byteCount == 3) {
780+
return -1;
781+
}
782+
if (buffer._getByte(fromIndex - 4) == value) {
783+
return fromIndex - 4;
784+
}
785+
if (byteCount == 4) {
786+
return -1;
787+
}
788+
if (buffer._getByte(fromIndex - 5) == value) {
789+
return fromIndex - 5;
790+
}
791+
if (byteCount == 5) {
792+
return -1;
793+
}
794+
if (buffer._getByte(fromIndex - 6) == value) {
795+
return fromIndex - 6;
796+
}
797+
if (byteCount == 6) {
798+
return -1;
799+
}
800+
if (buffer._getByte(fromIndex - 7) == value) {
801+
return fromIndex - 7;
802+
}
731803
return -1;
732804
}
733805

buffer/src/test/java/io/netty/buffer/AbstractByteBufTest.java

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2293,11 +2293,51 @@ public void testSWARIndexOf() {
22932293
buffer.writeByte((byte) 2);
22942294
buffer.writeByte((byte) 3);
22952295
buffer.writeByte((byte) 4);
2296-
buffer.writeByte((byte) 1);
2296+
buffer.writeByte((byte) 1); // 15
22972297
assertEquals(11, buffer.indexOf(0, 12, (byte) 1));
22982298
assertEquals(12, buffer.indexOf(0, 16, (byte) 2));
22992299
assertEquals(-1, buffer.indexOf(0, 11, (byte) 1));
23002300
assertEquals(11, buffer.indexOf(0, 16, (byte) 1));
2301+
2302+
// lastIndexOf
2303+
assertEquals(15, buffer.indexOf(16, 0, (byte) 1));
2304+
assertEquals(12, buffer.indexOf(16, 0, (byte) 2));
2305+
assertEquals(11, buffer.indexOf(15, 0, (byte) 1));
2306+
assertEquals(-1, buffer.indexOf(11, 0, (byte) 1));
2307+
buffer.release();
2308+
}
2309+
2310+
@Test
2311+
public void testUnrolledSWARIndexOf() {
2312+
ByteBuf buffer = newBuffer(15);
2313+
buffer.clear();
2314+
// Ensure the buffer is completely zero'ed.
2315+
buffer.setZero(0, buffer.capacity());
2316+
buffer.writeByte((byte) 0); // 0
2317+
buffer.writeByte((byte) 1);
2318+
buffer.writeByte((byte) 2);
2319+
buffer.writeByte((byte) 3);
2320+
buffer.writeByte((byte) 4);
2321+
buffer.writeByte((byte) 5);
2322+
buffer.writeByte((byte) 6);
2323+
buffer.writeByte((byte) 7); // 7
2324+
2325+
buffer.writeByte((byte) 8); // 8
2326+
buffer.writeByte((byte) 9);
2327+
buffer.writeByte((byte) 10);
2328+
buffer.writeByte((byte) 11);
2329+
buffer.writeByte((byte) 12);
2330+
buffer.writeByte((byte) 13);
2331+
buffer.writeByte((byte) 14); // 14
2332+
assertEquals(15, buffer.capacity());
2333+
for (int i = 0; i < 14; ++i) {
2334+
assertEquals(i, buffer.indexOf(i, buffer.capacity(), (byte) i));
2335+
}
2336+
2337+
// lastIndexOf
2338+
for (int i = 0; i < 14; ++i) {
2339+
assertEquals(i, buffer.indexOf(buffer.capacity(), 0, (byte) i));
2340+
}
23012341
buffer.release();
23022342
}
23032343

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* Copyright 2024 The Netty Project
3+
*
4+
* The Netty Project licenses this file to you under the Apache License,
5+
* version 2.0 (the "License"); you may not use this file except in compliance
6+
* with the License. You may obtain a copy of the License at:
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12+
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13+
* License for the specific language governing permissions and limitations
14+
* under the License.
15+
*/
16+
package io.netty.microbench.buffer;
17+
18+
import io.netty.buffer.ByteBuf;
19+
import io.netty.buffer.ByteBufAllocator;
20+
import io.netty.buffer.PooledByteBufAllocator;
21+
import io.netty.buffer.UnpooledByteBufAllocator;
22+
import io.netty.microbench.util.AbstractMicrobenchmark;
23+
import io.netty.util.internal.SuppressJava6Requirement;
24+
import org.openjdk.jmh.annotations.Benchmark;
25+
import org.openjdk.jmh.annotations.Fork;
26+
import org.openjdk.jmh.annotations.Level;
27+
import org.openjdk.jmh.annotations.Measurement;
28+
import org.openjdk.jmh.annotations.OutputTimeUnit;
29+
import org.openjdk.jmh.annotations.Param;
30+
import org.openjdk.jmh.annotations.Scope;
31+
import org.openjdk.jmh.annotations.Setup;
32+
import org.openjdk.jmh.annotations.State;
33+
import org.openjdk.jmh.annotations.TearDown;
34+
import org.openjdk.jmh.annotations.Warmup;
35+
36+
import java.util.SplittableRandom;
37+
import java.util.concurrent.TimeUnit;
38+
39+
@State(Scope.Benchmark)
40+
@OutputTimeUnit(TimeUnit.MICROSECONDS)
41+
@Fork(2)
42+
@Warmup(iterations = 5, time = 1)
43+
@Measurement(iterations = 8, time = 1)
44+
public class ByteBufLastIndexOfBenchmark extends AbstractMicrobenchmark {
45+
@Param({ "7", "16", "23", "32" })
46+
int size;
47+
48+
@Param({ "4", "11" })
49+
int logPermutations;
50+
51+
@Param({ "1" })
52+
int seed;
53+
54+
int permutations;
55+
56+
ByteBuf[] data;
57+
58+
private int i;
59+
60+
@Param({ "0" })
61+
private byte needleByte;
62+
63+
@Param({ "true", "false" })
64+
private boolean direct;
65+
66+
@Param({ "false", "true" })
67+
private boolean noUnsafe;
68+
69+
@Param({ "false", "true" })
70+
private boolean pooled;
71+
72+
@Setup(Level.Trial)
73+
@SuppressJava6Requirement(reason = "using SplittableRandom to reliably produce data")
74+
public void init() {
75+
System.setProperty("io.netty.noUnsafe", Boolean.valueOf(noUnsafe).toString());
76+
SplittableRandom random = new SplittableRandom(seed);
77+
permutations = 1 << logPermutations;
78+
this.data = new ByteBuf[permutations];
79+
final ByteBufAllocator allocator = pooled? PooledByteBufAllocator.DEFAULT : UnpooledByteBufAllocator.DEFAULT;
80+
for (int i = 0; i < permutations; ++i) {
81+
data[i] = direct? allocator.directBuffer(size, size) : allocator.heapBuffer(size, size);
82+
for (int j = 0; j < size; j++) {
83+
int value = random.nextInt(Byte.MIN_VALUE, Byte.MAX_VALUE + 1);
84+
// turn any found value into something different
85+
if (value == needleByte) {
86+
if (needleByte != 1) {
87+
value = 1;
88+
} else {
89+
value = 0;
90+
}
91+
}
92+
data[i].setByte(j, value);
93+
}
94+
final int foundIndex = random.nextInt(0, Math.min(8, size));
95+
data[i].setByte(foundIndex, needleByte);
96+
}
97+
}
98+
99+
private ByteBuf getData() {
100+
return data[i++ & (permutations - 1)];
101+
}
102+
103+
@Benchmark
104+
public int lastIndexOf() {
105+
return getData().indexOf(size, 0, needleByte);
106+
}
107+
108+
@TearDown
109+
public void releaseBuffers() {
110+
for (ByteBuf buffer : data) {
111+
buffer.release();
112+
}
113+
}
114+
115+
}

0 commit comments

Comments
 (0)