Skip to content

Commit 208778a

Browse files
committed
reimplement read string
1 parent 273069f commit 208778a

7 files changed

Lines changed: 299 additions & 155 deletions

File tree

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package com.jsoniter.demo;
2+
3+
4+
import com.jsoniter.JsonIterator;
5+
import org.junit.Test;
6+
import org.openjdk.jmh.Main;
7+
import org.openjdk.jmh.annotations.*;
8+
import org.openjdk.jmh.infra.BenchmarkParams;
9+
import org.openjdk.jmh.infra.Blackhole;
10+
11+
import java.io.IOException;
12+
13+
@State(Scope.Thread)
14+
public class ReadString {
15+
16+
17+
private JsonIterator jsonIterator;
18+
private byte[] input;
19+
20+
public static void main(String[] args) throws Exception {
21+
Main.main(new String[]{
22+
"ReadString",
23+
"-i", "5",
24+
"-wi", "5",
25+
"-f", "1",
26+
});
27+
}
28+
29+
@Test
30+
public void test() throws IOException {
31+
benchSetup(null);
32+
}
33+
34+
@Setup(Level.Trial)
35+
public void benchSetup(BenchmarkParams params) {
36+
jsonIterator = new JsonIterator();
37+
input = "\"hello wo\\trld\"".getBytes();
38+
}
39+
40+
@Benchmark
41+
public void jsoniter(Blackhole bh) throws IOException {
42+
jsonIterator.reset(input);
43+
bh.consume(jsonIterator.readString());
44+
}
45+
}

src/main/java/com/jsoniter/IterImpl.java

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,4 +224,81 @@ public static void skipFixedBytes(JsonIterator iter, int n) throws IOException {
224224
public final static boolean loadMore(JsonIterator iter) throws IOException {
225225
return false;
226226
}
227+
228+
public final static String readStringSlowPath(JsonIterator iter, int j) throws IOException {
229+
try {
230+
for (int i = iter.head; i < iter.tail; ) {
231+
int bc = iter.buf[i++];
232+
if (bc == '"') {
233+
return new String(iter.reusableChars, 0, j);
234+
}
235+
if (bc == '\\') {
236+
bc = iter.buf[i++];
237+
switch (bc) {
238+
case 'b':
239+
bc = '\b';
240+
break;
241+
case 't':
242+
bc = '\t';
243+
break;
244+
case 'n':
245+
bc = '\n';
246+
break;
247+
case 'f':
248+
bc = '\f';
249+
break;
250+
case 'r':
251+
bc = '\r';
252+
break;
253+
case '"':
254+
case '/':
255+
case '\\':
256+
break;
257+
case 'u':
258+
bc = (IterImplString.translateHex(iter.buf[i++]) << 12) +
259+
(IterImplString.translateHex(iter.buf[i++]) << 8) +
260+
(IterImplString.translateHex(iter.buf[i++]) << 4) +
261+
IterImplString.translateHex(iter.buf[i++]);
262+
break;
263+
264+
default:
265+
throw iter.reportError("readStringSlowPath", "invalid escape character: " + bc);
266+
}
267+
} else if ((bc & 0x80) != 0) {
268+
final int u2 = iter.buf[i++];
269+
if ((bc & 0xE0) == 0xC0) {
270+
bc = ((bc & 0x1F) << 6) + (u2 & 0x3F);
271+
} else {
272+
final int u3 = iter.buf[i++];
273+
if ((bc & 0xF0) == 0xE0) {
274+
bc = ((bc & 0x0F) << 12) + ((u2 & 0x3F) << 6) + (u3 & 0x3F);
275+
} else {
276+
final int u4 = iter.buf[i++];
277+
if ((bc & 0xF8) == 0xF0) {
278+
bc = ((bc & 0x07) << 18) + ((u2 & 0x3F) << 12) + ((u3 & 0x3F) << 6) + (u4 & 0x3F);
279+
} else {
280+
throw iter.reportError("readStringSlowPath", "invalid unicode character");
281+
}
282+
283+
if (bc >= 0x10000) {
284+
// check if valid unicode
285+
if (bc >= 0x110000)
286+
throw iter.reportError("readStringSlowPath", "invalid unicode character");
287+
288+
// split surrogates
289+
final int sup = bc - 0x10000;
290+
iter.reusableChars[j++] = (char) ((sup >>> 10) + 0xd800);
291+
iter.reusableChars[j++] = (char) ((sup & 0x3ff) + 0xdc00);
292+
continue;
293+
}
294+
}
295+
}
296+
}
297+
iter.reusableChars[j++] = (char) bc;
298+
}
299+
throw iter.reportError("readStringSlowPath", "incomplete string");
300+
} catch (IndexOutOfBoundsException e) {
301+
throw iter.reportError("readString", "incomplete string");
302+
}
303+
}
227304
}

src/main/java/com/jsoniter/IterImplForStreaming.java

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ private static boolean keepSkippedBytesThenRead(JsonIterator iter) throws IOExce
299299
final static byte readByte(JsonIterator iter) throws IOException {
300300
if (iter.head == iter.tail) {
301301
if (!loadMore(iter)) {
302-
return 0;
302+
throw iter.reportError("readByte", "no more to read");
303303
}
304304
}
305305
return iter.buf[iter.head++];
@@ -343,7 +343,7 @@ public static Any readAny(JsonIterator iter) throws IOException {
343343
case 'n':
344344
skipUntilBreak(iter);
345345
iter.skipStartedAt = -1;
346-
return Any.wrap((Object)null);
346+
return Any.wrap((Object) null);
347347
case '[':
348348
skipArray(iter);
349349
copied = copySkippedBytes(iter);
@@ -376,4 +376,77 @@ public static void skipFixedBytes(JsonIterator iter, int n) throws IOException {
376376
iter.head += more;
377377
}
378378
}
379+
380+
public final static String readStringSlowPath(JsonIterator iter, int j) throws IOException {
381+
for (;;) {
382+
int bc = readByte(iter);
383+
if (bc == '"') {
384+
return new String(iter.reusableChars, 0, j);
385+
}
386+
if (bc == '\\') {
387+
bc = readByte(iter);
388+
switch (bc) {
389+
case 'b':
390+
bc = '\b';
391+
break;
392+
case 't':
393+
bc = '\t';
394+
break;
395+
case 'n':
396+
bc = '\n';
397+
break;
398+
case 'f':
399+
bc = '\f';
400+
break;
401+
case 'r':
402+
bc = '\r';
403+
break;
404+
case '"':
405+
case '/':
406+
case '\\':
407+
break;
408+
case 'u':
409+
bc = (IterImplString.translateHex(readByte(iter)) << 12) +
410+
(IterImplString.translateHex(readByte(iter)) << 8) +
411+
(IterImplString.translateHex(readByte(iter)) << 4) +
412+
IterImplString.translateHex(readByte(iter));
413+
break;
414+
415+
default:
416+
throw iter.reportError("readStringSlowPath", "invalid escape character: " + bc);
417+
}
418+
} else if ((bc & 0x80) != 0) {
419+
final int u2 = readByte(iter);
420+
if ((bc & 0xE0) == 0xC0) {
421+
bc = ((bc & 0x1F) << 6) + (u2 & 0x3F);
422+
} else {
423+
final int u3 = readByte(iter);
424+
if ((bc & 0xF0) == 0xE0) {
425+
bc = ((bc & 0x0F) << 12) + ((u2 & 0x3F) << 6) + (u3 & 0x3F);
426+
} else {
427+
final int u4 = readByte(iter);
428+
if ((bc & 0xF8) == 0xF0) {
429+
bc = ((bc & 0x07) << 18) + ((u2 & 0x3F) << 12) + ((u3 & 0x3F) << 6) + (u4 & 0x3F);
430+
} else {
431+
throw iter.reportError("readStringSlowPath", "invalid unicode character");
432+
}
433+
434+
if (bc >= 0x10000) {
435+
// check if valid unicode
436+
if (bc >= 0x110000)
437+
throw iter.reportError("readStringSlowPath", "invalid unicode character");
438+
439+
// split surrogates
440+
final int sup = bc - 0x10000;
441+
iter.reusableChars[j++] = (char) ((sup >>> 10) + 0xd800);
442+
iter.reusableChars[j++] = (char) ((sup & 0x3ff) + 0xdc00);
443+
continue;
444+
}
445+
}
446+
}
447+
}
448+
iter.reusableChars[j++] = (char) bc;
449+
}
450+
}
451+
379452
}

src/main/java/com/jsoniter/IterImplNumber.java

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3636
// TODO: make separate implementation for streaming and non-streaming
3737
class IterImplNumber {
3838

39-
final static int[] digits = new int[256];
4039
private final static int[] intDigits = new int[256];
4140
private final static int[] floatDigits = new int[256];
4241
private final static int END_OF_NUMBER = -2;
@@ -45,22 +44,14 @@ class IterImplNumber {
4544
private static final int POW10[] = {1, 10, 100, 1000, 10000, 100000, 1000000};
4645

4746
static {
48-
for (int i = 0; i < digits.length; i++) {
49-
digits[i] = INVALID_CHAR_FOR_NUMBER;
47+
for (int i = 0; i < floatDigits.length; i++) {
5048
floatDigits[i] = INVALID_CHAR_FOR_NUMBER;
5149
intDigits[i] = INVALID_CHAR_FOR_NUMBER;
5250
}
5351
for (int i = '0'; i <= '9'; ++i) {
54-
digits[i] = (i - '0');
5552
floatDigits[i] = (i - '0');
5653
intDigits[i] = (i - '0');
5754
}
58-
for (int i = 'a'; i <= 'f'; ++i) {
59-
digits[i] = ((i - 'a') + 10);
60-
}
61-
for (int i = 'A'; i <= 'F'; ++i) {
62-
digits[i] = ((i - 'A') + 10);
63-
}
6455
floatDigits[','] = END_OF_NUMBER;
6556
floatDigits[']'] = END_OF_NUMBER;
6657
floatDigits['}'] = END_OF_NUMBER;
@@ -331,31 +322,4 @@ public static final long readPositiveLong(final JsonIterator iter, byte c) throw
331322
}
332323
}
333324
}
334-
335-
public static final char readU4(JsonIterator iter) throws IOException {
336-
int v = digits[IterImpl.readByte(iter)];
337-
if (v == -1) {
338-
throw iter.reportError("readU4", "bad unicode");
339-
}
340-
char b = (char) v;
341-
v = digits[IterImpl.readByte(iter)];
342-
if (v == -1) {
343-
throw iter.reportError("readU4", "bad unicode");
344-
}
345-
b = (char) (b << 4);
346-
b += v;
347-
v = digits[IterImpl.readByte(iter)];
348-
if (v == -1) {
349-
throw iter.reportError("readU4", "bad unicode");
350-
}
351-
b = (char) (b << 4);
352-
b += v;
353-
v = digits[IterImpl.readByte(iter)];
354-
if (v == -1) {
355-
throw iter.reportError("readU4", "bad unicode");
356-
}
357-
b = (char) (b << 4);
358-
b += v;
359-
return b;
360-
}
361325
}

0 commit comments

Comments
 (0)