Skip to content

Commit 12b5185

Browse files
committed
Ok, the new code seems quite fast.
1 parent ce85dd0 commit 12b5185

3 files changed

Lines changed: 28 additions & 27 deletions

File tree

benchmark/parsingcompetition.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,5 +121,6 @@ int main(int argc, char *argv[]) {
121121
BEST_TIME("memcpy ", (memcpy(buffer, p.first, p.second) == buffer), true, , repeat, volume, true);
122122
free(p.first);
123123
free(ast_buffer);
124+
free(buffer);
124125
deallocate_ParsedJson(pj_ptr);
125126
}

include/jsonparser/simdjson_internal.h

Lines changed: 20 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,8 @@ struct ParsedJson {
6464
size_t bytecapacity; // indicates how many bits are meant to be supported by
6565
// structurals
6666
size_t depthcapacity; // how deep we can go
67-
67+
size_t tapecapacity;
68+
size_t stringcapacity;
6869
u32 current_loc;
6970
u8 *structurals;
7071
u32 n_structural_indexes;
@@ -77,19 +78,15 @@ struct ParsedJson {
7778
u8 *string_buf; // should be at least bytecapacity
7879
u8 *current_string_buf_loc;
7980

80-
u8 *number_buf; // holds either doubles or longs, really // should be at least
81-
// 4 * bytecapacity
82-
u8 *current_number_buf_loc;
83-
8481
// this should be called when parsing (right before writing the tapes)
8582
void init() {
8683
current_string_buf_loc = string_buf;
87-
current_number_buf_loc = number_buf;
8884
current_loc = 0;
8985
}
9086

9187
// print the json to stdout (should be valid)
92-
void printjson() {
88+
// return false if the tape is likely wrong (e.g., you did not parse a valid JSON).
89+
bool printjson() {
9390
size_t tapeidx = 0;
9491
u64 tape_val = tape[tapeidx];
9592
u8 type = (tape_val >> 56);
@@ -98,15 +95,17 @@ struct ParsedJson {
9895
howmany = tape_val & JSONVALUEMASK;
9996
} else {
10097
printf("Error: no starting root node?");
101-
abort();
98+
return false;
99+
}
100+
if(howmany > tapecapacity) {
101+
printf("We may be exceeding the tape capacity. Is this a valid document?\n");
102+
return false;
102103
}
103104
tapeidx++;
104105
bool *inobject = new bool[depthcapacity];
105106
size_t *inobjectidx = new size_t[depthcapacity];
106107
int depth = 1; // only root at level 0
107108
inobjectidx[depth] = 0;
108-
int64_t intval;
109-
double doubleval;
110109
for (; tapeidx < howmany; tapeidx++) {
111110
tape_val = tape[tapeidx];
112111
u64 payload = tape_val & JSONVALUEMASK;
@@ -130,12 +129,12 @@ struct ParsedJson {
130129
putchar('"');
131130
break;
132131
case 'l': // we have a long int
133-
memcpy(&intval, number_buf + payload, sizeof(intval));
134-
printf("%" PRId64, intval);
132+
if(tapeidx + 1 >= howmany) return false;
133+
printf("%" PRId64, (int64_t) tape[tapeidx++]);
135134
break;
136135
case 'd': // we have a double
137-
memcpy(&doubleval, number_buf + payload, sizeof(doubleval));
138-
printf("%f", doubleval);
136+
if(tapeidx + 1 >= howmany) return false;
137+
printf("%f", *((double * )& tape[tapeidx++]));
139138
break;
140139
case 'n': // we have a null
141140
printf("null");
@@ -170,11 +169,13 @@ struct ParsedJson {
170169
break;
171170
case 'r': // we start and end with the root node
172171
printf("should we be hitting the root node?\n");
172+
return false;
173173
default:
174174
printf("bug %c\n", type);
175-
abort();
175+
return false;
176176
}
177177
}
178+
return true;
178179
}
179180

180181
// all elements are stored on the tape using a 64-bit word.
@@ -197,15 +198,13 @@ struct ParsedJson {
197198
}
198199

199200
really_inline void write_tape_s64(s64 i) {
200-
write_tape(current_number_buf_loc - number_buf, 'l');
201-
memcpy(current_number_buf_loc, &i, sizeof(s64));
202-
current_number_buf_loc += sizeof(s64);
201+
write_tape(0, 'l');
202+
tape[current_loc++] =*( (u64*) &i);
203203
}
204204

205205
really_inline void write_tape_double(double d) {
206-
write_tape(current_number_buf_loc - number_buf, 'd');
207-
memcpy(current_number_buf_loc, &d, sizeof(double));
208-
current_number_buf_loc += sizeof(double);
206+
write_tape(0, 'd');
207+
tape[current_loc++] =*( (u64*) &d);
209208
}
210209

211210
really_inline u32 get_current_loc() { return current_loc; }

src/jsonparser.cpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -33,20 +33,20 @@ ParsedJson *allocate_ParsedJson(size_t len, size_t maxdepth) {
3333
delete pj_ptr;
3434
return NULL;
3535
}
36-
pj.string_buf = new u8[ROUNDUP_N(len, 64)];
37-
pj.number_buf = new u8[4 * ROUNDUP_N(len, 64)];
38-
pj.tape = new u64[ROUNDUP_N(len, 64)];
36+
size_t tapecapacity = ROUNDUP_N(len, 64);
37+
size_t stringcapacity = ROUNDUP_N(len, 64);
38+
pj.string_buf = new u8[stringcapacity];
39+
pj.tape = new u64[tapecapacity];
3940
pj.containing_scope_offset = new u32[maxdepth];
4041
pj.ret_address = new void*[maxdepth];
4142

42-
if ((pj.string_buf == NULL) || (pj.number_buf == NULL) || (pj.tape == NULL)
43+
if ((pj.string_buf == NULL) || (pj.tape == NULL)
4344
|| (pj.containing_scope_offset == NULL) || (pj.ret_address == NULL) ) {
4445
std::cerr << "Could not allocate memory"
4546
<< std::endl;
4647
delete[] pj.ret_address;
4748
delete[] pj.containing_scope_offset;
4849
delete[] pj.tape;
49-
delete[] pj.number_buf;
5050
delete[] pj.string_buf;
5151
delete[] pj.structural_indexes;
5252
delete[] pj.structurals;
@@ -56,6 +56,8 @@ ParsedJson *allocate_ParsedJson(size_t len, size_t maxdepth) {
5656

5757
pj.bytecapacity = len;
5858
pj.depthcapacity = maxdepth;
59+
pj.tapecapacity = tapecapacity;
60+
pj.stringcapacity = stringcapacity;
5961
return pj_ptr;
6062
}
6163

@@ -65,7 +67,6 @@ void deallocate_ParsedJson(ParsedJson *pj_ptr) {
6567
delete[] pj_ptr->ret_address;
6668
delete[] pj_ptr->containing_scope_offset;
6769
delete[] pj_ptr->tape;
68-
delete[] pj_ptr->number_buf;
6970
delete[] pj_ptr->string_buf;
7071
delete[] pj_ptr->structural_indexes;
7172
free(pj_ptr->structurals);

0 commit comments

Comments
 (0)