Skip to content

Commit 5fae7b2

Browse files
committed
Still working
1 parent 50defa5 commit 5fae7b2

15 files changed

+134
-91
lines changed

benchmark/minifiercompetition.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,11 @@ int main(int argc, char *argv[]) {
113113

114114
BEST_TIME("sajson despaced", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(minisize, buffer)).is_valid(), true, memcpy(buffer, minibuffer, p.second), repeat, volume, true);
115115

116-
ParsedJson *pj_ptr = allocate_ParsedJson(p.second);
116+
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
117117
ParsedJson &pj(*pj_ptr);
118118
BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.second, pj), true, memcpy(buffer, p.first, p.second), repeat, volume, true);
119119

120-
ParsedJson *pj_ptr2 = allocate_ParsedJson(p.second);
120+
ParsedJson *pj_ptr2 = allocate_ParsedJson(p.second, 1024);
121121
ParsedJson &pj2(*pj_ptr2);
122122

123123

benchmark/parse.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ int main(int argc, char *argv[]) {
126126
if(verbose) cout << "[verbose] loading " << filename << endl;
127127
pair<u8 *, size_t> p = get_corpus(filename);
128128
if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.second << " bytes)" << endl;
129-
ParsedJson *pj_ptr = allocate_ParsedJson(p.second);
129+
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
130130
ParsedJson &pj(*pj_ptr);
131131
if(verbose) cout << "[verbose] allocated memory for parsed JSON " << endl;
132132

benchmark/parsingcompetition.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ int main(int argc, char *argv[]) {
7373
std::cout << p.second << " B ";
7474
std::cout << std::endl;
7575
}
76-
ParsedJson *pj_ptr = allocate_ParsedJson(p.second);
76+
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
7777
if (pj_ptr == NULL) {
7878
std::cerr << "can't allocate memory" << std::endl;
7979
return EXIT_FAILURE;

include/jsonparser/jsonparser.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// Return NULL if memory cannot be allocated.
1313
// This structure is meant to be reused from document to document, as needed.
1414
// you can use deallocate_ParsedJson to deallocate the memory.
15-
ParsedJson *allocate_ParsedJson(size_t len);
15+
ParsedJson *allocate_ParsedJson(size_t len, size_t maxdepth);
1616

1717
// deallocate a ParsedJson struct (see allocate_ParsedJson)
1818
void deallocate_ParsedJson(ParsedJson *pj_ptr);

include/jsonparser/numberparsing.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
237237
exponent += (negexp ? -expnumber : expnumber);
238238
}
239239
if (i == 0) {
240-
pj.write_tape_double(depth, 0.0);
240+
pj.write_tape_double(0.0);
241241
#ifdef JSON_TEST_NUMBERS // for unit testing
242242
foundFloat(0.0, buf + offset);
243243
#endif
@@ -252,7 +252,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
252252
double d = i;
253253
d *= power_of_ten[308 + exponent];
254254
d = negative ? -d : d;
255-
pj.write_tape_double(depth, d);
255+
pj.write_tape_double(d);
256256
#ifdef JSON_TEST_NUMBERS // for unit testing
257257
foundFloat(d, buf + offset);
258258
#endif
@@ -325,7 +325,7 @@ static never_inline bool parse_large_integer(const u8 *const buf,
325325
}
326326
}
327327
int64_t signed_answer = negative ? -i : i;
328-
pj.write_tape_s64(depth, signed_answer);
328+
pj.write_tape_s64(signed_answer);
329329
#ifdef JSON_TEST_NUMBERS // for unit testing
330330
foundInteger(signed_answer, buf + offset);
331331
#endif
@@ -468,7 +468,7 @@ static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
468468
// We want 0.1e1 to be a float.
469469
//////////
470470
if (i == 0) {
471-
pj.write_tape_double(depth, 0.0);
471+
pj.write_tape_double(0.0);
472472
#ifdef JSON_TEST_NUMBERS // for unit testing
473473
foundFloat(0.0, buf + offset);
474474
#endif
@@ -483,7 +483,7 @@ static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
483483
double d = i;
484484
d *= power_of_ten[308 + exponent];
485485
// d = negative ? -d : d;
486-
pj.write_tape_double(depth, d);
486+
pj.write_tape_double(d);
487487
#ifdef JSON_TEST_NUMBERS // for unit testing
488488
foundFloat(d, buf + offset);
489489
#endif
@@ -493,7 +493,7 @@ static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
493493
return parse_large_integer(buf, len, pj, depth, offset, found_zero,
494494
found_minus);
495495
}
496-
pj.write_tape_s64(depth, i);
496+
pj.write_tape_s64(i);
497497
#ifdef JSON_TEST_NUMBERS // for unit testing
498498
foundInteger(i, buf + offset);
499499
#endif

include/jsonparser/simdjson_internal.h

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,26 @@ struct ParsedJson {
3232
size_t bytecapacity; // indicates how many bits are meant to be supported by
3333
// structurals
3434
size_t depthcapacity; // how deep we can go
35+
36+
u32 current_loc;
3537
u8 *structurals;
3638
u32 n_structural_indexes;
3739
u32 *structural_indexes;
3840

3941
u64 * tape;//[MAX_TAPE];
40-
u32 * tape_locs;
42+
u32 * containing_scope_offset;
43+
void * * ret_address;
44+
4145
u8 * string_buf;// should be at least bytecapacity
46+
4247
u8 *current_string_buf_loc;
4348
u8 * number_buf;// holds either doubles or longs, really // should be at least 4 * bytecapacity
4449
u8 *current_number_buf_loc;
4550

4651
void init() {
4752
current_string_buf_loc = string_buf;
4853
current_number_buf_loc = number_buf;
54+
current_loc = 0;
4955

5056
//for (u32 i = 0; i < MAX_DEPTH; i++) {
5157
// tape_locs[i] = i * MAX_TAPE_ENTRIES;
@@ -75,33 +81,53 @@ struct ParsedJson {
7581
}
7682
}*/
7783
}
78-
// TODO: will need a way of saving strings that's a bit more encapsulated
7984

80-
void write_tape(u32 depth, u64 val, u8 c) {
81-
tape[tape_locs[depth]] = val | (((u64)c) << 56);
82-
tape_locs[depth]++;
85+
// all elements are stored on the tape using a 64-bit word.
86+
//
87+
// strings, doubles and ints are stored as
88+
// a 64-bit word with a pointer to the actual value
89+
//
90+
//
91+
//
92+
// for objects or arrays, store [ or { at the beginning and } and ] at the end.
93+
// For the openings ([ or {), we annotate them with a reference to the location on the tape of
94+
// the end, and for the closings (} and ]), we annotate them with a reference to the
95+
// location of the opening
96+
//
97+
//
98+
99+
// this should be considered a private function
100+
void write_tape(u64 val, u8 c) {
101+
tape[current_loc++] = val | (((u64)c) << 56);
102+
//tape[tape_locs[depth]] = val | (((u64)c) << 56);
103+
//tape_locs[depth]++;
83104
}
84105

85-
void write_tape_s64(u32 depth, s64 i) {
106+
107+
void write_tape_s64(s64 i) {
86108
*((s64 *)current_number_buf_loc) = i;
87109
current_number_buf_loc += 8;
88-
write_tape(depth, current_number_buf_loc - number_buf, 'l');
110+
write_tape(current_number_buf_loc - number_buf, 'l');
89111
}
90112

91-
void write_tape_double(u32 depth, double d) {
113+
void write_tape_double(double d) {
92114
*((double *)current_number_buf_loc) = d;
93115
current_number_buf_loc += 8;
94-
write_tape(depth, current_number_buf_loc - number_buf, 'd');
116+
write_tape(current_number_buf_loc - number_buf, 'd');
95117
}
96118

97-
u32 save_loc(u32 depth) {
98-
return tape_locs[depth];
119+
u32 get_current_loc() {
120+
return current_loc;
99121
}
100122

101-
void write_saved_loc(u32 saved_loc, u64 val, u8 c) {
102-
tape[saved_loc] = val | (((u64)c) << 56);
123+
void annotate_previousloc(u32 saved_loc,u64 val) {
124+
tape[saved_loc] |= val;
103125
}
104126

127+
/*void write_saved_loc(u32 saved_loc, u64 val, u8 c) {
128+
tape[saved_loc] = val | (((u64)c) << 56);
129+
}*/
130+
105131
// public interface
106132
#if 1
107133

@@ -121,13 +147,13 @@ struct ParsedJson {
121147
bool prev(); // valid if we're not at the start of a scope
122148
bool up(); // valid if we are at depth != 0
123149
bool down(); // valid if we're at a [ or { call site; moves us to header of that scope
124-
void to_start_scope(); // move us to the start of our current scope; always succeeds
125-
void to_end_scope(); // move us to the start of our current scope; always succeeds
150+
//void to_start_scope(); // move us to the start of our current scope; always succeeds
151+
//void to_end_scope(); // move us to the end of our current scope; always succeeds
126152

127153
// these navigation elements move us across scope if need be, so allow us to iterate over
128154
// everything at a given depth
129-
bool next_flat(); // valid if we're not at the end of a tape
130-
bool prev_flat(); // valid if we're not at the start of a tape
155+
//bool next_flat(); // valid if we're not at the end of a tape
156+
//bool prev_flat(); // valid if we're not at the start of a tape
131157

132158
void print(std::ostream & os); // print the thing we're currently pointing at
133159
u8 get_type(); // retrieve the character code of what we're looking at: [{"sltfn are the possibilities

include/jsonparser/stringparsing.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
114114
// we encountered quotes first. Move dst to point to quotes and exit
115115
dst[quote_dist] = 0; // null terminate and get out
116116

117-
pj.write_tape(depth, pj.current_string_buf_loc - pj.string_buf, '"');
117+
pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"');
118118

119119
pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value
120120
#ifdef CHECKUNESCAPED

src/jsonioutil.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ std::pair<u8 *, size_t> get_corpus(std::string filename) {
2323
size_t length = buffer.str().size(); // +1 for null
2424
u8* aligned_buffer = (u8 *)allocate_aligned_buffer(length);
2525
memcpy(aligned_buffer, buffer.str().c_str(), length);
26+
aligned_buffer[length] = '\0';
2627
is.close();
2728
return std::make_pair((u8 *)aligned_buffer, length);
2829
}

src/jsonparser.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@
55
// returns NULL if memory cannot be allocated
66
// This structure is meant to be reused from document to document, as needed.
77
// you can use deallocate_ParsedJson to deallocate the memory.
8-
ParsedJson *allocate_ParsedJson(size_t len) {
8+
ParsedJson *allocate_ParsedJson(size_t len, size_t maxdepth) {
9+
if((maxdepth == 0) || (len == 0)) {
10+
std::cerr << "capacities must be non-zero " << std::endl;
11+
return NULL;
12+
}
913
ParsedJson *pj_ptr = new ParsedJson;
1014
if (pj_ptr == NULL) {
1115
std::cerr << "Could not allocate memory for core struct." << std::endl;
@@ -32,13 +36,15 @@ ParsedJson *allocate_ParsedJson(size_t len) {
3236
pj.string_buf = new u8[ROUNDUP_N(len, 64)];
3337
pj.number_buf = new u8[4 * ROUNDUP_N(len, 64)];
3438
pj.tape = new u64[ROUNDUP_N(len, 64)];
35-
size_t depthcapacity = ROUNDUP_N(len, 64);
36-
pj.tape_locs = new u32[depthcapacity];
39+
pj.containing_scope_offset = new u32[maxdepth];
40+
pj.ret_address = new void*[maxdepth];
3741

38-
if ((pj.string_buf == NULL) || (pj.number_buf == NULL) || (pj.tape == NULL) || (pj.tape_locs == NULL)) {
42+
if ((pj.string_buf == NULL) || (pj.number_buf == NULL) || (pj.tape == NULL)
43+
|| (pj.containing_scope_offset == NULL) || (pj.ret_address == NULL) ) {
3944
std::cerr << "Could not allocate memory"
4045
<< std::endl;
41-
delete[] pj.tape_locs;
46+
delete[] pj.ret_address;
47+
delete[] pj.containing_scope_offset;
4248
delete[] pj.tape;
4349
delete[] pj.number_buf;
4450
delete[] pj.string_buf;
@@ -49,14 +55,15 @@ ParsedJson *allocate_ParsedJson(size_t len) {
4955
}
5056

5157
pj.bytecapacity = len;
52-
pj.depthcapacity = depthcapacity;
58+
pj.depthcapacity = maxdepth;
5359
return pj_ptr;
5460
}
5561

5662
void deallocate_ParsedJson(ParsedJson *pj_ptr) {
5763
if (pj_ptr == NULL)
5864
return;
59-
delete[] pj_ptr->tape_locs;
65+
delete[] pj_ptr->ret_address;
66+
delete[] pj_ptr->containing_scope_offset;
6067
delete[] pj_ptr->tape;
6168
delete[] pj_ptr->number_buf;
6269
delete[] pj_ptr->string_buf;

src/stage1_find_marks.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ WARN_UNUSED
251251
*(u64 *)(pj.structurals + idx / 8) = structurals;
252252
}
253253
if(buf[len] != '\0') {
254-
std::cerr << "Your string should NULL terminated." << std::endl;
254+
std::cerr << "Your string should be NULL terminated." << std::endl;
255255
return false;
256256

257257
}

0 commit comments

Comments
 (0)