Skip to content

Commit c2913d5

Browse files
committed
Adding dynamic memory allocation.
1 parent 8589a05 commit c2913d5

7 files changed

Lines changed: 93 additions & 24 deletions

File tree

README.md

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,29 @@ Goal: Speed up the parsing of JSON per se.
1414
const char * filename = ... //
1515
std::string_view p = get_corpus(filename);
1616
ParsedJson pj;
17-
size_t maxdepth = 1024; // support documents have nesting "depth" up to 1024
18-
pj.allocateCapacity(p.size(), maxdepth); // allocate memory for parsing up to p.size() bytes
17+
pj.allocateCapacity(p.size()); // allocate memory for parsing up to p.size() bytes
1918
bool is_ok = json_parse(p, pj); // do the parsing, return false on error
2019
// parsing is done!
2120
// pj can be reused with other json_parse calls.
2221
```
2322

23+
It is also possible to use a simpler API if you do not mind having the overhead
24+
of memory allocation:
25+
26+
```C
27+
#include "simdjson/jsonparser.h"
28+
29+
// ...
30+
31+
const char * filename = ... //
32+
std::string_view p = get_corpus(filename);
33+
ParsedJson pj = build_parsed_json(p); // do the parsing
34+
if( ! pj.isValid() ) {
35+
// something went wrong
36+
}
37+
```
38+
39+
ParsedJson build_parsed_json(const std::string_view &s)
2440

2541

2642
## Usage

benchmark/parse.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ int main(int argc, char *argv[]) {
7878
}
7979
if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.size() << " bytes)" << endl;
8080
ParsedJson pj;
81-
bool allocok = pj.allocateCapacity(p.size(), 1024);
81+
bool allocok = pj.allocateCapacity(p.size());
8282
if(!allocok) {
8383
std::cerr << "failed to allocate memory" << std::endl;
8484
return EXIT_FAILURE;

benchmark/parsingcompetition.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ int main(int argc, char *argv[]) {
8989
int repeat = 10;
9090
int volume = p.size();
9191
BEST_TIME("simdjson", json_parse(p, pj), true, , repeat, volume, true);
92+
BEST_TIME("simdjson (with dyn alloc) ", build_parsed_json(p).isValid(), true, , repeat, volume, true);
9293

9394
rapidjson::Document d;
9495

include/simdjson/jsonparser.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
#include "simdjson/stage34_unified.h"
99

1010
// Parse a document found in buf, need to preallocate ParsedJson.
11-
// Return false in case of a failure.
11+
// Return false in case of a failure. You can also check validity
12+
// by calling pj.isValid(). The same ParsedJson can be reused.
1213
// The string should be NULL terminated.
1314
WARN_UNUSED
1415
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj);
@@ -23,3 +24,20 @@ WARN_UNUSED
2324
// Convenience overload: parses the bytes viewed by s into the preallocated pj
// by forwarding s.data() and s.size() to a (buf, len, pj) json_parse overload.
// Returns whatever that call returns.
// NOTE(review): the comment on the u8 overload says the input should be NULL
// terminated; a std::string_view does not guarantee that — confirm callers
// always pass a view over a NULL-terminated buffer.
static inline bool json_parse(const std::string_view &s, ParsedJson &pj) {
2425
return json_parse(s.data(), s.size(), pj);
2526
}
27+
28+
29+
// Build a ParsedJson object. You can check validity
30+
// by calling pj.isValid(). This does memory allocation.
31+
WARN_UNUSED
32+
ParsedJson build_parsed_json(const u8 *buf, size_t len);
33+
34+
// Convenience overload for char buffers: casts buf to const u8 * and forwards
// to build_parsed_json(const u8 *, size_t). The returned ParsedJson should be
// checked with isValid().
WARN_UNUSED
35+
static inline ParsedJson build_parsed_json(const char * buf, size_t len) {
36+
return build_parsed_json((const u8 *) buf, len); // pointer cast only, the bytes are not copied
37+
}
38+
39+
// convenience function
40+
// Convenience overload for std::string_view: forwards the view's data pointer
// and size to the (buf, len) build_parsed_json overload and returns its
// result. The returned ParsedJson should be checked with isValid().
WARN_UNUSED
41+
static inline ParsedJson build_parsed_json(const std::string_view &s) {
42+
return build_parsed_json(s.data(), s.size());
43+
}

include/simdjson/parsedjson.h

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,39 +14,25 @@
1414

1515
#include "simdjson/jsonformatutils.h"
1616

17-
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF;
17+
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF
18+
19+
#define DEFAULTMAXDEPTH 1024// a JSON document with a depth exceeding 1024 is probably de facto invalid
1820

1921
struct ParsedJson {
2022
public:
21-
size_t bytecapacity; // indicates how many bits are meant to be supported by
22-
// structurals
23-
size_t depthcapacity; // how deep we can go
24-
size_t tapecapacity;
25-
size_t stringcapacity;
26-
u32 current_loc;
27-
u8 *structurals;
28-
u32 n_structural_indexes;
29-
u32 *structural_indexes;
30-
31-
u64 *tape;
32-
u32 *containing_scope_offset;
33-
void **ret_address;
34-
35-
u8 *string_buf; // should be at least bytecapacity
36-
u8 *current_string_buf_loc;
3723

3824
// create a ParsedJson container with zero capacity, call allocateCapacity to
3925
// allocate memory
4026
ParsedJson()
4127
: bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0),
4228
current_loc(0), structurals(NULL), n_structural_indexes(0),
4329
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL),
44-
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL) {}
30+
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL), isvalid(false) {}
4531

4632
// if needed, allocate memory so that the object is able to process JSON
4733
// documents having up to len bytes and maxdepth "depth"
4834
WARN_UNUSED
49-
inline bool allocateCapacity(size_t len, size_t maxdepth) {
35+
inline bool allocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH) {
5036
if ((maxdepth == 0) || (len == 0)) {
5137
std::cerr << "capacities must be non-zero " << std::endl;
5238
return false;
@@ -56,6 +42,7 @@ struct ParsedJson {
5642
return true;
5743
deallocate();
5844
}
45+
isvalid = false;
5946
bytecapacity = 0; // will only set it to len after allocations are a success
6047
if (posix_memalign((void **)&structurals, 8, ROUNDUP_N(len, 64) / 8)) {
6148
std::cerr << "Could not allocate memory for structurals" << std::endl;
@@ -97,6 +84,10 @@ struct ParsedJson {
9784
return true;
9885
}
9986

87+
// Returns the isvalid flag. The flag starts out false, is reset to false by
// init() and deallocate(), and is set to true by unified_machine just before
// it returns true.
bool isValid() const {
88+
return isvalid;
89+
}
90+
10091
// deallocate memory and set capacity to zero, called automatically by the
10192
// destructor
10293
void deallocate() {
@@ -110,6 +101,7 @@ struct ParsedJson {
110101
delete[] string_buf;
111102
delete[] structural_indexes;
112103
free(structurals);
104+
isvalid = false;
113105
}
114106

115107
~ParsedJson() { deallocate(); }
@@ -118,13 +110,15 @@ struct ParsedJson {
118110
// Reset the per-parse state without touching the allocated buffers:
// rewinds the string-buffer cursor, resets current_loc and clears the
// validity flag.
void init() {
119111
current_string_buf_loc = string_buf; // next string is written at the start of string_buf
120112
current_loc = 0;
113+
isvalid = false; // stays false until unified_machine sets it to true on success
121114
}
122115

123116
// print the json to stdout (should be valid)
124117
// return false if the tape is likely wrong (e.g., you did not parse a valid
125118
// JSON).
126119
WARN_UNUSED
127120
bool printjson() {
121+
if(!isvalid) return false;
128122
size_t tapeidx = 0;
129123
u64 tape_val = tape[tapeidx];
130124
u8 type = (tape_val >> 56);
@@ -227,6 +221,7 @@ struct ParsedJson {
227221

228222
WARN_UNUSED
229223
bool dump_raw_tape() {
224+
if(!isvalid) return false;
230225
size_t tapeidx = 0;
231226
u64 tape_val = tape[tapeidx++];
232227
u8 type = (tape_val >> 56);
@@ -374,6 +369,32 @@ struct ParsedJson {
374369
};
375370

376371
#endif
372+
373+
size_t bytecapacity; // indicates how many bits are meant to be supported by
374+
// structurals
375+
376+
size_t depthcapacity; // how deep we can go
377+
size_t tapecapacity;
378+
size_t stringcapacity;
379+
u32 current_loc;
380+
u8 *structurals;
381+
u32 n_structural_indexes;
382+
383+
u32 *structural_indexes;
384+
385+
u64 *tape;
386+
u32 *containing_scope_offset;
387+
void **ret_address;
388+
389+
u8 *string_buf; // should be at least bytecapacity
390+
u8 *current_string_buf_loc;
391+
bool isvalid;
392+
ParsedJson(const ParsedJson && p); // move constructor (from a const rvalue): only declared here, no definition is visible in this diff
393+
394+
private :
395+
ParsedJson(const ParsedJson & p); // copy constructor is private: we don't want a ParsedJson to be copied
396+
397+
377398
};
378399

379400
#ifdef DEBUG

src/jsonparser.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,15 @@ bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {
2323
return isok;
2424
}
2525

26+
// Build and return a ParsedJson for the len bytes at buf, allocating the
// parser's memory internally (allocateCapacity is called with its default
// maxdepth). The result is returned whether or not parsing succeeded, so
// callers must check isValid() on it.
// A len of 0 is rejected by allocateCapacity and is therefore reported here
// as a memory allocation failure.
// NOTE(review): pj is returned by value while ParsedJson's copy constructor
// is private and its move constructor is only declared — this appears to
// rely on the compiler eliding the copy/move of the return value; confirm.
WARN_UNUSED
27+
ParsedJson build_parsed_json(const u8 *buf, size_t len) {
28+
ParsedJson pj;
29+
bool ok = pj.allocateCapacity(len);
30+
if(ok) {
31+
ok = json_parse(buf, len, pj);
32+
assert(ok == pj.isValid()); // json_parse's result and the validity flag are expected to agree
33+
} else {
34+
std::cerr << "failure during memory allocation " << std::endl;
35+
}
36+
return pj; // pj.isValid() is false on any failure path
37+
}

src/stage34_unified.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,8 +432,9 @@ bool unified_machine(const u8 *buf, size_t len, ParsedJson &pj) {
432432

433433

434434
#ifdef DEBUG
435-
pj.dump_tapes();
435+
pj.dump_raw_tape();
436436
#endif
437+
pj.isvalid = true;
437438
return true;
438439

439440
fail:

0 commit comments

Comments
 (0)