Skip to content

Commit d140bc2

Browse files
committed
Automatically allocate memory as needed in parse
1 parent 00f0859 commit d140bc2

18 files changed

Lines changed: 317 additions & 202 deletions

README.md

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -168,23 +168,48 @@ document doc = document::parse(get_corpus(filename));
168168
doc.print_json(cout);
169169
```
170170

171-
If you're using simdjson to parse multiple documents, or in a loop, you should allocate a parser once and reuse it (allocation is slow, do it as little as possible!):
171+
### Reusing the parser for maximum efficiency
172+
173+
If you're using simdjson to parse multiple documents, or parsing in a loop, you should create a parser once
174+
and reuse it. simdjson will allocate and retain internal buffers between parses, keeping buffers
175+
hot in cache and keeping allocation to a minimum.
172176

173177
```c++
174-
// Allocate a parser big enough for all files
175178
document::parser parser;
176-
if (!parser.allocate_capacity(1024*1024)) { exit(1); }
177-
178-
// Read files with the parser, one by one
179179
for (padded_string json : { string("[1, 2, 3]"), string("true"), string("[ true, false ]") }) {
180-
cout << "Parsing " << json.data() << " ..." << endl;
181-
auto [doc, error] = parser.parse(json);
182-
if (error) { cerr << "Error: " << error << endl; exit(1); }
180+
document& doc = parser.parse(json);
183181
doc.print_json(cout);
184-
cout << endl;
185182
}
186183
```
187184

185+
If you are running a server loop and want to limit the document size to keep server memory usage bounded,
186+
you can set a maximum capacity:
187+
188+
```c++
189+
document::parser parser(1024*1024); // Set max capacity to 1MB
190+
for (int i=0;i<argc;i++) {
191+
auto [doc, error] = parser.parse(get_corpus(argv[i]));
192+
if (error == CAPACITY) { cerr << "JSON files larger than 1MB are not supported!" << endl; exit(1); }
193+
if (error) { cerr << error << endl; exit(1); }
194+
doc.print_json(cout);
195+
}
196+
```
197+
198+
If you want absolutely constant memory usage, you can even allocate the capacity yourself at the
199+
beginning:
200+
201+
```c++
202+
document::parser parser(0); // This parser is not allowed to auto-allocate
203+
auto alloc_error = parser.set_capacity(1024*1024); // Set initial capacity to 1MB
204+
if (alloc_error) { exit(1); };
205+
206+
for (int i=0;i<argc;i++) {
207+
auto [doc, error] = parser.parse(get_corpus(argv[i]));
208+
if (error == CAPACITY) { cerr << "JSON files larger than 1MB are not supported!" << endl; exit(1); }
209+
if (error) { cerr << error << endl; exit(1); }
210+
doc.print_json(cout);
211+
}
212+
```
188213

189214
## Newline-Delimited JSON (ndjson) and JSON lines
190215

benchmark/bench_parse_call.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ const padded_string EMPTY_ARRAY("[]", 2);
88

99
static void json_parse(State& state) {
1010
document::parser parser;
11-
if (!parser.allocate_capacity(EMPTY_ARRAY.length())) { return; }
11+
if (parser.set_capacity(EMPTY_ARRAY.length())) { return; }
1212
for (auto _ : state) {
1313
auto error = simdjson::json_parse(EMPTY_ARRAY, parser);
1414
if (error) { return; }
@@ -17,7 +17,7 @@ static void json_parse(State& state) {
1717
BENCHMARK(json_parse);
1818
static void parser_parse_error_code(State& state) {
1919
document::parser parser;
20-
if (!parser.allocate_capacity(EMPTY_ARRAY.length())) { return; }
20+
if (parser.set_capacity(EMPTY_ARRAY.length())) { return; }
2121
for (auto _ : state) {
2222
auto [doc, error] = parser.parse(EMPTY_ARRAY);
2323
if (error) { return; }
@@ -26,7 +26,7 @@ static void parser_parse_error_code(State& state) {
2626
BENCHMARK(parser_parse_error_code);
2727
static void parser_parse_exception(State& state) {
2828
document::parser parser;
29-
if (!parser.allocate_capacity(EMPTY_ARRAY.length())) { return; }
29+
if (parser.set_capacity(EMPTY_ARRAY.length())) { return; }
3030
for (auto _ : state) {
3131
try {
3232
UNUSED document &doc = parser.parse(EMPTY_ARRAY);

benchmark/benchmarker.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ struct benchmarker {
298298
// Allocate document::parser
299299
collector.start();
300300
document::parser parser;
301-
bool allocok = parser.allocate_capacity(json.size());
301+
error_code error = parser.set_capacity(json.size());
302302
event_count allocate_count = collector.end();
303303
allocate_stage << allocate_count;
304304
// Run it once to get hot buffers
@@ -309,14 +309,14 @@ struct benchmarker {
309309
}
310310
}
311311

312-
if (!allocok) {
312+
if (error) {
313313
exit_error(string("Unable to allocate_stage ") + to_string(json.size()) + " bytes for the JSON result.");
314314
}
315315
verbose() << "[verbose] allocated memory for parsed JSON " << endl;
316316

317317
// Stage 1 (find structurals)
318318
collector.start();
319-
error_code error = active_implementation->stage1((const uint8_t *)json.data(), json.size(), parser, false);
319+
error = active_implementation->stage1((const uint8_t *)json.data(), json.size(), parser, false);
320320
event_count stage1_count = collector.end();
321321
stage1 << stage1_count;
322322
if (error) {

benchmark/parse_stream.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ int main (int argc, char *argv[]){
3939
for (auto i = 0; i < 3; i++) {
4040
//Actual test
4141
simdjson::document::parser parser;
42-
bool allocok = parser.allocate_capacity(p.size());
43-
if (!allocok) {
44-
std::cerr << "failed to allocate memory" << std::endl;
42+
simdjson::error_code alloc_error = parser.set_capacity(p.size());
43+
if (alloc_error) {
44+
std::cerr << alloc_error << std::endl;
4545
return EXIT_FAILURE;
4646
}
4747
std::istringstream ss(std::string(p.data(), p.size()));

0 commit comments

Comments
 (0)