Skip to content

Commit f0d5337

Browse files
authored
Adding independent benchmarks using Google Benchmark (simdjson#826)
* Adding independent benchmarks using Google Benchmark
1 parent 4cd9de5 commit f0d5337

File tree

4 files changed

+122
-30
lines changed

4 files changed

+122
-30
lines changed

HACKING.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,21 @@ Other important files and directories:
4343
cardinal rule is don't regress performance without knowing exactly why, and what you're trading
4444
for it. If you're not sure what else to do to check your performance, this is always a good start:
4545
```bash
46-
make parse && ./parse jsonexamples/twitter.json
46+
mkdir build
47+
cd build
48+
cmake ..
49+
cmake --build . --config=Release
50+
benchmark/parse ../jsonexamples/twitter.json
4751
```
52+
The last line becomes `./benchmark/Release/parse.exe ../jsonexamples/twitter.json` under Windows. You may also use Google Benchmark:
53+
```bash
54+
mkdir build
55+
cd build
56+
cmake .. -DSIMDJSON_GOOGLE_BENCHMARKS=ON
57+
cmake --build . --target bench_parse_call --config=Release
58+
./benchmark/bench_parse_call
59+
```
60+
The last line becomes `./benchmark/Release/bench_parse_call.exe` under Windows. Under Windows, you can also build with the clang compiler by adding `-T ClangCL` to the `cmake ..` command.
4861
* **fuzz:** The source for fuzz testing. This lets us explore important edge and middle cases
4962
automatically, and is run in CI.
5063
* **jsonchecker:** A set of JSON files used to check different functionality of the parser.
@@ -55,7 +68,6 @@ Other important files and directories:
5568
* **singleheader:** Contains generated simdjson.h and simdjson.cpp that we release.
5669
* **test:** The tests are here. basictests.cpp and errortests.cpp are the primary ones.
5770
* **tools:** Source for executables that can be distributed with simdjson
58-
5971
> **Don't modify the files in singleheader/ directly; these are automatically generated.**
6072
>
6173
> While we distribute those files on release, we *maintain* the files under include/ and src/.

benchmark/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ if (SIMDJSON_GOOGLE_BENCHMARKS)
1919
add_executable(bench_parse_call bench_parse_call.cpp)
2020
add_executable(bench_dom_api bench_dom_api.cpp)
2121
target_link_libraries(bench_dom_api test-data)
22+
target_link_libraries(bench_parse_call test-data)
2223
endif()
2324

2425
if (SIMDJSON_COMPETITION)

benchmark/bench_dom_api.cpp

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ static void numbers_scan(State& state) {
2323
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
2424
return;
2525
}
26-
for (auto _ : state) {
26+
for (UNUSED auto _ : state) {
2727
std::vector<double> container;
2828
for (auto e : arr) {
2929
double x;
@@ -47,7 +47,7 @@ static void numbers_size_scan(State& state) {
4747
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
4848
return;
4949
}
50-
for (auto _ : state) {
50+
for (UNUSED auto _ : state) {
5151
std::vector<double> container;
5252
container.resize(arr.size());
5353
size_t pos = 0;
@@ -75,7 +75,7 @@ static void numbers_type_scan(State& state) {
7575
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
7676
return;
7777
}
78-
for (auto _ : state) {
78+
for (UNUSED auto _ : state) {
7979
std::vector<double> container;
8080
for (auto e : arr) {
8181
dom::element_type actual_type = e.type();
@@ -102,7 +102,7 @@ static void numbers_type_size_scan(State& state) {
102102
cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
103103
return;
104104
}
105-
for (auto _ : state) {
105+
for (UNUSED auto _ : state) {
106106
std::vector<double> container;
107107
container.resize(arr.size());
108108
size_t pos = 0;
@@ -127,7 +127,7 @@ static void numbers_load_scan(State& state) {
127127
dom::parser parser;
128128
dom::array arr;
129129
simdjson::error_code error;
130-
for (auto _ : state) {
130+
for (UNUSED auto _ : state) {
131131
// this may hit the disk, but probably just once
132132
parser.load(NUMBERS_JSON).get<dom::array>().tie(arr, error);
133133
if(error) {
@@ -152,7 +152,7 @@ static void numbers_load_size_scan(State& state) {
152152
dom::parser parser;
153153
dom::array arr;
154154
simdjson::error_code error;
155-
for (auto _ : state) {
155+
for (UNUSED auto _ : state) {
156156
// this may hit the disk, but probably just once
157157
parser.load(NUMBERS_JSON).get<dom::array>().tie(arr, error);
158158
if(error) {
@@ -183,7 +183,7 @@ static void numbers_exceptions_scan(State& state) {
183183
// Prints the number of results in twitter.json
184184
dom::parser parser;
185185
dom::array arr = parser.load(NUMBERS_JSON);
186-
for (auto _ : state) {
186+
for (UNUSED auto _ : state) {
187187
std::vector<double> container;
188188
for (double x : arr) {
189189
container.push_back(x);
@@ -198,7 +198,7 @@ static void numbers_exceptions_size_scan(State& state) {
198198
// Prints the number of results in twitter.json
199199
dom::parser parser;
200200
dom::array arr = parser.load(NUMBERS_JSON);
201-
for (auto _ : state) {
201+
for (UNUSED auto _ : state) {
202202
std::vector<double> container;
203203
container.resize(arr.size());
204204
size_t pos = 0;
@@ -218,7 +218,7 @@ static void numbers_type_exceptions_scan(State& state) {
218218
// Prints the number of results in twitter.json
219219
dom::parser parser;
220220
dom::array arr = parser.load(NUMBERS_JSON);
221-
for (auto _ : state) {
221+
for (UNUSED auto _ : state) {
222222
std::vector<double> container;
223223
for (auto e : arr) {
224224
dom::element_type actual_type = e.type();
@@ -237,7 +237,7 @@ static void numbers_type_exceptions_size_scan(State& state) {
237237
// Prints the number of results in twitter.json
238238
dom::parser parser;
239239
dom::array arr = parser.load(NUMBERS_JSON);
240-
for (auto _ : state) {
240+
for (UNUSED auto _ : state) {
241241
std::vector<double> container;
242242
container.resize(arr.size());
243243
size_t pos = 0;
@@ -258,7 +258,7 @@ BENCHMARK(numbers_type_exceptions_size_scan);
258258
static void numbers_exceptions_load_scan(State& state) {
259259
// Prints the number of results in twitter.json
260260
dom::parser parser;
261-
for (auto _ : state) {
261+
for (UNUSED auto _ : state) {
262262
// this may hit the disk, but probably just once
263263
dom::array arr = parser.load(NUMBERS_JSON);
264264
std::vector<double> container;
@@ -274,7 +274,7 @@ BENCHMARK(numbers_exceptions_load_scan);
274274
static void numbers_exceptions_load_size_scan(State& state) {
275275
// Prints the number of results in twitter.json
276276
dom::parser parser;
277-
for (auto _ : state) {
277+
for (UNUSED auto _ : state) {
278278
// this may hit the disk, but probably just once
279279
dom::array arr = parser.load(NUMBERS_JSON);
280280
std::vector<double> container;
@@ -295,7 +295,7 @@ static void twitter_count(State& state) {
295295
// Prints the number of results in twitter.json
296296
dom::parser parser;
297297
dom::element doc = parser.load(TWITTER_JSON);
298-
for (auto _ : state) {
298+
for (UNUSED auto _ : state) {
299299
uint64_t result_count = doc["search_metadata"]["count"];
300300
if (result_count != 100) { return; }
301301
}
@@ -308,7 +308,7 @@ static void iterator_twitter_count(State& state) {
308308
// Prints the number of results in twitter.json
309309
padded_string json = padded_string::load(TWITTER_JSON);
310310
ParsedJson pj = build_parsed_json(json);
311-
for (auto _ : state) {
311+
for (UNUSED auto _ : state) {
312312
ParsedJson::Iterator iter(pj);
313313
// uint64_t result_count = doc["search_metadata"]["count"];
314314
if (!iter.move_to_key("search_metadata")) { return; }
@@ -326,7 +326,7 @@ static void twitter_default_profile(State& state) {
326326
// Count unique users with a default profile.
327327
dom::parser parser;
328328
dom::element doc = parser.load(TWITTER_JSON);
329-
for (auto _ : state) {
329+
for (UNUSED auto _ : state) {
330330
set<string_view> default_users;
331331
for (dom::object tweet : doc["statuses"].get<dom::array>()) {
332332
dom::object user = tweet["user"];
@@ -343,7 +343,7 @@ static void twitter_image_sizes(State& state) {
343343
// Count unique image sizes
344344
dom::parser parser;
345345
dom::element doc = parser.load(TWITTER_JSON);
346-
for (auto _ : state) {
346+
for (UNUSED auto _ : state) {
347347
set<tuple<uint64_t, uint64_t>> image_sizes;
348348
for (dom::object tweet : doc["statuses"].get<dom::array>()) {
349349
auto [media, not_found] = tweet["entities"]["media"];
@@ -366,7 +366,7 @@ static void error_code_twitter_count(State& state) noexcept {
366366
// Prints the number of results in twitter.json
367367
dom::parser parser;
368368
dom::element doc = parser.load(TWITTER_JSON);
369-
for (auto _ : state) {
369+
for (UNUSED auto _ : state) {
370370
auto [value, error] = doc["search_metadata"]["count"].get<uint64_t>();
371371
if (error) { return; }
372372
if (value != 100) { return; }
@@ -378,7 +378,7 @@ static void error_code_twitter_default_profile(State& state) noexcept {
378378
// Count unique users with a default profile.
379379
dom::parser parser;
380380
dom::element doc = parser.load(TWITTER_JSON);
381-
for (auto _ : state) {
381+
for (UNUSED auto _ : state) {
382382
set<string_view> default_users;
383383

384384
auto [tweets, error] = doc["statuses"].get<dom::array>();
@@ -406,7 +406,7 @@ static void iterator_twitter_default_profile(State& state) {
406406
// Count unique users with a default profile.
407407
padded_string json = padded_string::load(TWITTER_JSON);
408408
ParsedJson pj = build_parsed_json(json);
409-
for (auto _ : state) {
409+
for (UNUSED auto _ : state) {
410410
set<string_view> default_users;
411411
ParsedJson::Iterator iter(pj);
412412

@@ -445,7 +445,7 @@ static void error_code_twitter_image_sizes(State& state) noexcept {
445445
// Count unique image sizes
446446
dom::parser parser;
447447
dom::element doc = parser.load(TWITTER_JSON);
448-
for (auto _ : state) {
448+
for (UNUSED auto _ : state) {
449449
set<tuple<uint64_t, uint64_t>> image_sizes;
450450
auto [statuses, error] = doc["statuses"].get<dom::array>();
451451
if (error) { return; }
@@ -475,7 +475,7 @@ static void iterator_twitter_image_sizes(State& state) {
475475
// Count unique image sizes
476476
padded_string json = padded_string::load(TWITTER_JSON);
477477
ParsedJson pj = build_parsed_json(json);
478-
for (auto _ : state) {
478+
for (UNUSED auto _ : state) {
479479
set<tuple<uint64_t, uint64_t>> image_sizes;
480480
ParsedJson::Iterator iter(pj);
481481

@@ -534,7 +534,7 @@ static void print_json(State& state) noexcept {
534534
padded_string json = get_corpus(TWITTER_JSON);
535535
dom::parser parser;
536536
if (int error = json_parse(json, parser); error != SUCCESS) { cerr << error_message(error) << endl; return; }
537-
for (auto _ : state) {
537+
for (UNUSED auto _ : state) {
538538
std::stringstream s;
539539
if (!parser.print_json(s)) { cerr << "print_json failed" << endl; return; }
540540
}

benchmark/bench_parse_call.cpp

Lines changed: 85 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,90 @@ using namespace benchmark;
55
using namespace std;
66

77
const padded_string EMPTY_ARRAY("[]", 2);
8+
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
9+
const char *GSOC_JSON = SIMDJSON_BENCHMARK_DATA_DIR "gsoc-2018.json";
10+
11+
12+
13+
static void parse_twitter(State& state) {
14+
dom::parser parser;
15+
padded_string docdata;
16+
simdjson::error_code error;
17+
padded_string::load(TWITTER_JSON).tie(docdata, error);
18+
if(error) {
19+
cerr << "could not parse twitter.json" << error << endl;
20+
return;
21+
}
22+
// we do not want mem. alloc. in the loop.
23+
error = parser.allocate(docdata.size());
24+
if(error) {
25+
cout << error << endl;
26+
return;
27+
}
28+
size_t bytes = 0;
29+
for (UNUSED auto _ : state) {
30+
dom::element doc;
31+
bytes += docdata.size();
32+
parser.parse(docdata).tie(doc,error);
33+
if(error) {
34+
cerr << "could not parse twitter.json" << error << endl;
35+
return;
36+
}
37+
benchmark::DoNotOptimize(doc);
38+
}
39+
state.counters["Bytes"] = benchmark::Counter(
40+
double(bytes), benchmark::Counter::kIsRate,
41+
benchmark::Counter::OneK::kIs1024);
42+
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
43+
}
44+
BENCHMARK(parse_twitter)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
45+
return *(std::max_element(std::begin(v), std::end(v)));
46+
})->DisplayAggregatesOnly(true);
47+
48+
49+
static void parse_gsoc(State& state) {
50+
dom::parser parser;
51+
padded_string docdata;
52+
simdjson::error_code error;
53+
padded_string::load(GSOC_JSON).tie(docdata, error);
54+
if(error) {
55+
cerr << "could not parse gsoc-2018.json" << error << endl;
56+
return;
57+
}
58+
// we do not want mem. alloc. in the loop.
59+
error = parser.allocate(docdata.size());
60+
if(error) {
61+
cout << error << endl;
62+
return;
63+
}
64+
size_t bytes = 0;
65+
for (UNUSED auto _ : state) {
66+
dom::element doc;
67+
bytes += docdata.size();
68+
parser.parse(docdata).tie(doc,error);
69+
if(error) {
70+
cerr << "could not parse gsoc-2018.json" << error << endl;
71+
return;
72+
}
73+
benchmark::DoNotOptimize(doc);
74+
}
75+
state.counters["Bytes"] = benchmark::Counter(
76+
double(bytes), benchmark::Counter::kIsRate,
77+
benchmark::Counter::OneK::kIs1024);
78+
state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
79+
}
80+
BENCHMARK(parse_gsoc)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
81+
return *(std::max_element(std::begin(v), std::end(v)));
82+
})->DisplayAggregatesOnly(true);
83+
84+
885

986
SIMDJSON_PUSH_DISABLE_WARNINGS
1087
SIMDJSON_DISABLE_DEPRECATED_WARNING
1188
static void json_parse(State& state) {
1289
ParsedJson pj;
1390
if (!pj.allocate_capacity(EMPTY_ARRAY.length())) { return; }
14-
for (auto _ : state) {
91+
for (UNUSED auto _ : state) {
1592
auto error = json_parse(EMPTY_ARRAY, pj);
1693
if (error) { return; }
1794
}
@@ -21,7 +98,7 @@ BENCHMARK(json_parse);
2198
static void parser_parse_error_code(State& state) {
2299
dom::parser parser;
23100
if (parser.allocate(EMPTY_ARRAY.length())) { return; }
24-
for (auto _ : state) {
101+
for (UNUSED auto _ : state) {
25102
auto error = parser.parse(EMPTY_ARRAY).error();
26103
if (error) { return; }
27104
}
@@ -30,10 +107,11 @@ BENCHMARK(parser_parse_error_code);
30107
static void parser_parse_exception(State& state) {
31108
dom::parser parser;
32109
if (parser.allocate(EMPTY_ARRAY.length())) { return; }
33-
for (auto _ : state) {
110+
for (UNUSED auto _ : state) {
34111
try {
35112
UNUSED dom::element doc = parser.parse(EMPTY_ARRAY);
36113
} catch(simdjson_error &j) {
114+
cout << j.what() << endl;
37115
return;
38116
}
39117
}
@@ -43,27 +121,28 @@ BENCHMARK(parser_parse_exception);
43121
SIMDJSON_PUSH_DISABLE_WARNINGS
44122
SIMDJSON_DISABLE_DEPRECATED_WARNING
45123
/**
 * Benchmarks the deprecated simdjson::build_parsed_json() entry point on the
 * two-byte document "[]", constructing a fresh parser on every iteration.
 *
 * @param state Google Benchmark state driving the timed loop.
 */
static void build_parsed_json(State& state) {
  for (UNUSED auto _ : state) {
    dom::parser parser = simdjson::build_parsed_json(EMPTY_ARRAY);
    if (!parser.valid) {
      // Report the failure instead of silently leaving the timed loop.
      state.SkipWithError("build_parsed_json failed on an empty array");
      return;
    }
  }
}
51129
SIMDJSON_POP_DISABLE_WARNINGS
52130
BENCHMARK(build_parsed_json);
53131
/**
 * Benchmarks parsing the two-byte document "[]" through the error-code API,
 * constructing a new dom::parser inside every iteration so that parser
 * construction is part of the measured work.
 *
 * @param state Google Benchmark state driving the timed loop.
 */
static void document_parse_error_code(State& state) {
  for (UNUSED auto _ : state) {
    dom::parser parser;
    auto error = parser.parse(EMPTY_ARRAY).error();
    if (error) {
      // Report the failure instead of silently leaving the timed loop.
      state.SkipWithError("parsing an empty array failed");
      return;
    }
  }
}
60138
BENCHMARK(document_parse_error_code);
61139
static void document_parse_exception(State& state) {
62-
for (auto _ : state) {
140+
for (UNUSED auto _ : state) {
63141
try {
64142
dom::parser parser;
65143
UNUSED dom::element doc = parser.parse(EMPTY_ARRAY);
66144
} catch(simdjson_error &j) {
145+
cout << j.what() << endl;
67146
return;
68147
}
69148
}

0 commit comments

Comments
 (0)