Skip to content

Commit 0b21203

Browse files
committed
Document navigation API
1 parent 9a9ca97 commit 0b21203

14 files changed

Lines changed: 1926 additions & 189 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ objs
5757
/allparsingcompetition
5858
/basictests
5959
/benchfeatures
60+
/benchmark/bench_dom_api
6061
/benchmark/bench_parse_call
6162
/benchmark/get_corpus_benchmark
6263
/benchmark/parse

benchmark/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ add_executable(perfdiff perfdiff.cpp)
1212

1313
# Google Benchmarks
1414
if (SIMDJSON_GOOGLE_BENCHMARKS)
15-
add_cpp_benchmark(bench_parse_call bench_parse_call.cpp)
15+
add_cpp_benchmark(bench_parse_call)
1616
target_link_libraries(bench_parse_call benchmark::benchmark)
17+
18+
add_cpp_benchmark(bench_dom_api)
19+
target_link_libraries(bench_dom_api benchmark::benchmark)
20+
target_compile_definitions(bench_dom_api PRIVATE JSON_TEST_PATH="${PROJECT_SOURCE_DIR}/jsonexamples/twitter.json")
1721
endif()

benchmark/bench_dom_api.cpp

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
#include <benchmark/benchmark.h>
2+
#include "simdjson/document.h"
3+
#include "simdjson/jsonparser.h"
4+
using namespace simdjson;
5+
using namespace benchmark;
6+
using namespace std;
7+
8+
#ifndef JSON_TEST_PATH
9+
// Fallback corpus location, used only when the build has not already defined
// JSON_TEST_PATH (path is relative to the current working directory).
#define JSON_TEST_PATH "jsonexamples/twitter.json"
10+
#endif
11+
12+
// Padded JSON text for an empty array: the two characters "[]".
// NOTE(review): no benchmark visible in this file references EMPTY_ARRAY —
// confirm whether it is still needed.
const padded_string EMPTY_ARRAY("[]", 2);
13+
14+
static void twitter_count(State& state) {
15+
// Prints the number of results in twitter.json
16+
document doc = document::parse(get_corpus(JSON_TEST_PATH));
17+
for (auto _ : state) {
18+
uint64_t result_count = doc["search_metadata"]["count"];
19+
if (result_count != 100) { return; }
20+
}
21+
}
22+
BENCHMARK(twitter_count);
23+
24+
static void error_code_twitter_count(State& state) noexcept {
25+
// Prints the number of results in twitter.json
26+
document doc = document::parse(get_corpus(JSON_TEST_PATH));
27+
for (auto _ : state) {
28+
auto [value, error] = doc["search_metadata"]["count"];
29+
if (error) { return; }
30+
if (uint64_t(value) != 100) { return; }
31+
}
32+
}
33+
BENCHMARK(error_code_twitter_count);
34+
35+
static void iterator_twitter_count(State& state) {
36+
// Prints the number of results in twitter.json
37+
document doc = document::parse(get_corpus(JSON_TEST_PATH));
38+
for (auto _ : state) {
39+
document::iterator iter(doc);
40+
// uint64_t result_count = doc["search_metadata"]["count"];
41+
if (!iter.move_to_key("search_metadata")) { return; }
42+
if (!iter.move_to_key("count")) { return; }
43+
if (!iter.is_integer()) { return; }
44+
int64_t result_count = iter.get_integer();
45+
46+
if (result_count != 100) { return; }
47+
}
48+
}
49+
BENCHMARK(iterator_twitter_count);
50+
51+
static void twitter_default_profile(State& state) {
52+
// Count unique users with a default profile.
53+
document doc = document::parse(get_corpus(JSON_TEST_PATH));
54+
for (auto _ : state) {
55+
set<string_view> default_users;
56+
for (document::object tweet : doc["statuses"].as_array()) {
57+
document::object user = tweet["user"];
58+
if (user["default_profile"]) {
59+
default_users.insert(user["screen_name"]);
60+
}
61+
}
62+
if (default_users.size() != 86) { return; }
63+
}
64+
}
65+
BENCHMARK(twitter_default_profile);
66+
67+
static void error_code_twitter_default_profile(State& state) noexcept {
68+
// Count unique users with a default profile.
69+
document doc = document::parse(get_corpus(JSON_TEST_PATH));
70+
for (auto _ : state) {
71+
set<string_view> default_users;
72+
73+
auto [tweets, error] = doc["statuses"].as_array();
74+
if (error) { return; }
75+
for (document::element tweet : tweets) {
76+
auto [user, error2] = tweet["user"].as_object();
77+
if (error2) { return; }
78+
auto [default_profile, error3] = user["default_profile"].as_bool();
79+
if (error3) { return; }
80+
if (default_profile) {
81+
auto [screen_name, error4] = user["screen_name"].as_string();
82+
if (error4) { return; }
83+
default_users.insert(screen_name);
84+
}
85+
}
86+
87+
if (default_users.size() != 86) { return; }
88+
}
89+
}
90+
BENCHMARK(error_code_twitter_default_profile);
91+
92+
static void iterator_twitter_default_profile(State& state) {
93+
// Count unique users with a default profile.
94+
document doc = document::parse(get_corpus(JSON_TEST_PATH));
95+
for (auto _ : state) {
96+
set<string_view> default_users;
97+
document::iterator iter(doc);
98+
99+
// for (document::object tweet : doc["statuses"].as_array()) {
100+
if (!(iter.move_to_key("statuses") && iter.is_array())) { return; }
101+
if (iter.down()) { // first status
102+
do {
103+
104+
// document::object user = tweet["user"];
105+
if (!(iter.move_to_key("user") && iter.is_object())) { return; }
106+
107+
// if (user["default_profile"]) {
108+
if (iter.move_to_key("default_profile")) {
109+
if (iter.is_true()) {
110+
if (!iter.up()) { return; } // back to user
111+
112+
// default_users.insert(user["screen_name"]);
113+
if (!(iter.move_to_key("screen_name") && iter.is_string())) { return; }
114+
default_users.insert(string_view(iter.get_string(), iter.get_string_length()));
115+
}
116+
if (!iter.up()) { return; } // back to user
117+
}
118+
119+
if (!iter.up()) { return; } // back to status
120+
121+
} while (iter.next()); // next status
122+
}
123+
124+
if (default_users.size() != 86) { return; }
125+
}
126+
}
127+
BENCHMARK(iterator_twitter_default_profile);
128+
129+
static void twitter_image_sizes(State& state) {
130+
// Count unique image sizes
131+
document doc = document::parse(get_corpus(JSON_TEST_PATH));
132+
for (auto _ : state) {
133+
set<tuple<uint64_t, uint64_t>> image_sizes;
134+
for (document::object tweet : doc["statuses"].as_array()) {
135+
auto [media, not_found] = tweet["entities"]["media"];
136+
if (!not_found) {
137+
for (document::object image : media.as_array()) {
138+
for (auto [key, size] : image["sizes"].as_object()) {
139+
image_sizes.insert({ size["w"], size["h"] });
140+
}
141+
}
142+
}
143+
}
144+
if (image_sizes.size() != 15) { return; };
145+
}
146+
}
147+
BENCHMARK(twitter_image_sizes);
148+
149+
static void error_code_twitter_image_sizes(State& state) noexcept {
150+
// Count unique image sizes
151+
document doc = document::parse(get_corpus(JSON_TEST_PATH));
152+
for (auto _ : state) {
153+
set<tuple<uint64_t, uint64_t>> image_sizes;
154+
auto [statuses, error] = doc["statuses"].as_array();
155+
if (error) { return; }
156+
for (document::element tweet : statuses) {
157+
auto [images, not_found] = tweet["entities"]["media"].as_array();
158+
if (!not_found) {
159+
for (document::element image : images) {
160+
auto [sizes, error2] = image["sizes"].as_object();
161+
if (error2) { return; }
162+
for (auto [key, size] : sizes) {
163+
auto [width, error3] = size["w"].as_uint64_t();
164+
auto [height, error4] = size["h"].as_uint64_t();
165+
if (error3 || error4) { return; }
166+
image_sizes.insert({ width, height });
167+
}
168+
}
169+
}
170+
}
171+
if (image_sizes.size() != 15) { return; };
172+
}
173+
}
174+
BENCHMARK(error_code_twitter_image_sizes);
175+
176+
static void iterator_twitter_image_sizes(State& state) {
177+
// Count unique image sizes
178+
document doc = document::parse(get_corpus(JSON_TEST_PATH));
179+
for (auto _ : state) {
180+
set<tuple<uint64_t, uint64_t>> image_sizes;
181+
document::iterator iter(doc);
182+
183+
// for (document::object tweet : doc["statuses"].as_array()) {
184+
if (!(iter.move_to_key("statuses") && iter.is_array())) { return; }
185+
if (iter.down()) { // first status
186+
do {
187+
188+
// auto [media, not_found] = tweet["entities"]["media"];
189+
// if (!not_found) {
190+
if (iter.move_to_key("entities")) {
191+
if (!iter.is_object()) { return; }
192+
if (iter.move_to_key("media")) {
193+
if (!iter.is_array()) { return; }
194+
195+
// for (document::object image : media.as_array()) {
196+
if (iter.down()) { // first media
197+
do {
198+
199+
// for (auto [key, size] : image["sizes"].as_object()) {
200+
if (!(iter.move_to_key("sizes") && iter.is_object())) { return; }
201+
if (iter.down()) { // first size
202+
do {
203+
iter.move_to_value();
204+
205+
// image_sizes.insert({ size["w"], size["h"] });
206+
if (!(iter.move_to_key("w")) && !iter.is_integer()) { return; }
207+
uint64_t width = iter.get_integer();
208+
if (!iter.up()) { return; } // back to size
209+
if (!(iter.move_to_key("h")) && !iter.is_integer()) { return; }
210+
uint64_t height = iter.get_integer();
211+
if (!iter.up()) { return; } // back to size
212+
image_sizes.insert({ width, height });
213+
214+
} while (iter.next()); // next size
215+
if (!iter.up()) { return; } // back to sizes
216+
}
217+
if (!iter.up()) { return; } // back to image
218+
} while (iter.next()); // next image
219+
if (!iter.up()) { return; } // back to media
220+
}
221+
if (!iter.up()) { return; } // back to entities
222+
}
223+
if (!iter.up()) { return; } // back to status
224+
}
225+
} while (iter.next()); // next status
226+
}
227+
228+
if (image_sizes.size() != 15) { return; };
229+
}
230+
}
231+
BENCHMARK(iterator_twitter_image_sizes);
232+
233+
// Google Benchmark entry point: supplies main() for this executable, which
// runs the benchmarks registered with BENCHMARK(...) above.
BENCHMARK_MAIN();

0 commit comments

Comments
 (0)