Skip to content

Commit 036f9d5

Browse files
committed
Merge branch 'master' of https://github.com/lemire/simdjson into Multiple_implementation_refactoring_stage2
2 parents 3f24879 + 43143f6 commit 036f9d5

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+1780
-1269
lines changed

CONTRIBUTORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,6 @@ Reini Urban
1919
Tom Dyson
2020
Ihor Dotsenko
2121
Alexey Milovidov
22+
Chang Liu
2223
# if you have contributed to the project and your name does not
2324
# appear in this list, please let us know!

amalgamation.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ cat <<< '
100100
#include <iostream>
101101
#include "simdjson.h"
102102
#include "simdjson.cpp"
103+
using namespace simdjson;
103104
int main(int argc, char *argv[]) {
104105
const char * filename = argv[1];
105106
padded_string p = get_corpus(filename);

benchmark/distinctuseridcompetition.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ void print_vec(const std::vector<int64_t> &v) {
3030
std::cout << std::endl;
3131
}
3232

33-
void simdjson_scan(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
33+
void simdjson_scan(std::vector<int64_t> &answer, simdjson::ParsedJson::iterator &i) {
3434
while(i.move_forward()) {
3535
if(i.get_scope_type() == '{') {
3636
bool founduser = (i.get_string_length() == 4) && (memcmp(i.get_string(), "user", 4) == 0);
@@ -48,30 +48,30 @@ void simdjson_scan(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
4848
}
4949

5050
__attribute__ ((noinline))
51-
std::vector<int64_t> simdjson_justdom(ParsedJson &pj) {
51+
std::vector<int64_t> simdjson_justdom(simdjson::ParsedJson &pj) {
5252
std::vector<int64_t> answer;
53-
ParsedJson::iterator i(pj);
53+
simdjson::ParsedJson::iterator i(pj);
5454
simdjson_scan(answer,i);
5555
remove_duplicates(answer);
5656
return answer;
5757
}
5858

5959
__attribute__ ((noinline))
60-
std::vector<int64_t> simdjson_computestats(const padded_string &p) {
60+
std::vector<int64_t> simdjson_computestats(const simdjson::padded_string &p) {
6161
std::vector<int64_t> answer;
62-
ParsedJson pj = build_parsed_json(p);
62+
simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
6363
if (!pj.isValid()) {
6464
return answer;
6565
}
66-
ParsedJson::iterator i(pj);
66+
simdjson::ParsedJson::iterator i(pj);
6767
simdjson_scan(answer,i);
6868
remove_duplicates(answer);
6969
return answer;
7070
}
7171

7272
__attribute__ ((noinline))
73-
bool simdjson_justparse(const padded_string &p) {
74-
ParsedJson pj = build_parsed_json(p);
73+
bool simdjson_justparse(const simdjson::padded_string &p) {
74+
simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
7575
bool answer = !pj.isValid();
7676
return answer;
7777
}
@@ -135,7 +135,7 @@ std::vector<int64_t> sasjon_justdom(sajson::document & d) {
135135
}
136136

137137
__attribute__ ((noinline))
138-
std::vector<int64_t> sasjon_computestats(const padded_string &p) {
138+
std::vector<int64_t> sasjon_computestats(const simdjson::padded_string &p) {
139139
std::vector<int64_t> answer;
140140
char *buffer = (char *)malloc(p.size());
141141
memcpy(buffer, p.data(), p.size());
@@ -152,7 +152,7 @@ std::vector<int64_t> sasjon_computestats(const padded_string &p) {
152152
}
153153

154154
__attribute__ ((noinline))
155-
bool sasjon_justparse(const padded_string &p) {
155+
bool sasjon_justparse(const simdjson::padded_string &p) {
156156
char *buffer = (char *)malloc(p.size());
157157
memcpy(buffer, p.data(), p.size());
158158
auto d = sajson::parse(sajson::dynamic_allocation(),
@@ -210,7 +210,7 @@ std::vector<int64_t> rapid_justdom(rapidjson::Document &d) {
210210
}
211211

212212
__attribute__ ((noinline))
213-
std::vector<int64_t> rapid_computestats(const padded_string &p) {
213+
std::vector<int64_t> rapid_computestats(const simdjson::padded_string &p) {
214214
std::vector<int64_t> answer;
215215
char *buffer = (char *)malloc(p.size() + 1);
216216
memcpy(buffer, p.data(), p.size());
@@ -228,7 +228,7 @@ std::vector<int64_t> rapid_computestats(const padded_string &p) {
228228
}
229229

230230
__attribute__ ((noinline))
231-
bool rapid_justparse(const padded_string &p) {
231+
bool rapid_justparse(const simdjson::padded_string &p) {
232232
char *buffer = (char *)malloc(p.size() + 1);
233233
memcpy(buffer, p.data(), p.size());
234234
buffer[p.size()] = '\0';
@@ -267,9 +267,9 @@ int main(int argc, char *argv[]) {
267267
if (optind + 1 < argc) {
268268
std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
269269
}
270-
padded_string p;
270+
simdjson::padded_string p;
271271
try {
272-
get_corpus(filename).swap(p);
272+
simdjson::get_corpus(filename).swap(p);
273273
} catch (const std::exception &e) { // caught by reference to base
274274
std::cout << "Could not load the file " << filename << std::endl;
275275
return EXIT_FAILURE;
@@ -321,7 +321,7 @@ int main(int argc, char *argv[]) {
321321
!justdata);
322322
BEST_TIME("sasjon (just parse) ", sasjon_justparse(p), false, , repeat, volume,
323323
!justdata);
324-
ParsedJson dsimdjson = build_parsed_json(p);
324+
simdjson::ParsedJson dsimdjson = simdjson::build_parsed_json(p);
325325
BEST_TIME("simdjson (just dom) ", simdjson_justdom(dsimdjson).size(), size, , repeat,
326326
volume, !justdata);
327327
char *buffer = (char *)malloc(p.size());

benchmark/minifiercompetition.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,9 @@ int main(int argc, char *argv[]) {
6565
exit(1);
6666
}
6767
const char * filename = argv[optind];
68-
padded_string p;
68+
simdjson::padded_string p;
6969
try {
70-
get_corpus(filename).swap(p);
70+
simdjson::get_corpus(filename).swap(p);
7171
} catch (const std::exception& e) { // caught by reference to base
7272
std::cout << "Could not load the file " << filename << std::endl;
7373
return EXIT_FAILURE;

benchmark/parse.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,9 @@ int main(int argc, char *argv[]) {
7878
if (verbose) {
7979
std::cout << "[verbose] loading " << filename << std::endl;
8080
}
81-
padded_string p;
81+
simdjson::padded_string p;
8282
try {
83-
get_corpus(filename).swap(p);
83+
simdjson::get_corpus(filename).swap(p);
8484
} catch (const std::exception &e) { // caught by reference to base
8585
std::cout << "Could not load the file " << filename << std::endl;
8686
return EXIT_FAILURE;
@@ -128,7 +128,7 @@ int main(int argc, char *argv[]) {
128128
std::cout << "[verbose] iteration # " << i << std::endl;
129129
}
130130
unified.start();
131-
ParsedJson pj;
131+
simdjson::ParsedJson pj;
132132
bool allocok = pj.allocateCapacity(p.size());
133133
if (!allocok) {
134134
std::cerr << "failed to allocate memory" << std::endl;
@@ -145,7 +145,7 @@ int main(int argc, char *argv[]) {
145145
}
146146
unified.start();
147147
// The default template is simdjson::instruction_set::native.
148-
isok = (find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
148+
isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
149149
unified.end(results);
150150
cy1 += results[0];
151151
cl1 += results[1];
@@ -158,7 +158,7 @@ int main(int argc, char *argv[]) {
158158
}
159159
unified.start();
160160
// The default template is simdjson::instruction_set::native.
161-
isok = isok && (simdjson::SUCCESS == unified_machine<>(p.data(), p.size(), pj));
161+
isok = isok && (simdjson::SUCCESS == simdjson::unified_machine<>(p.data(), p.size(), pj));
162162
unified.end(results);
163163
cy2 += results[0];
164164
cl2 += results[1];
@@ -176,7 +176,7 @@ int main(int argc, char *argv[]) {
176176
if (verbose) {
177177
std::cout << "[verbose] iteration # " << i << std::endl;
178178
}
179-
ParsedJson pj;
179+
simdjson::ParsedJson pj;
180180
bool allocok = pj.allocateCapacity(p.size());
181181
if (!allocok) {
182182
std::cerr << "failed to allocate memory" << std::endl;
@@ -188,8 +188,8 @@ int main(int argc, char *argv[]) {
188188

189189
auto start = std::chrono::steady_clock::now();
190190
// The default template is simdjson::instruction_set::native.
191-
isok = (find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
192-
isok = isok && (simdjson::SUCCESS == unified_machine<>(p.data(), p.size(), pj));
191+
isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
192+
isok = isok && (simdjson::SUCCESS == simdjson::unified_machine<>(p.data(), p.size(), pj));
193193
auto end = std::chrono::steady_clock::now();
194194
std::chrono::duration<double> secs = end - start;
195195
res[i] = secs.count();
@@ -199,7 +199,7 @@ int main(int argc, char *argv[]) {
199199
return EXIT_FAILURE;
200200
}
201201
}
202-
ParsedJson pj = build_parsed_json(p); // do the parsing again to get the stats
202+
simdjson::ParsedJson pj = build_parsed_json(p); // do the parsing again to get the stats
203203
if (!pj.isValid()) {
204204
std::cerr << pj.getErrorMsg() << std::endl;
205205
std::cerr << "Could not parse. " << std::endl;

benchmark/parseandstatcompetition.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ void sajson_traverse(stat_t &stats, const sajson::value &node) {
146146
}
147147

148148
__attribute__ ((noinline))
149-
stat_t sasjon_computestats(const padded_string &p) {
149+
stat_t sasjon_computestats(const simdjson::padded_string &p) {
150150
stat_t answer;
151151
char *buffer = (char *)malloc(p.size());
152152
memcpy(buffer, p.data(), p.size());
@@ -204,7 +204,7 @@ void rapid_traverse(stat_t &stats, const rapidjson::Value &v) {
204204
}
205205

206206
__attribute__ ((noinline))
207-
stat_t rapid_computestats(const padded_string &p) {
207+
stat_t rapid_computestats(const simdjson::padded_string &p) {
208208
stat_t answer;
209209
char *buffer = (char *)malloc(p.size() + 1);
210210
memcpy(buffer, p.data(), p.size());
@@ -253,9 +253,9 @@ int main(int argc, char *argv[]) {
253253
if (optind + 1 < argc) {
254254
std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
255255
}
256-
padded_string p;
256+
simdjson::padded_string p;
257257
try {
258-
get_corpus(filename).swap(p);
258+
simdjson::get_corpus(filename).swap(p);
259259
} catch (const std::exception &e) { // caught by reference to base
260260
std::cout << "Could not load the file " << filename << std::endl;
261261
return EXIT_FAILURE;

benchmark/parsingcompetition.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,9 @@ int main(int argc, char *argv[]) {
8383
std::cerr << "warning: ignoring everything after " << argv[optind + 1]
8484
<< std::endl;
8585
}
86-
padded_string p;
86+
simdjson::padded_string p;
8787
try {
88-
get_corpus(filename).swap(p);
88+
simdjson::get_corpus(filename).swap(p);
8989
} catch (const std::exception &e) { // caught by reference to base
9090
std::cout << "Could not load the file " << filename << std::endl;
9191
return EXIT_FAILURE;
@@ -101,7 +101,7 @@ int main(int argc, char *argv[]) {
101101
std::cout << p.size() << " B ";
102102
std::cout << std::endl;
103103
}
104-
ParsedJson pj;
104+
simdjson::ParsedJson pj;
105105
bool allocok = pj.allocateCapacity(p.size(), 1024);
106106

107107
if (!allocok) {

benchmark/statisticalmodel.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ struct stat_s {
4242

4343
using stat_t = struct stat_s;
4444

45-
stat_t simdjson_computestats(const padded_string &p) {
45+
stat_t simdjson_computestats(const simdjson::padded_string &p) {
4646
stat_t answer;
47-
ParsedJson pj = build_parsed_json(p);
47+
simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
4848
answer.valid = pj.isValid();
4949
if (!answer.valid) {
5050
return answer;
@@ -134,9 +134,9 @@ int main(int argc, char *argv[]) {
134134
std::cerr << "warning: ignoring everything after " << argv[optind + 1]
135135
<< std::endl;
136136
}
137-
padded_string p;
137+
simdjson::padded_string p;
138138
try {
139-
get_corpus(filename).swap(p);
139+
simdjson::get_corpus(filename).swap(p);
140140
} catch (const std::exception &e) { // caught by reference to base
141141
std::cerr << "Could not load the file " << filename << std::endl;
142142
return EXIT_FAILURE;
@@ -163,7 +163,7 @@ int main(int argc, char *argv[]) {
163163
s.object_count, s.array_count, s.null_count, s.true_count,
164164
s.false_count, s.byte_count, s.structural_indexes_count);
165165
#ifdef __linux__
166-
ParsedJson pj;
166+
simdjson::ParsedJson pj;
167167
bool allocok = pj.allocateCapacity(p.size());
168168
if (!allocok) {
169169
std::cerr << "failed to allocate memory" << std::endl;
@@ -181,7 +181,7 @@ int main(int argc, char *argv[]) {
181181
for (uint32_t i = 0; i < iterations; i++) {
182182
unified.start();
183183
// The default template is simdjson::instruction_set::native.
184-
bool isok = (find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
184+
bool isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
185185
unified.end(results);
186186

187187
cy1 += results[0];

include/simdjson/jsoncharutils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "simdjson/common_defs.h"
55
#include "simdjson/parsedjson.h"
66

7+
namespace simdjson {
78
// structural chars here are
89
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL)
910
// we are also interested in the four whitespace characters
@@ -293,5 +294,6 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
293294
// will return 0 when the code point was too large.
294295
return 0; // bad r
295296
}
297+
}
296298

297299
#endif

include/simdjson/jsonformatutils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <iomanip>
66
#include <iostream>
77

8+
namespace simdjson {
89
// ends with zero char
910
static inline void print_with_escapes(const unsigned char *src) {
1011
while (*src) {
@@ -195,6 +196,7 @@ static inline void print_with_escapes(const char *src, std::ostream &os,
195196
size_t len) {
196197
print_with_escapes(reinterpret_cast<const unsigned char *>(src), os, len);
197198
}
199+
}
198200

199201
#
200202
#endif

0 commit comments

Comments
 (0)