Skip to content

Commit 6723221

Browse files
committed
Refactoring stage1 to facilitate multiple implementations.
1 parent e1af373 commit 6723221

File tree

7 files changed

+974
-828
lines changed

7 files changed

+974
-828
lines changed

benchmark/parse.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,11 @@ int main(int argc, char *argv[]) {
144144
std::cout << "[verbose] allocated memory for parsed JSON " << std::endl;
145145
}
146146
unified.start();
147-
isok = (find_structural_bits(p.data(), p.size(), pj) == simdjson::SUCCESS);
147+
#ifdef __AVX2__
148+
isok = (find_structural_bits<simdjson::instruction_set::avx2>(p.data(), p.size(), pj) == simdjson::SUCCESS);
149+
#elif defined (__ARM_NEON)
150+
isok = (find_structural_bits<simdjson::instruction_set::neon>(p.data(), p.size(), pj) == simdjson::SUCCESS);
151+
#endif
148152
unified.end(results);
149153
cy1 += results[0];
150154
cl1 += results[1];
@@ -185,7 +189,11 @@ int main(int argc, char *argv[]) {
185189
}
186190

187191
auto start = std::chrono::steady_clock::now();
188-
isok = (find_structural_bits(p.data(), p.size(), pj) == simdjson::SUCCESS);
192+
#ifdef __AVX2__
193+
isok = (find_structural_bits<simdjson::instruction_set::avx2>(p.data(), p.size(), pj) == simdjson::SUCCESS);
194+
#elif defined (__ARM_NEON)
195+
isok = (find_structural_bits<simdjson::instruction_set::neon>(p.data(), p.size(), pj) == simdjson::SUCCESS);
196+
#endif
189197
isok = isok && (simdjson::SUCCESS == unified_machine(p.data(), p.size(), pj));
190198
auto end = std::chrono::steady_clock::now();
191199
std::chrono::duration<double> secs = end - start;

benchmark/statisticalmodel.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,11 @@ int main(int argc, char *argv[]) {
180180
results.resize(evts.size());
181181
for (uint32_t i = 0; i < iterations; i++) {
182182
unified.start();
183-
bool isok = (find_structural_bits(p.data(), p.size(), pj) == simdjson::SUCCESS);
183+
#ifdef __AVX2__
184+
bool isok = (find_structural_bits<simdjson::instruction_set::avx2>(p.data(), p.size(), pj) == simdjson::SUCCESS);
185+
#elif defined (__ARM_NEON)
186+
bool isok = (find_structural_bits<simdjson::instruction_set::neon>(p.data(), p.size(), pj) == simdjson::SUCCESS);
187+
#endif
184188
unified.end(results);
185189

186190
cy1 += results[0];

include/simdjson/jsonparser.h

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,62 @@
88
#include "simdjson/stage1_find_marks.h"
99
#include "simdjson/stage2_build_tape.h"
1010
#include "simdjson/simdjson.h"
11+
#ifdef _MSC_VER
12+
#include <windows.h>
13+
#include <sysinfoapi.h>
14+
#else
15+
#include <unistd.h>
16+
#endif
17+
18+
// function pointer type for json_parse
19+
using json_parse_functype = int (const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded);
20+
21+
// Pointer that holds the json_parse implementation corresponding to the available SIMD instruction set
22+
extern json_parse_functype *json_parse_ptr;
23+
24+
template<simdjson::instruction_set T>
25+
int json_parse_implementation(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
26+
if (pj.bytecapacity < len) {
27+
return simdjson::CAPACITY;
28+
}
29+
bool reallocated = false;
30+
if(reallocifneeded) {
31+
#ifdef ALLOW_SAME_PAGE_BUFFER_OVERRUN
32+
// realloc is needed if the end of the memory crosses a page
33+
#ifdef _MSC_VER
34+
SYSTEM_INFO sysInfo;
35+
GetSystemInfo(&sysInfo);
36+
long pagesize = sysInfo.dwPageSize;
37+
#else
38+
long pagesize = sysconf (_SC_PAGESIZE);
39+
#endif
40+
//////////////
41+
// We want to check that buf + len - 1 and buf + len - 1 + SIMDJSON_PADDING
42+
// are in the same page.
43+
// That is, we want to check that
44+
// (buf + len - 1) / pagesize == (buf + len - 1 + SIMDJSON_PADDING) / pagesize
45+
// That's true if (buf + len - 1) % pagesize + SIMDJSON_PADDING < pagesize.
46+
///////////
47+
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) + SIMDJSON_PADDING < static_cast<uintptr_t>(pagesize) ) {
48+
#else // SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN
49+
if(true) { // if not SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN, we always reallocate
50+
#endif
51+
const uint8_t *tmpbuf = buf;
52+
buf = (uint8_t *) allocate_padded_buffer(len);
53+
if(buf == NULL) return simdjson::MEMALLOC;
54+
memcpy((void*)buf,tmpbuf,len);
55+
reallocated = true;
56+
}
57+
}
58+
int stage1_is_ok = find_structural_bits<T>(buf, len, pj);
59+
if(stage1_is_ok != simdjson::SUCCESS) {
60+
pj.errorcode = stage1_is_ok;
61+
return pj.errorcode;
62+
}
63+
int res = unified_machine(buf, len, pj);
64+
if(reallocated) { aligned_free((void*)buf);}
65+
return res;
66+
}
1167

1268
// Parse a document found in buf.
1369
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
@@ -24,8 +80,11 @@
2480
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
2581
// all bytes at and after buf + len are ignored (can be garbage).
2682
// The ParsedJson object can be reused.
83+
2784
WARN_UNUSED
28-
int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true);
85+
inline int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
86+
// Forwards all arguments unchanged to the implementation that json_parse_ptr points at.
// The pointer is only declared (extern) in this header and is not checked for null here.
return json_parse_ptr(buf, len, pj, reallocifneeded);
87+
}
2988

3089
// Parse a document found in buf.
3190
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
@@ -45,7 +104,7 @@ int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifnee
45104
// The ParsedJson object can be reused.
46105
WARN_UNUSED
47106
inline int json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
48-
return json_parse(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
107+
// Reinterprets the char buffer as bytes and calls through json_parse_ptr directly
// (the removed line above went through the uint8_t overload of json_parse instead).
return json_parse_ptr(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
49108
}
50109

51110
// We do not want to allow implicit conversion from C string to std::string.
@@ -140,4 +199,4 @@ inline ParsedJson build_parsed_json(const padded_string &s) {
140199

141200

142201

143-
#endif
202+
#endif

include/simdjson/simdjson.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44
#include <string>
55

66
struct simdjson {
7+
// Tags naming a SIMD instruction set. Used as the template argument that selects an
// implementation, e.g. find_structural_bits<simdjson::instruction_set::avx2> and
// json_parse_implementation<T>.
// NOTE(review): `none` presumably denotes "no SIMD support available" - confirm against callers.
enum class instruction_set {
8+
avx2,
9+
sse4_2,
10+
neon,
11+
none
12+
};
13+
714
enum errorValues {
815
SUCCESS = 0,
916
CAPACITY, // This ParsedJson can't support a document that big

0 commit comments

Comments
 (0)