Skip to content

Commit a43b077

Browse files
committed
Lots and lots of cleaning.
1 parent 5fae7b2 commit a43b077

15 files changed

+521
-465
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
.PHONY: clean cleandist
88

99
DEPSINCLUDE = -Idependencies/rapidjson/include -Idependencies/sajson/include -Idependencies/json11 -Idependencies/fastjson/src -Idependencies/fastjson/include -Idependencies/gason/src -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src
10-
CXXFLAGS = -std=c++11 -march=native -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux $(DEPSINCLUDE)
10+
CXXFLAGS = -std=c++11 -march=native -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux $(DEPSINCLUDE)
1111
CFLAGS = -march=native -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src
1212
ifeq ($(SANITIZE),1)
1313
CXXFLAGS += -g3 -O0 -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined

benchmark/linux/linux-perf-events.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
2121
std::vector<uint64_t> ids;
2222

2323
public:
24-
LinuxEvents(std::vector<int> config_vec) : fd(0) {
24+
explicit LinuxEvents(std::vector<int> config_vec) : fd(0) {
2525
memset(&attribs, 0, sizeof(attribs));
2626
attribs.type = TYPE;
2727
attribs.size = sizeof(attribs);

benchmark/minifiercompetition.cpp

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <unistd.h>
12
#include <iostream>
23

34
#include "benchmark.h"
@@ -13,6 +14,7 @@
1314
#include "rapidjson/writer.h"
1415
#include "sajson.h"
1516

17+
1618
using namespace rapidjson;
1719
using namespace std;
1820

@@ -43,17 +45,29 @@ std::string rapidstringme(char *json) {
4345
}
4446

4547
int main(int argc, char *argv[]) {
46-
if (argc < 2) {
47-
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
48-
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
48+
int c;
49+
bool verbose = false;
50+
while ((c = getopt (argc, argv, "v")) != -1)
51+
switch (c)
52+
{
53+
case 'v':
54+
verbose = true;
55+
break;
56+
default:
57+
abort ();
58+
}
59+
if (optind >= argc) {
60+
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
4961
exit(1);
5062
}
51-
bool verbose = false;
52-
if (argc > 2) {
53-
if (strcmp(argv[1], "-v"))
54-
verbose = true;
63+
const char * filename = argv[optind];
64+
pair<u8 *, size_t> p;
65+
try {
66+
p = get_corpus(filename);
67+
} catch (const std::exception& e) { // caught by reference to base
68+
std::cout << "Could not load the file " << filename << std::endl;
69+
return EXIT_FAILURE;
5570
}
56-
pair<u8 *, size_t> p = get_corpus(argv[argc - 1]);
5771
if (verbose) {
5872
std::cout << "Input has ";
5973
if (p.second > 1024 * 1024)

benchmark/parse.cpp

Lines changed: 15 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -31,79 +31,14 @@
3131
#include "jsonparser/stage34_unified.h"
3232
using namespace std;
3333

34-
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
35-
namespace Color {
36-
enum Code {
37-
FG_DEFAULT = 39,
38-
FG_BLACK = 30,
39-
FG_RED = 31,
40-
FG_GREEN = 32,
41-
FG_YELLOW = 33,
42-
FG_BLUE = 34,
43-
FG_MAGENTA = 35,
44-
FG_CYAN = 36,
45-
FG_LIGHT_GRAY = 37,
46-
FG_DARK_GRAY = 90,
47-
FG_LIGHT_RED = 91,
48-
FG_LIGHT_GREEN = 92,
49-
FG_LIGHT_YELLOW = 93,
50-
FG_LIGHT_BLUE = 94,
51-
FG_LIGHT_MAGENTA = 95,
52-
FG_LIGHT_CYAN = 96,
53-
FG_WHITE = 97,
54-
BG_RED = 41,
55-
BG_GREEN = 42,
56-
BG_BLUE = 44,
57-
BG_DEFAULT = 49
58-
};
59-
class Modifier {
60-
Code code;
61-
62-
public:
63-
Modifier(Code pCode) : code(pCode) {}
64-
friend std::ostream &operator<<(std::ostream &os, const Modifier &mod) {
65-
return os << "\033[" << mod.code << "m";
66-
}
67-
};
68-
} // namespace Color
69-
70-
void colorfuldisplay(ParsedJson &pj, const u8 *buf) {
71-
Color::Modifier greenfg(Color::FG_GREEN);
72-
Color::Modifier yellowfg(Color::FG_YELLOW);
73-
Color::Modifier deffg(Color::FG_DEFAULT);
74-
size_t i = 0;
75-
// skip initial fluff
76-
while ((i + 1 < pj.n_structural_indexes) &&
77-
(pj.structural_indexes[i] == pj.structural_indexes[i + 1])) {
78-
i++;
79-
}
80-
for (; i < pj.n_structural_indexes; i++) {
81-
u32 idx = pj.structural_indexes[i];
82-
u8 c = buf[idx];
83-
if (((c & 0xdf) == 0x5b)) { // meaning 7b or 5b, { or [
84-
std::cout << greenfg << buf[idx] << deffg;
85-
} else if (((c & 0xdf) == 0x5d)) { // meaning 7d or 5d, } or ]
86-
std::cout << greenfg << buf[idx] << deffg;
87-
} else {
88-
std::cout << yellowfg << buf[idx] << deffg;
89-
}
90-
if (i + 1 < pj.n_structural_indexes) {
91-
u32 nextidx = pj.structural_indexes[i + 1];
92-
for (u32 pos = idx + 1; pos < nextidx; pos++) {
93-
std::cout << buf[pos];
94-
}
95-
}
96-
}
97-
std::cout << std::endl;
98-
}
99-
10034
int main(int argc, char *argv[]) {
10135
bool verbose = false;
10236
bool dump = false;
37+
bool forceoneiteration = false;
10338

10439
int c;
10540

106-
while ((c = getopt (argc, argv, "vd")) != -1)
41+
while ((c = getopt (argc, argv, "1vd")) != -1)
10742
switch (c)
10843
{
10944
case 'v':
@@ -112,6 +47,9 @@ int main(int argc, char *argv[]) {
11247
case 'd':
11348
dump = true;
11449
break;
50+
case '1':
51+
forceoneiteration = true;
52+
break;
11553
default:
11654
abort ();
11755
}
@@ -124,7 +62,13 @@ int main(int argc, char *argv[]) {
12462
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
12563
}
12664
if(verbose) cout << "[verbose] loading " << filename << endl;
127-
pair<u8 *, size_t> p = get_corpus(filename);
65+
pair<u8 *, size_t> p;
66+
try {
67+
p = get_corpus(filename);
68+
} catch (const std::exception& e) { // caught by reference to base
69+
std::cout << "Could not load the file " << filename << std::endl;
70+
return EXIT_FAILURE;
71+
}
12872
if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.second << " bytes)" << endl;
12973
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
13074
ParsedJson &pj(*pj_ptr);
@@ -133,7 +77,7 @@ int main(int argc, char *argv[]) {
13377
#if defined(DEBUG)
13478
const u32 iterations = 1;
13579
#else
136-
const u32 iterations = p.second < 1 * 1000 * 1000? 1000 : 10;
80+
const u32 iterations = forceoneiteration ? 1 : ( p.second < 1 * 1000 * 1000? 1000 : 10);
13781
#endif
13882
vector<double> res;
13983
res.resize(iterations);
@@ -174,7 +118,7 @@ int main(int argc, char *argv[]) {
174118
}
175119
unified.start();
176120
#endif
177-
isok = flatten_indexes(p.second, pj);
121+
isok = isok && flatten_indexes(p.second, pj);
178122
#ifndef SQUASH_COUNTERS
179123
unified.end(results);
180124
cy2 += results[0];
@@ -187,7 +131,7 @@ int main(int argc, char *argv[]) {
187131
unified.start();
188132
#endif
189133

190-
isok = unified_machine(p.first, p.second, pj);
134+
isok = isok && unified_machine(p.first, p.second, pj);
191135
#ifndef SQUASH_COUNTERS
192136
unified.end(results);
193137
cy3 += results[0];

benchmark/parsingcompetition.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ void on_json_error( void *, const fastjson::ErrorContext& ec) {
3131
bool fastjson_parse(const char *input) {
3232
fastjson::Token token;
3333
fastjson::dom::Chunk chunk;
34-
std::string error_message;
3534
return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error, NULL);
3635
}
3736
// end of fastjson stuff
@@ -62,7 +61,14 @@ int main(int argc, char *argv[]) {
6261
if(optind + 1 < argc) {
6362
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
6463
}
65-
pair<u8 *, size_t> p = get_corpus(filename);
64+
pair<u8 *, size_t> p;
65+
try {
66+
p = get_corpus(filename);
67+
} catch (const std::exception& e) { // caught by reference to base
68+
std::cout << "Could not load the file " << filename << std::endl;
69+
return EXIT_FAILURE;
70+
}
71+
6672
if (verbose) {
6773
std::cout << "Input has ";
6874
if (p.second > 1024 * 1024)

include/jsonparser/jsonioutil.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@ char * allocate_aligned_buffer(size_t length);
2020
// first element of the pair is a string (null terminated)
2121
// whereas the second element is the length.
2222
// the caller is responsible for freeing the buffer (free the .first member of the returned std::pair<u8 *, size_t>)
23+
//
24+
// throws an exception if the file cannot be opened; wrap the call in try/catch:
25+
// try {
26+
// p = get_corpus(filename);
27+
// } catch (const std::exception& e) {
28+
// std::cout << "Could not load the file " << filename << std::endl;
29+
// }
2330
std::pair<u8 *, size_t> get_corpus(std::string filename);
2431

2532
#endif

include/jsonparser/numberparsing.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
128128
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
129129
const __m128i mul_1_10000 =
130130
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
131-
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((__m128i *)chars), ascii0);
131+
const __m128i input = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)chars), ascii0);
132132
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
133133
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
134134
const __m128i t3 = _mm_packus_epi32(t2, t2);
@@ -149,7 +149,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
149149
//
150150
static never_inline bool
151151
parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
152-
ParsedJson &pj, const u32 depth, const u32 offset,
152+
ParsedJson &pj, UNUSED const u32 depth, const u32 offset,
153153
UNUSED bool found_zero, bool found_minus) {
154154
const char *p = (const char *)(buf + offset);
155155

@@ -193,7 +193,6 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
193193
}
194194
exponent = firstafterperiod - p;
195195
}
196-
int64_t expnumber = 0; // exponential part
197196
if (('e' == *p) || ('E' == *p)) {
198197
++p;
199198
bool negexp = false;
@@ -210,7 +209,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
210209
return false;
211210
}
212211
unsigned char digit = *p - '0';
213-
expnumber = digit;
212+
int64_t expnumber = digit; // exponential part
214213
p++;
215214
if (is_integer(*p)) {
216215
digit = *p - '0';
@@ -270,7 +269,7 @@ parse_highprecision_float(const u8 *const buf, UNUSED size_t len,
270269
//
271270
static never_inline bool parse_large_integer(const u8 *const buf,
272271
UNUSED size_t len, ParsedJson &pj,
273-
const u32 depth, const u32 offset,
272+
UNUSED const u32 depth, const u32 offset,
274273
UNUSED bool found_zero,
275274
bool found_minus) {
276275
const char *p = (const char *)(buf + offset);
@@ -340,10 +339,12 @@ static never_inline bool parse_large_integer(const u8 *const buf,
340339
#define unlikely(x) __builtin_expect(!!(x), 0)
341340
#endif
342341

342+
343+
343344
// parse the number at buf + offset
344345
// define JSON_TEST_NUMBERS for unit testing
345346
static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
346-
ParsedJson &pj, const u32 depth,
347+
ParsedJson &pj, UNUSED const u32 depth,
347348
const u32 offset, UNUSED bool found_zero,
348349
bool found_minus) {
349350
const char *p = (const char *)(buf + offset);

include/jsonparser/simdjson_internal.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -105,14 +105,14 @@ struct ParsedJson {
105105

106106

107107
void write_tape_s64(s64 i) {
108-
*((s64 *)current_number_buf_loc) = i;
109-
current_number_buf_loc += 8;
108+
*((s64 *)current_number_buf_loc) = i;// safe because array will be 8-byte aligned, could use memcpy
109+
current_number_buf_loc += sizeof(s64);
110110
write_tape(current_number_buf_loc - number_buf, 'l');
111111
}
112112

113113
void write_tape_double(double d) {
114-
*((double *)current_number_buf_loc) = d;
115-
current_number_buf_loc += 8;
114+
*((double *)current_number_buf_loc) = d;// safe because array will be 8-byte aligned, could use memcpy
115+
current_number_buf_loc += sizeof(double);
116116
write_tape(current_number_buf_loc - number_buf, 'd');
117117
}
118118

@@ -137,7 +137,7 @@ struct ParsedJson {
137137
u32 scope_header; // the start of our current scope that contains our current location
138138
u32 location; // our current location on a tape
139139

140-
ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {}
140+
explicit ParsedJsonHandle(ParsedJson & pj_) : pj(pj_), depth(0), scope_header(0), location(0) {}
141141
// OK with default copy constructor as the way to clone the POD structure
142142

143143
// some placeholder navigation. Will convert over to a more native C++-ish way of doing
@@ -167,7 +167,7 @@ struct ParsedJson {
167167

168168

169169
#ifdef DEBUG
170-
inline void dump256(m256 d, std::string msg) {
170+
inline void dump256(m256 d, const std::string msg) {
171171
for (u32 i = 0; i < 32; i++) {
172172
std::cout << std::setw(3) << (int)*(((u8 *)(&d)) + i);
173173
if (!((i + 1) % 8))
@@ -181,14 +181,14 @@ inline void dump256(m256 d, std::string msg) {
181181
}
182182

183183
// dump bits low to high
184-
inline void dumpbits(u64 v, std::string msg) {
184+
inline void dumpbits(u64 v, const std::string msg) {
185185
for (u32 i = 0; i < 64; i++) {
186186
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
187187
}
188188
std::cout << " " << msg << "\n";
189189
}
190190

191-
inline void dumpbits32(u32 v, std::string msg) {
191+
inline void dumpbits32(u32 v, const std::string msg) {
192192
for (u32 i = 0; i < 32; i++) {
193193
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
194194
}
@@ -201,14 +201,14 @@ inline void dumpbits32(u32 v, std::string msg) {
201201
#endif
202202

203203
// dump bits low to high
204-
inline void dumpbits_always(u64 v, std::string msg) {
204+
inline void dumpbits_always(u64 v, const std::string msg) {
205205
for (u32 i = 0; i < 64; i++) {
206206
std::cout << (((v >> (u64)i) & 0x1ULL) ? "1" : "_");
207207
}
208208
std::cout << " " << msg << "\n";
209209
}
210210

211-
inline void dumpbits32_always(u32 v, std::string msg) {
211+
inline void dumpbits32_always(u32 v, const std::string msg) {
212212
for (u32 i = 0; i < 32; i++) {
213213
std::cout << (((v >> (u32)i) & 0x1ULL) ? "1" : "_");
214214
}

include/jsonparser/stringparsing.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
5858
}
5959

6060
really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
61-
ParsedJson &pj, u32 depth, u32 offset) {
61+
ParsedJson &pj, UNUSED const u32 depth, u32 offset) {
6262
using namespace std;
6363
const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
6464
u8 *dst = pj.current_string_buf_loc;

src/jsonminifier.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
137137
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
138138
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
139139
quote_mask ^= prev_iter_inside_quote;
140-
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
140+
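prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// not undefined behavior: before C++20 the unsigned-to-signed conversion and the right shift of a negative value are implementation-defined (arithmetic shift on mainstream compilers)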
141141
const __m256i low_nibble_mask = _mm256_setr_epi8(
142142
// 0 9 a b c d
143143
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
@@ -220,7 +220,7 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
220220
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
221221
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
222222
quote_mask ^= prev_iter_inside_quote;
223-
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);
223+
// prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we don't need this anymore
224224

225225
__m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
226226
__m256i mask_70 =

0 commit comments

Comments
 (0)