Skip to content

Commit 18633e0

Browse files
committed
Added more thorough testing.
1 parent f0af315 commit 18633e0

6 files changed

Lines changed: 496 additions & 48 deletions

File tree

Makefile

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
.PHONY: clean cleandist
88

99
CXXFLAGS = -std=c++11 -g2 -O3 -march=native -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux -Idependencies/rapidjson/include -Idependencies/sajson/include
10-
EXECUTABLES=parse jsoncheck numberparsingcheck minifiercompetition parsingcompetition minify allparserscheckfile
10+
EXECUTABLES=parse jsoncheck numberparsingcheck stringparsingcheck minifiercompetition parsingcompetition minify allparserscheckfile
1111

1212
HEADERS= include/jsonparser/simdutf8check.h include/jsonparser/stringparsing.h include/jsonparser/numberparsing.h include/jsonparser/jsonparser.h include/jsonparser/common_defs.h include/jsonparser/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/jsonparser/simdjson_internal.h include/jsonparser/stage1_find_marks.h include/jsonparser/stage2_flatten.h include/jsonparser/stage34_unified.h include/jsonparser/jsoncharutils.h
1313
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
@@ -24,9 +24,15 @@ LIBS=$(RAPIDJSON_INCLUDE) $(SAJSON_INCLUDE)
2424

2525
all: $(LIBS) $(EXECUTABLES)
2626

27-
test: jsoncheck numberparsingcheck
28-
-./numberparsingcheck
27+
# Build and run every self-check binary, then print a green success banner.
# make stops at the first check that exits non-zero, so the banner is only
# reached when all three checks succeed.
test: jsoncheck numberparsingcheck stringparsingcheck
	./numberparsingcheck
	./stringparsingcheck
	./jsoncheck
	@echo
	@# Colouring is cosmetic: tput fails when TERM is unset or unknown
	@# (CI, cron), and that must not turn a passing test run into a failure.
	@tput setaf 2 2>/dev/null || true
	@echo "It looks like the code is good!"
	@tput sgr0 2>/dev/null || true
35+
3036

3137
$(SAJSON_INCLUDE):
3238
git submodule update --init --recursive
@@ -48,6 +54,11 @@ jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
4854
numberparsingcheck:tests/numberparsingcheck.cpp $(HEADERS) $(LIBFILES)
4955
$(CXX) $(CXXFLAGS) -o numberparsingcheck tests/numberparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
5056

57+
58+
# String-parsing self-check. -DJSON_TEST_STRINGS enables the
# foundString/foundBadString hooks inside parse_string.
# NOTE(review): src/stage34_unified.cpp is not compiled separately here,
# presumably because the test source includes it directly (as
# numberparsingcheck.cpp does) - confirm against tests/stringparsingcheck.cpp.
stringparsingcheck: tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
	$(CXX) $(CXXFLAGS) -o $@ $< src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
60+
61+
5162
minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(MINIFIERHEADERS) $(LIBFILES) $(MINIFIERLIBFILES)
5263
$(CXX) $(CXXFLAGS) -o minifiercompetition $(LIBFILES) $(MINIFIERLIBFILES) benchmark/minifiercompetition.cpp -I. $(LIBFLAGS)
5364

include/jsonparser/stringparsing.h

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ static const u8 escape_map[256] = {
1717

1818
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4.
1919
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5.
20-
0, 0, 0x08, 0, 0, 0, 0x12, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6.
20+
0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6.
2121
0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7.
2222

2323
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -72,6 +72,9 @@ really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
7272
using namespace std;
7373
const u8 *src = &buf[offset + 1]; // we know that buf at offset is a "
7474
u8 *dst = pj.current_string_buf_loc;
75+
#ifdef JSON_TEST_STRINGS // for unit testing
76+
u8 *const start_of_string = dst;
77+
#endif
7578
#ifdef DEBUG
7679
cout << "Entering parse string with offset " << offset << "\n";
7780
#endif
@@ -104,6 +107,7 @@ really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
104107
m256 unitsep = _mm256_set1_epi8(0x1F);
105108
m256 unescaped_vec = _mm256_cmpeq_epi8(_mm256_max_epu8(unitsep,v),unitsep);// could do it with saturated subtraction
106109
#endif // CHECKUNESCAPED
110+
107111
u32 quote_dist = __builtin_ctz(quote_bits);
108112
u32 bs_dist = __builtin_ctz(bs_bits);
109113
// store to dest unconditionally - we can overwrite the bits we don't like
@@ -122,12 +126,20 @@ really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
122126

123127
pj.write_tape(depth, pj.current_string_buf_loc - pj.string_buf, '"');
124128

125-
pj.current_string_buf_loc = dst + quote_dist + 1;
129+
pj.current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value
126130
#ifdef CHECKUNESCAPED
127131
// check that there is no unescaped char before the quote
128132
u32 unescaped_bits = (u32)_mm256_movemask_epi8(unescaped_vec);
129-
return ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0;
133+
bool is_ok = ((quote_bits - 1) & (~ quote_bits) & unescaped_bits) == 0;
134+
#ifdef JSON_TEST_STRINGS // for unit testing
135+
if(is_ok) foundString(buf + offset,start_of_string,pj.current_string_buf_loc - 1);
136+
else foundBadString(buf + offset);
137+
#endif // JSON_TEST_STRINGS
138+
return is_ok;
130139
#else //CHECKUNESCAPED
140+
#ifdef JSON_TEST_STRINGS // for unit testing
141+
foundString(buf + offset,start_of_string,pj.current_string_buf_loc);
142+
#endif // JSON_TEST_STRINGS
131143
return true;
132144
#endif //CHECKUNESCAPED
133145
} else if (quote_dist > bs_dist) {
@@ -139,6 +151,9 @@ really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
139151
// we are going to need the unescaped_bits to check for unescaped chars
140152
u32 unescaped_bits = (u32)_mm256_movemask_epi8(unescaped_vec);
141153
if(((bs_bits - 1) & (~ bs_bits) & unescaped_bits) != 0) {
154+
#ifdef JSON_TEST_STRINGS // for unit testing
155+
foundBadString(buf + offset);
156+
#endif // JSON_TEST_STRINGS
142157
return false;
143158
}
144159
#endif //CHECKUNESCAPED
@@ -149,6 +164,9 @@ really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
149164
src += bs_dist;
150165
dst += bs_dist;
151166
if (!handle_unicode_codepoint(&src, &dst)) {
167+
#ifdef JSON_TEST_STRINGS // for unit testing
168+
foundBadString(buf + offset);
169+
#endif // JSON_TEST_STRINGS
152170
return false;
153171
}
154172
} else {
@@ -157,8 +175,12 @@ really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
157175
// note this may reach beyond the part of the buffer we've actually
158176
// seen. I think this is ok
159177
u8 escape_result = escape_map[escape_char];
160-
if (!escape_result)
178+
if (!escape_result) {
179+
#ifdef JSON_TEST_STRINGS // for unit testing
180+
foundBadString(buf + offset);
181+
#endif // JSON_TEST_STRINGS
161182
return false; // bogus escape value is an error
183+
}
162184
dst[bs_dist] = escape_result;
163185
src += bs_dist + 2;
164186
dst += bs_dist + 1;
@@ -171,6 +193,9 @@ really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
171193
#ifdef CHECKUNESCAPED
172194
// check for unescaped chars
173195
if(_mm256_testz_si256(unescaped_vec,unescaped_vec) != 1) {
196+
#ifdef JSON_TEST_STRINGS // for unit testing
197+
foundBadString(buf + offset);
198+
#endif // JSON_TEST_STRINGS
174199
return false;
175200
}
176201
#endif // CHECKUNESCAPED

jsonchecker/fail36.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
["this is an unclosed string ]

tests/jsoncheck.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ bool startsWith(const char *pre, const char *str) {
2121
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
2222
}
2323

24+
// Returns true when the C string `pre` occurs anywhere inside `str`.
// Both arguments must be NUL-terminated; an empty `pre` matches every `str`.
bool contains(const char *pre, const char *str) {
  return strstr(str, pre) != nullptr;
}
27+
28+
2429
bool validate(const char *dirname) {
2530
bool everythingfine = true;
2631
// init_state_machine(); // no longer necessary
@@ -36,6 +41,7 @@ bool validate(const char *dirname) {
3641
printf("nothing in dir %s \n", dirname);
3742
return false;
3843
}
44+
size_t howmany = 0;
3945
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
4046
for (int i = 0; i < c; i++) {
4147
const char *name = entry_list[i]->d_name;
@@ -51,15 +57,18 @@ bool validate(const char *dirname) {
5157
strcpy(fullpath + dirlen, name);
5258
}
5359
std::pair<u8 *, size_t> p = get_corpus(fullpath);
54-
// terrible hack but just to get it working
5560
ParsedJson *pj_ptr = allocate_ParsedJson(p.second);
5661
if(pj_ptr == NULL) {
5762
std::cerr<< "can't allocate memory"<<std::endl;
5863
return false;
5964
}
65+
++howmany;
6066
ParsedJson &pj(*pj_ptr);
6167
bool isok = json_parse(p.first, p.second, pj);
62-
if (startsWith("pass", name)) {
68+
if(contains("EXCLUDE",name)) {
69+
// skipping
70+
howmany--;
71+
} else if (startsWith("pass", name)) {
6372
if (!isok) {
6473
printf("warning: file %s should pass but it fails.\n", name);
6574
everythingfine = false;
@@ -81,6 +90,8 @@ bool validate(const char *dirname) {
8190
for (int i = 0; i < c; ++i)
8291
free(entry_list[i]);
8392
free(entry_list);
93+
printf("%zu files checked.\n", howmany);
94+
if(everythingfine) printf("All ok!\n");
8495
return everythingfine;
8596
}
8697

tests/numberparsingcheck.cpp

Lines changed: 58 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,10 @@
22
#include <cstring>
33
#include <dirent.h>
44
#include <inttypes.h>
5+
#include <math.h>
56
#include <stdbool.h>
67
#include <stdio.h>
78
#include <stdlib.h>
8-
#include <inttypes.h>
9-
#include <math.h>
109

1110
#ifndef JSON_TEST_NUMBERS
1211
#define JSON_TEST_NUMBERS
@@ -16,55 +15,73 @@
1615

1716
int parse_error;
1817
char *fullpath;
19-
enum{PARSE_WARNING, PARSE_ERROR};
18+
// Bit flags that are OR-ed into parse_error. Each flag needs its own non-zero
// bit: with default enumerator values PARSE_WARNING would be 0, so
// `parse_error |= PARSE_WARNING` would silently record nothing.
enum { PARSE_WARNING = 1, PARSE_ERROR = 2 };
2019

2120
size_t float_count;
2221
size_t int_count;
2322
size_t invalid_count;
2423

25-
inline void foundInvalidNumber(const u8 * buf) {
24+
// Inputs that start with one of these must not be accepted as numbers.
// foundInvalidNumber consults this list so that it does not warn when strtod
// consumes a leading part of such input while the parser rejects it.
const char *really_bad[] = {"013}", "0x14", "0e]", "0e+]", "0e+-1]"};

// Returns true when the NUL-terminated string `str` begins with `pre`.
bool startsWith(const char *pre, const char *str) {
  // strncmp stops at the terminator of `str`, so a `str` shorter than `pre`
  // compares unequal without a separate length check.
  return strncmp(pre, str, strlen(pre)) == 0;
}

// Returns true when `buf` starts with any entry of really_bad.
// Takes a pointer to const because the text is only read; callers holding a
// plain `char *` keep working unchanged.
bool is_in_bad_list(const char *buf) {
  for (const char *bad : really_bad)
    if (startsWith(bad, buf))
      return true;
  return false;
}
37+
38+
inline void foundInvalidNumber(const u8 *buf) {
2639
invalid_count++;
27-
char * endptr;
40+
char *endptr;
2841
double expected = strtod((char *)buf, &endptr);
29-
if(endptr != (char *)buf) {
30-
printf("Warning: foundInvalidNumber %.32s whereas strtod parses it to %f, ", buf, expected);
31-
printf(" while parsing %s \n", fullpath);
32-
parse_error |= PARSE_WARNING;
42+
if (endptr != (char *)buf) {
43+
if (!is_in_bad_list((char *)buf)) {
44+
printf(
45+
"Warning: foundInvalidNumber %.32s whereas strtod parses it to %f, ",
46+
buf, expected);
47+
printf(" while parsing %s \n", fullpath);
48+
parse_error |= PARSE_WARNING;
49+
}
3350
}
3451
}
3552

36-
inline void foundInteger(int64_t result, const u8 * buf) {
53+
inline void foundInteger(int64_t result, const u8 *buf) {
3754
int_count++;
38-
char * endptr;
39-
long long expected = strtoll((char *)buf, & endptr, 10);
40-
if((endptr == (char *)buf) || (expected != result)) {
55+
char *endptr;
56+
long long expected = strtoll((char *)buf, &endptr, 10);
57+
if ((endptr == (char *)buf) || (expected != result)) {
4158
printf("Error: parsed %" PRId64 " out of %.32s, ", result, buf);
4259
printf(" while parsing %s \n", fullpath);
4360
parse_error |= PARSE_ERROR;
4461
}
4562
}
4663

47-
inline void foundFloat(double result, const u8 * buf) {
48-
char * endptr;
64+
inline void foundFloat(double result, const u8 *buf) {
65+
char *endptr;
4966
float_count++;
5067
double expected = strtod((char *)buf, &endptr);
51-
if(endptr == (char *)buf) {
52-
printf("parsed %f from %.32s whereas strtod refuses to parse a float, ", result, buf);
68+
if (endptr == (char *)buf) {
69+
printf("parsed %f from %.32s whereas strtod refuses to parse a float, ",
70+
result, buf);
5371
printf(" while parsing %s \n", fullpath);
5472
parse_error |= PARSE_ERROR;
5573
}
5674
// we want to get some reasonable relative accuracy
57-
if(fabs(expected - result)/fmin(fabs(expected),fabs(result)) > 0.000000000000001) {
75+
if (fabs(expected - result) / fmin(fabs(expected), fabs(result)) >
76+
0.000000000000001) {
5877
printf("parsed %.32f from \n", result);
5978
printf(" %.32s whereas strtod gives\n", buf);
60-
printf(" %.32f,", expected);
79+
printf(" %.32f,", expected);
6180
printf(" while parsing %s \n", fullpath);
6281
parse_error |= PARSE_ERROR;
6382
}
6483
}
6584

66-
67-
6885
#include "jsonparser/jsonparser.h"
6986
#include "src/stage34_unified.cpp"
7087

@@ -76,13 +93,10 @@ static bool hasExtension(const char *filename, const char *extension) {
7693
return (ext && !strcmp(ext, extension));
7794
}
7895

79-
bool startsWith(const char *pre, const char *str) {
80-
size_t lenpre = strlen(pre), lenstr = strlen(str);
81-
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
82-
}
83-
8496
bool validate(const char *dirname) {
8597
parse_error = 0;
98+
size_t total_count = 0;
99+
86100
// init_state_machine(); // no longer necessary
87101
const char *extension = ".json";
88102
size_t dirlen = strlen(dirname);
@@ -112,28 +126,32 @@ bool validate(const char *dirname) {
112126
std::pair<u8 *, size_t> p = get_corpus(fullpath);
113127
// terrible hack but just to get it working
114128
ParsedJson *pj_ptr = allocate_ParsedJson(p.second);
115-
if(pj_ptr == NULL) {
116-
std::cerr<< "can't allocate memory"<<std::endl;
129+
if (pj_ptr == NULL) {
130+
std::cerr << "can't allocate memory" << std::endl;
117131
return false;
118132
}
119133
float_count = 0;
120134
int_count = 0;
121135
invalid_count = 0;
136+
total_count += float_count + int_count + invalid_count;
122137
ParsedJson &pj(*pj_ptr);
123-
bool isok =
124-
json_parse(p.first, p.second, pj);
125-
if(int_count+float_count+invalid_count > 0) {
126-
printf("File %40s %s --- integers: %10zu floats: %10zu invalid: %10zu total numbers: %10zu \n", name,
127-
isok ? " is valid " :
128-
" is not valid ",int_count, float_count, invalid_count, int_count+float_count+invalid_count);
138+
bool isok = json_parse(p.first, p.second, pj);
139+
if (int_count + float_count + invalid_count > 0) {
140+
printf("File %40s %s --- integers: %10zu floats: %10zu invalid: %10zu "
141+
"total numbers: %10zu \n",
142+
name, isok ? " is valid " : " is not valid ", int_count,
143+
float_count, invalid_count,
144+
int_count + float_count + invalid_count);
129145
}
130146
free(p.first);
131147
free(fullpath);
132148
deallocate_ParsedJson(pj_ptr);
133149
}
134150
}
135-
if((parse_error & PARSE_ERROR) != 0) {
151+
if ((parse_error & PARSE_ERROR) != 0) {
136152
printf("NUMBER PARSING FAILS?\n");
153+
} else {
154+
printf("All ok.\n");
137155
}
138156
for (int i = 0; i < c; ++i)
139157
free(entry_list[i]);
@@ -145,10 +163,11 @@ int main(int argc, char *argv[]) {
145163
if (argc != 2) {
146164
std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
147165
<< std::endl;
148-
std::cout
149-
<< "We are going to assume you mean to use the 'jsonchecker' and 'jsonexamples' directories."
150-
<< std::endl;
151-
return validate("jsonchecker/") && validate("jsonexamples/") ? EXIT_SUCCESS : EXIT_FAILURE;
166+
std::cout << "We are going to assume you mean to use the 'jsonchecker' and "
167+
"'jsonexamples' directories."
168+
<< std::endl;
169+
return validate("jsonchecker/") && validate("jsonexamples/") ? EXIT_SUCCESS
170+
: EXIT_FAILURE;
152171
}
153172
return validate(argv[1]) ? EXIT_SUCCESS : EXIT_FAILURE;
154173
}

0 commit comments

Comments
 (0)